450 files changed, 37108 insertions, 8516 deletions
diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c
index c5987f518cb2..b648e3f95c01 100644
--- a/drivers/net/ethernet/3com/3c515.c
+++ b/drivers/net/ethernet/3com/3c515.c
@@ -367,7 +367,7 @@ static struct net_device *corkscrew_scan(int unit);
 static int corkscrew_setup(struct net_device *dev, int ioaddr,
 			    struct pnp_dev *idev, int card_number);
 static int corkscrew_open(struct net_device *dev);
-static void corkscrew_timer(unsigned long arg);
+static void corkscrew_timer(struct timer_list *t);
 static netdev_tx_t corkscrew_start_xmit(struct sk_buff *skb,
 					struct net_device *dev);
 static int corkscrew_rx(struct net_device *dev);
@@ -627,7 +627,7 @@ static int corkscrew_setup(struct net_device *dev, int ioaddr,
 
 	spin_lock_init(&vp->lock);
 
-	setup_timer(&vp->timer, corkscrew_timer, (unsigned long) dev);
+	timer_setup(&vp->timer, corkscrew_timer, 0);
 
 	/* Read the station address from the EEPROM. */
 	EL3WINDOW(0);
@@ -869,11 +869,11 @@ static int corkscrew_open(struct net_device *dev)
 	return 0;
 }
 
-static void corkscrew_timer(unsigned long data)
+static void corkscrew_timer(struct timer_list *t)
 {
 #ifdef AUTOMEDIA
-	struct net_device *dev = (struct net_device *) data;
-	struct corkscrew_private *vp = netdev_priv(dev);
+	struct corkscrew_private *vp = from_timer(vp, t, timer);
+	struct net_device *dev = vp->our_dev;
 	int ioaddr = dev->base_addr;
 	unsigned long flags;
 	int ok = 0;
diff --git a/drivers/net/ethernet/3com/3c574_cs.c b/drivers/net/ethernet/3com/3c574_cs.c
index 47c844cc9d27..3044a6f35f04 100644
--- a/drivers/net/ethernet/3com/3c574_cs.c
+++ b/drivers/net/ethernet/3com/3c574_cs.c
@@ -225,7 +225,7 @@ static unsigned short read_eeprom(unsigned int ioaddr, int index);
 static void tc574_wait_for_completion(struct net_device *dev, int cmd);
 
 static void tc574_reset(struct net_device *dev);
-static void media_check(unsigned long arg);
+static void media_check(struct timer_list *t);
 static int el3_open(struct net_device *dev);
 static netdev_tx_t el3_start_xmit(struct sk_buff *skb,
 					struct net_device *dev);
@@ -377,7 +377,7 @@ static int tc574_config(struct pcmcia_device *link)
 		lp->autoselect = config & Autoselect ? 1 : 0;
 	}
 
-	init_timer(&lp->media);
+	timer_setup(&lp->media, media_check, 0);
 
 	{
 		int phy;
@@ -681,8 +681,6 @@ static int el3_open(struct net_device *dev)
 	netif_start_queue(dev);
 	
 	tc574_reset(dev);
-	lp->media.function = media_check;
-	lp->media.data = (unsigned long) dev;
 	lp->media.expires = jiffies + HZ;
 	add_timer(&lp->media);
 	
@@ -859,10 +857,10 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id)
 	(and as a last resort, poll the NIC for events), and to monitor
 	the MII, reporting changes in cable status.
 */
-static void media_check(unsigned long arg)
+static void media_check(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *) arg;
-	struct el3_private *lp = netdev_priv(dev);
+	struct el3_private *lp = from_timer(lp, t, media);
+	struct net_device *dev = lp->p_dev->priv;
 	unsigned int ioaddr = dev->base_addr;
 	unsigned long flags;
 	unsigned short /* cable, */ media, partner;
@@ -1048,6 +1046,7 @@ static int el3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	switch(cmd) {
 	case SIOCGMIIPHY:		/* Get the address of the PHY in use. */
 		data->phy_id = phy;
+		/* fall through */
 	case SIOCGMIIREG:		/* Read the specified MII register. */
 		{
 			int saved_window;
diff --git a/drivers/net/ethernet/3com/3c589_cs.c b/drivers/net/ethernet/3com/3c589_cs.c
index e28254a00599..2b2695311bda 100644
--- a/drivers/net/ethernet/3com/3c589_cs.c
+++ b/drivers/net/ethernet/3com/3c589_cs.c
@@ -163,7 +163,7 @@ static void tc589_release(struct pcmcia_device *link);
 
 static u16 read_eeprom(unsigned int ioaddr, int index);
 static void tc589_reset(struct net_device *dev);
-static void media_check(unsigned long arg);
+static void media_check(struct timer_list *t);
 static int el3_config(struct net_device *dev, struct ifmap *map);
 static int el3_open(struct net_device *dev);
 static netdev_tx_t el3_start_xmit(struct sk_buff *skb,
@@ -517,7 +517,7 @@ static int el3_open(struct net_device *dev)
 	netif_start_queue(dev);
 
 	tc589_reset(dev);
-	setup_timer(&lp->media, media_check, (unsigned long)dev);
+	timer_setup(&lp->media, media_check, 0);
 	mod_timer(&lp->media, jiffies + HZ);
 
 	dev_dbg(&link->dev, "%s: opened, status %4.4x.\n",
@@ -676,10 +676,10 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id)
 	return IRQ_RETVAL(handled);
 }
 
-static void media_check(unsigned long arg)
+static void media_check(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *)(arg);
-	struct el3_private *lp = netdev_priv(dev);
+	struct el3_private *lp = from_timer(lp, t, media);
+	struct net_device *dev = lp->p_dev->priv;
 	unsigned int ioaddr = dev->base_addr;
 	u16 media, errs;
 	unsigned long flags;
diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index 402d9090ad29..f4e13a7014bd 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -759,8 +759,8 @@ static int vortex_open(struct net_device *dev);
 static void mdio_sync(struct vortex_private *vp, int bits);
 static int mdio_read(struct net_device *dev, int phy_id, int location);
 static void mdio_write(struct net_device *vp, int phy_id, int location, int value);
-static void vortex_timer(unsigned long arg);
-static void rx_oom_timer(unsigned long arg);
+static void vortex_timer(struct timer_list *t);
+static void rx_oom_timer(struct timer_list *t);
 static netdev_tx_t vortex_start_xmit(struct sk_buff *skb,
 				     struct net_device *dev);
 static netdev_tx_t boomerang_start_xmit(struct sk_buff *skb,
@@ -1599,9 +1599,9 @@ vortex_up(struct net_device *dev)
 				dev->name, media_tbl[dev->if_port].name);
 	}
 
-	setup_timer(&vp->timer, vortex_timer, (unsigned long)dev);
+	timer_setup(&vp->timer, vortex_timer, 0);
 	mod_timer(&vp->timer, RUN_AT(media_tbl[dev->if_port].wait));
-	setup_timer(&vp->rx_oom_timer, rx_oom_timer, (unsigned long)dev);
+	timer_setup(&vp->rx_oom_timer, rx_oom_timer, 0);
 
 	if (vortex_debug > 1)
 		pr_debug("%s: Initial media type %s.\n",
@@ -1784,10 +1784,10 @@ out:
 }
 
 static void
-vortex_timer(unsigned long data)
+vortex_timer(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *)data;
-	struct vortex_private *vp = netdev_priv(dev);
+	struct vortex_private *vp = from_timer(vp, t, timer);
+	struct net_device *dev = vp->mii.dev;
 	void __iomem *ioaddr = vp->ioaddr;
 	int next_tick = 60*HZ;
 	int ok = 0;
@@ -2687,10 +2687,10 @@ boomerang_rx(struct net_device *dev)
  * for some memory.  Otherwise there is no way to restart the rx process.
  */
 static void
-rx_oom_timer(unsigned long arg)
+rx_oom_timer(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *)arg;
-	struct vortex_private *vp = netdev_priv(dev);
+	struct vortex_private *vp = from_timer(vp, t, rx_oom_timer);
+	struct net_device *dev = vp->mii.dev;
 
 	spin_lock_irq(&vp->lock);
 	if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE)	/* This test is redundant, but makes me feel good */
diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c
index 3da1fc539ef9..7bddb8efb6d5 100644
--- a/drivers/net/ethernet/8390/axnet_cs.c
+++ b/drivers/net/ethernet/8390/axnet_cs.c
@@ -85,7 +85,7 @@ static struct net_device_stats *get_stats(struct net_device *dev);
 static void set_multicast_list(struct net_device *dev);
 static void axnet_tx_timeout(struct net_device *dev);
 static irqreturn_t ei_irq_wrapper(int irq, void *dev_id);
-static void ei_watchdog(u_long arg);
+static void ei_watchdog(struct timer_list *t);
 static void axnet_reset_8390(struct net_device *dev);
 
 static int mdio_read(unsigned int addr, int phy_id, int loc);
@@ -483,7 +483,7 @@ static int axnet_open(struct net_device *dev)
     link->open++;
 
     info->link_status = 0x00;
-    setup_timer(&info->watchdog, ei_watchdog, (u_long)dev);
+    timer_setup(&info->watchdog, ei_watchdog, 0);
     mod_timer(&info->watchdog, jiffies + HZ);
 
     return ax_open(dev);
@@ -547,10 +547,10 @@ static irqreturn_t ei_irq_wrapper(int irq, void *dev_id)
     return ax_interrupt(irq, dev_id);
 }
 
-static void ei_watchdog(u_long arg)
+static void ei_watchdog(struct timer_list *t)
 {
-    struct net_device *dev = (struct net_device *)(arg);
-    struct axnet_dev *info = PRIV(dev);
+    struct axnet_dev *info = from_timer(info, t, watchdog);
+    struct net_device *dev = info->p_dev->priv;
     unsigned int nic_base = dev->base_addr;
     unsigned int mii_addr = nic_base + AXNET_MII_EEP;
     u_short link;
diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c
index bd0a2a14b649..bcad4a7fac9f 100644
--- a/drivers/net/ethernet/8390/pcnet_cs.c
+++ b/drivers/net/ethernet/8390/pcnet_cs.c
@@ -99,7 +99,7 @@ static int pcnet_open(struct net_device *dev);
 static int pcnet_close(struct net_device *dev);
 static int ei_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
 static irqreturn_t ei_irq_wrapper(int irq, void *dev_id);
-static void ei_watchdog(u_long arg);
+static void ei_watchdog(struct timer_list *t);
 static void pcnet_reset_8390(struct net_device *dev);
 static int set_config(struct net_device *dev, struct ifmap *map);
 static int setup_shmem_window(struct pcmcia_device *link, int start_pg,
@@ -917,7 +917,7 @@ static int pcnet_open(struct net_device *dev)
 
     info->phy_id = info->eth_phy;
     info->link_status = 0x00;
-    setup_timer(&info->watchdog, ei_watchdog, (u_long)dev);
+    timer_setup(&info->watchdog, ei_watchdog, 0);
     mod_timer(&info->watchdog, jiffies + HZ);
 
     return ei_open(dev);
@@ -1006,10 +1006,10 @@ static irqreturn_t ei_irq_wrapper(int irq, void *dev_id)
     return ret;
 }
 
-static void ei_watchdog(u_long arg)
+static void ei_watchdog(struct timer_list *t)
 {
-    struct net_device *dev = (struct net_device *)arg;
-    struct pcnet_dev *info = PRIV(dev);
+    struct pcnet_dev *info = from_timer(info, t, watchdog);
+    struct net_device *dev = info->p_dev->priv;
     unsigned int nic_base = dev->base_addr;
     unsigned int mii_addr = nic_base + DLINK_GPIO;
     u_short link;
@@ -1107,6 +1107,7 @@ static int ei_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
     switch (cmd) {
     case SIOCGMIIPHY:
 	data->phy_id = info->phy_id;
+	/* fall through */
     case SIOCGMIIREG:		/* Read MII PHY register. */
 	data->val_out = mdio_read(mii_addr, data->phy_id, data->reg_num & 0x1f);
 	return 0;
diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c
index a251de8d9a91..0658cde1586a 100644
--- a/drivers/net/ethernet/adi/bfin_mac.c
+++ b/drivers/net/ethernet/adi/bfin_mac.c
@@ -1650,9 +1650,8 @@ static int bfin_mac_probe(struct platform_device *pdev)
 	ndev->netdev_ops = &bfin_mac_netdev_ops;
 	ndev->ethtool_ops = &bfin_mac_ethtool_ops;
 
-	init_timer(&lp->tx_reclaim_timer);
-	lp->tx_reclaim_timer.data = (unsigned long)lp;
-	lp->tx_reclaim_timer.function = tx_reclaim_skb_timeout;
+	setup_timer(&lp->tx_reclaim_timer, tx_reclaim_skb_timeout,
+		    (unsigned long)lp);
 
 	lp->flags = 0;
 	netif_napi_add(ndev, &lp->napi, bfin_mac_poll, CONFIG_BFIN_RX_DESC_NUM);
diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c
index 54eff90e2f02..658e92f79d36 100644
--- a/drivers/net/ethernet/agere/et131x.c
+++ b/drivers/net/ethernet/agere/et131x.c
@@ -3624,11 +3624,10 @@ static int et131x_open(struct net_device *netdev)
 	int result;
 
 	/* Start the timer to track NIC errors */
-	init_timer(&adapter->error_timer);
+	setup_timer(&adapter->error_timer, et131x_error_timer_handler,
+		    (unsigned long)adapter);
 	adapter->error_timer.expires = jiffies +
 		msecs_to_jiffies(TX_ERROR_PERIOD);
-	adapter->error_timer.function = et131x_error_timer_handler;
-	adapter->error_timer.data = (unsigned long)adapter;
 	add_timer(&adapter->error_timer);
 
 	result = request_irq(irq, et131x_isr,
diff --git a/drivers/net/ethernet/alacritech/slicoss.c b/drivers/net/ethernet/alacritech/slicoss.c
index 15a8096c60df..0b60921c392f 100644
--- a/drivers/net/ethernet/alacritech/slicoss.c
+++ b/drivers/net/ethernet/alacritech/slicoss.c
@@ -355,10 +355,10 @@ static void slic_xmit_complete(struct slic_device *sdev)
 {
 	struct slic_tx_queue *txq = &sdev->txq;
 	struct net_device *dev = sdev->netdev;
-	unsigned int idx = txq->done_idx;
 	struct slic_tx_buffer *buff;
 	unsigned int frames = 0;
 	unsigned int bytes = 0;
+	unsigned int idx;
 
 	/* Limit processing to SLIC_MAX_TX_COMPLETIONS frames to avoid that new
 	 * completions during processing keeps the loop running endlessly.
diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
index 305dc1996b4e..4532e574ebcd 100644
--- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
+++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
@@ -627,6 +627,12 @@ enum ena_admin_flow_hash_proto {
 
 	ENA_ADMIN_RSS_NOT_IP	= 7,
 
+	/* TCPv6 with extension header */
+	ENA_ADMIN_RSS_TCP6_EX	= 8,
+
+	/* IPv6 with extension header */
+	ENA_ADMIN_RSS_IP6_EX	= 9,
+
 	ENA_ADMIN_RSS_PROTO_NUM	= 16,
 };
 
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 52beba8c7a39..bf2de5298005 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -63,6 +63,8 @@
 
 #define ENA_REGS_ADMIN_INTR_MASK 1
 
+#define ENA_POLL_MS	5
+
 /*****************************************************************************/
 /*****************************************************************************/
 /*****************************************************************************/
@@ -315,7 +317,7 @@ static struct ena_comp_ctx *ena_com_submit_admin_cmd(struct ena_com_admin_queue
 					      cmd_size_in_bytes,
 					      comp,
 					      comp_size_in_bytes);
-	if (unlikely(IS_ERR(comp_ctx)))
+	if (IS_ERR(comp_ctx))
 		admin_queue->running_state = false;
 	spin_unlock_irqrestore(&admin_queue->q_lock, flags);
 
@@ -533,7 +535,7 @@ static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_c
 			goto err;
 		}
 
-		msleep(100);
+		msleep(ENA_POLL_MS);
 	}
 
 	if (unlikely(comp_ctx->status == ENA_CMD_ABORTED)) {
@@ -746,6 +748,9 @@ static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout,
 {
 	u32 val, i;
 
+	/* Convert timeout from resolution of 100ms to ENA_POLL_MS */
+	timeout = (timeout * 100) / ENA_POLL_MS;
+
 	for (i = 0; i < timeout; i++) {
 		val = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF);
 
@@ -758,8 +763,7 @@ static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout,
 			exp_state)
 			return 0;
 
-		/* The resolution of the timeout is 100ms */
-		msleep(100);
+		msleep(ENA_POLL_MS);
 	}
 
 	return -ETIME;
@@ -1130,7 +1134,7 @@ int ena_com_execute_admin_command(struct ena_com_admin_queue *admin_queue,
 
 	comp_ctx = ena_com_submit_admin_cmd(admin_queue, cmd, cmd_size,
 					    comp, comp_size);
-	if (unlikely(IS_ERR(comp_ctx))) {
+	if (IS_ERR(comp_ctx)) {
 		if (comp_ctx == ERR_PTR(-ENODEV))
 			pr_debug("Failed to submit command [%ld]\n",
 				 PTR_ERR(comp_ctx));
@@ -1253,7 +1257,7 @@ void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev)
 	spin_lock_irqsave(&admin_queue->q_lock, flags);
 	while (atomic_read(&admin_queue->outstanding_cmds) != 0) {
 		spin_unlock_irqrestore(&admin_queue->q_lock, flags);
-		msleep(20);
+		msleep(ENA_POLL_MS);
 		spin_lock_irqsave(&admin_queue->q_lock, flags);
 	}
 	spin_unlock_irqrestore(&admin_queue->q_lock, flags);
diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index 967020fb26ee..060cb18fa659 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -60,8 +60,8 @@ struct ena_stats {
 
 static const struct ena_stats ena_stats_global_strings[] = {
 	ENA_STAT_GLOBAL_ENTRY(tx_timeout),
-	ENA_STAT_GLOBAL_ENTRY(io_suspend),
-	ENA_STAT_GLOBAL_ENTRY(io_resume),
+	ENA_STAT_GLOBAL_ENTRY(suspend),
+	ENA_STAT_GLOBAL_ENTRY(resume),
 	ENA_STAT_GLOBAL_ENTRY(wd_expired),
 	ENA_STAT_GLOBAL_ENTRY(interface_up),
 	ENA_STAT_GLOBAL_ENTRY(interface_down),
@@ -81,6 +81,7 @@ static const struct ena_stats ena_stats_tx_strings[] = {
 	ENA_STAT_TX_ENTRY(doorbells),
 	ENA_STAT_TX_ENTRY(prepare_ctx_err),
 	ENA_STAT_TX_ENTRY(bad_req_id),
+	ENA_STAT_TX_ENTRY(missed_tx),
 };
 
 static const struct ena_stats ena_stats_rx_strings[] = {
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index c6bd5e24005d..7451922c209d 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -517,7 +517,7 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
 
 
 		rc = ena_alloc_rx_page(rx_ring, rx_info,
-				       __GFP_COLD | GFP_ATOMIC | __GFP_COMP);
+				       GFP_ATOMIC | __GFP_COMP);
 		if (unlikely(rc < 0)) {
 			netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
 				   "failed to alloc buffer for rx queue %d\n",
@@ -2361,38 +2361,6 @@ static const struct net_device_ops ena_netdev_ops = {
 #endif /* CONFIG_NET_POLL_CONTROLLER */
 };
 
-static void ena_device_io_suspend(struct work_struct *work)
-{
-	struct ena_adapter *adapter =
-		container_of(work, struct ena_adapter, suspend_io_task);
-	struct net_device *netdev = adapter->netdev;
-
-	/* ena_napi_disable_all disables only the IO handling.
-	 * We are still subject to AENQ keep alive watchdog.
-	 */
-	u64_stats_update_begin(&adapter->syncp);
-	adapter->dev_stats.io_suspend++;
-	u64_stats_update_begin(&adapter->syncp);
-	ena_napi_disable_all(adapter);
-	netif_tx_lock(netdev);
-	netif_device_detach(netdev);
-	netif_tx_unlock(netdev);
-}
-
-static void ena_device_io_resume(struct work_struct *work)
-{
-	struct ena_adapter *adapter =
-		container_of(work, struct ena_adapter, resume_io_task);
-	struct net_device *netdev = adapter->netdev;
-
-	u64_stats_update_begin(&adapter->syncp);
-	adapter->dev_stats.io_resume++;
-	u64_stats_update_end(&adapter->syncp);
-
-	netif_device_attach(netdev);
-	ena_napi_enable_all(adapter);
-}
-
 static int ena_device_validate_params(struct ena_adapter *adapter,
 				      struct ena_com_dev_get_features_ctx *get_feat_ctx)
 {
@@ -2561,38 +2529,31 @@ err_disable_msix:
 	return rc;
 }
 
-static void ena_fw_reset_device(struct work_struct *work)
+static void ena_destroy_device(struct ena_adapter *adapter)
 {
-	struct ena_com_dev_get_features_ctx get_feat_ctx;
-	struct ena_adapter *adapter =
-		container_of(work, struct ena_adapter, reset_task);
 	struct net_device *netdev = adapter->netdev;
 	struct ena_com_dev *ena_dev = adapter->ena_dev;
-	struct pci_dev *pdev = adapter->pdev;
-	bool dev_up, wd_state;
-	int rc;
-
-	if (unlikely(!test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
-		dev_err(&pdev->dev,
-			"device reset schedule while reset bit is off\n");
-		return;
-	}
+	bool dev_up;
 
 	netif_carrier_off(netdev);
 
 	del_timer_sync(&adapter->timer_service);
 
-	rtnl_lock();
-
 	dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+	adapter->dev_up_before_reset = dev_up;
+
 	ena_com_set_admin_running_state(ena_dev, false);
 
-	/* After calling ena_close the tx queues and the napi
-	 * are disabled so no one can interfere or touch the
-	 * data structures
-	 */
 	ena_close(netdev);
 
+	/* Before releasing the ENA resources, a device reset is required.
+	 * (to prevent the device from accessing them).
+	 * In case the reset flag is set and the device is up, ena_close
+	 * already perform the reset, so it can be skipped.
+	 */
+	if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up))
+		ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
+
 	ena_free_mgmnt_irq(adapter);
 
 	ena_disable_msix(adapter);
@@ -2606,9 +2567,17 @@ static void ena_fw_reset_device(struct work_struct *work)
 	ena_com_mmio_reg_read_request_destroy(ena_dev);
 
 	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
+
 	clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+}
 
-	/* Finish with the destroy part. Start the init part */
+static int ena_restore_device(struct ena_adapter *adapter)
+{
+	struct ena_com_dev_get_features_ctx get_feat_ctx;
+	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	struct pci_dev *pdev = adapter->pdev;
+	bool wd_state;
+	int rc;
 
 	rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state);
 	if (rc) {
@@ -2630,7 +2599,7 @@ static void ena_fw_reset_device(struct work_struct *work)
 		goto err_device_destroy;
 	}
 	/* If the interface was up before the reset bring it up */
-	if (dev_up) {
+	if (adapter->dev_up_before_reset) {
 		rc = ena_up(adapter);
 		if (rc) {
 			dev_err(&pdev->dev, "Failed to create I/O queues\n");
@@ -2639,24 +2608,38 @@ static void ena_fw_reset_device(struct work_struct *work)
 	}
 
 	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
-
-	rtnl_unlock();
-
 	dev_err(&pdev->dev, "Device reset completed successfully\n");
 
-	return;
+	return rc;
 err_disable_msix:
 	ena_free_mgmnt_irq(adapter);
 	ena_disable_msix(adapter);
 err_device_destroy:
 	ena_com_admin_destroy(ena_dev);
 err:
-	rtnl_unlock();
-
 	clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
 
 	dev_err(&pdev->dev,
 		"Reset attempt failed. Can not reset the device\n");
+
+	return rc;
+}
+
+static void ena_fw_reset_device(struct work_struct *work)
+{
+	struct ena_adapter *adapter =
+		container_of(work, struct ena_adapter, reset_task);
+	struct pci_dev *pdev = adapter->pdev;
+
+	if (unlikely(!test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
+		dev_err(&pdev->dev,
+			"device reset schedule while reset bit is off\n");
+		return;
+	}
+	rtnl_lock();
+	ena_destroy_device(adapter);
+	ena_restore_device(adapter);
+	rtnl_unlock();
 }
 
 static int check_missing_comp_in_queue(struct ena_adapter *adapter,
@@ -2665,7 +2648,7 @@ static int check_missing_comp_in_queue(struct ena_adapter *adapter,
 	struct ena_tx_buffer *tx_buf;
 	unsigned long last_jiffies;
 	u32 missed_tx = 0;
-	int i;
+	int i, rc = 0;
 
 	for (i = 0; i < tx_ring->ring_size; i++) {
 		tx_buf = &tx_ring->tx_buffer_info[i];
@@ -2679,21 +2662,25 @@ static int check_missing_comp_in_queue(struct ena_adapter *adapter,
 
 			tx_buf->print_once = 1;
 			missed_tx++;
-
-			if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) {
-				netif_err(adapter, tx_err, adapter->netdev,
-					  "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n",
-					  missed_tx,
-					  adapter->missing_tx_completion_threshold);
-				adapter->reset_reason =
-					ENA_REGS_RESET_MISS_TX_CMPL;
-				set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
-				return -EIO;
-			}
 		}
 	}
 
-	return 0;
+	if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) {
+		netif_err(adapter, tx_err, adapter->netdev,
+			  "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n",
+			  missed_tx,
+			  adapter->missing_tx_completion_threshold);
+		adapter->reset_reason =
+			ENA_REGS_RESET_MISS_TX_CMPL;
+		set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+		rc = -EIO;
+	}
+
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->tx_stats.missed_tx = missed_tx;
+	u64_stats_update_end(&tx_ring->syncp);
+
+	return rc;
 }
 
 static void check_for_missing_tx_completions(struct ena_adapter *adapter)
@@ -3276,8 +3263,6 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto err_rss;
 	}
 
-	INIT_WORK(&adapter->suspend_io_task, ena_device_io_suspend);
-	INIT_WORK(&adapter->resume_io_task, ena_device_io_resume);
 	INIT_WORK(&adapter->reset_task, ena_fw_reset_device);
 
 	adapter->last_keep_alive_jiffies = jiffies;
@@ -3311,8 +3296,6 @@ err_free_msix:
 err_worker_destroy:
 	ena_com_destroy_interrupt_moderation(ena_dev);
 	del_timer(&adapter->timer_service);
-	cancel_work_sync(&adapter->suspend_io_task);
-	cancel_work_sync(&adapter->resume_io_task);
 err_netdev_destroy:
 	free_netdev(netdev);
 err_device_destroy:
@@ -3382,10 +3365,6 @@ static void ena_remove(struct pci_dev *pdev)
 
 	cancel_work_sync(&adapter->reset_task);
 
-	cancel_work_sync(&adapter->suspend_io_task);
-
-	cancel_work_sync(&adapter->resume_io_task);
-
 	/* Reset the device only if the device is running. */
 	if (test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
 		ena_com_dev_reset(ena_dev, adapter->reset_reason);
@@ -3419,11 +3398,59 @@ static void ena_remove(struct pci_dev *pdev)
 	vfree(ena_dev);
 }
 
+#ifdef CONFIG_PM
+/* ena_suspend - PM suspend callback
+ * @pdev: PCI device information struct
+ * @state:power state
+ */
+static int ena_suspend(struct pci_dev *pdev,  pm_message_t state)
+{
+	struct ena_adapter *adapter = pci_get_drvdata(pdev);
+
+	u64_stats_update_begin(&adapter->syncp);
+	adapter->dev_stats.suspend++;
+	u64_stats_update_end(&adapter->syncp);
+
+	rtnl_lock();
+	if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
+		dev_err(&pdev->dev,
+			"ignoring device reset request as the device is being suspended\n");
+		clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+	}
+	ena_destroy_device(adapter);
+	rtnl_unlock();
+	return 0;
+}
+
+/* ena_resume - PM resume callback
+ * @pdev: PCI device information struct
+ *
+ */
+static int ena_resume(struct pci_dev *pdev)
+{
+	struct ena_adapter *adapter = pci_get_drvdata(pdev);
+	int rc;
+
+	u64_stats_update_begin(&adapter->syncp);
+	adapter->dev_stats.resume++;
+	u64_stats_update_end(&adapter->syncp);
+
+	rtnl_lock();
+	rc = ena_restore_device(adapter);
+	rtnl_unlock();
+	return rc;
+}
+#endif
+
 static struct pci_driver ena_pci_driver = {
 	.name		= DRV_MODULE_NAME,
 	.id_table	= ena_pci_tbl,
 	.probe		= ena_probe,
 	.remove		= ena_remove,
+#ifdef CONFIG_PM
+	.suspend    = ena_suspend,
+	.resume     = ena_resume,
+#endif
 	.sriov_configure = ena_sriov_configure,
 };
 
@@ -3504,16 +3531,6 @@ static void ena_notification(void *adapter_data,
 	     ENA_ADMIN_NOTIFICATION);
 
 	switch (aenq_e->aenq_common_desc.syndrom) {
-	case ENA_ADMIN_SUSPEND:
-		/* Suspend just the IO queues.
-		 * We deliberately don't suspend admin so the timer and
-		 * the keep_alive events should remain.
-		 */
-		queue_work(ena_wq, &adapter->suspend_io_task);
-		break;
-	case ENA_ADMIN_RESUME:
-		queue_work(ena_wq, &adapter->resume_io_task);
-		break;
 	case ENA_ADMIN_UPDATE_HINTS:
 		hints = (struct ena_admin_ena_hw_hints *)
 			(&aenq_e->inline_data_w4);
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index 29bb5704260b..ed8bd0a579c4 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -44,7 +44,7 @@
 #include "ena_eth_com.h"
 
 #define DRV_MODULE_VER_MAJOR	1
-#define DRV_MODULE_VER_MINOR	2
+#define DRV_MODULE_VER_MINOR	3
 #define DRV_MODULE_VER_SUBMINOR 0
 
 #define DRV_MODULE_NAME		"ena"
@@ -52,7 +52,7 @@
 #define DRV_MODULE_VERSION \
 	__stringify(DRV_MODULE_VER_MAJOR) "."	\
 	__stringify(DRV_MODULE_VER_MINOR) "."	\
-	__stringify(DRV_MODULE_VER_SUBMINOR) "k"
+	__stringify(DRV_MODULE_VER_SUBMINOR) "K"
 #endif
 
 #define DEVICE_NAME	"Elastic Network Adapter (ENA)"
@@ -185,6 +185,7 @@ struct ena_stats_tx {
 	u64 tx_poll;
 	u64 doorbells;
 	u64 bad_req_id;
+	u64 missed_tx;
 };
 
 struct ena_stats_rx {
@@ -257,8 +258,8 @@ struct ena_ring {
 
 struct ena_stats_dev {
 	u64 tx_timeout;
-	u64 io_suspend;
-	u64 io_resume;
+	u64 suspend;
+	u64 resume;
 	u64 wd_expired;
 	u64 interface_up;
 	u64 interface_down;
@@ -326,11 +327,10 @@ struct ena_adapter {
 
 	/* timer service */
 	struct work_struct reset_task;
-	struct work_struct suspend_io_task;
-	struct work_struct resume_io_task;
 	struct timer_list timer_service;
 
 	bool wd_state;
+	bool dev_up_before_reset;
 	unsigned long last_keep_alive_jiffies;
 
 	struct u64_stats_sync syncp;
diff --git a/drivers/net/ethernet/amd/a2065.c b/drivers/net/ethernet/amd/a2065.c
index e22f976a0d18..212fe72a190b 100644
--- a/drivers/net/ethernet/amd/a2065.c
+++ b/drivers/net/ethernet/amd/a2065.c
@@ -123,6 +123,7 @@ struct lance_private {
 	int burst_sizes;	      /* ledma SBus burst sizes */
 #endif
 	struct timer_list         multicast_timer;
+	struct net_device	  *dev;
 };
 
 #define LANCE_ADDR(x) ((int)(x) & ~0xff000000)
@@ -638,6 +639,13 @@ static void lance_set_multicast(struct net_device *dev)
 	netif_wake_queue(dev);
 }
 
+static void lance_set_multicast_retry(struct timer_list *t)
+{
+	struct lance_private *lp = from_timer(lp, t, multicast_timer);
+
+	lance_set_multicast(lp->dev);
+}
+
 static int a2065_init_one(struct zorro_dev *z,
 			  const struct zorro_device_id *ent);
 static void a2065_remove_one(struct zorro_dev *z);
@@ -728,15 +736,13 @@ static int a2065_init_one(struct zorro_dev *z,
 	priv->lance_log_tx_bufs = LANCE_LOG_TX_BUFFERS;
 	priv->rx_ring_mod_mask = RX_RING_MOD_MASK;
 	priv->tx_ring_mod_mask = TX_RING_MOD_MASK;
+	priv->dev = dev;
 
 	dev->netdev_ops = &lance_netdev_ops;
 	dev->watchdog_timeo = 5*HZ;
 	dev->dma = 0;
 
-	init_timer(&priv->multicast_timer);
-	priv->multicast_timer.data = (unsigned long) dev;
-	priv->multicast_timer.function =
-		(void (*)(unsigned long))lance_set_multicast;
+	timer_setup(&priv->multicast_timer, lance_set_multicast_retry, 0);
 
 	err = register_netdev(dev);
 	if (err) {
diff --git a/drivers/net/ethernet/amd/am79c961a.c b/drivers/net/ethernet/amd/am79c961a.c
index b11e910850f7..01d132c02ff9 100644
--- a/drivers/net/ethernet/amd/am79c961a.c
+++ b/drivers/net/ethernet/amd/am79c961a.c
@@ -302,10 +302,10 @@ am79c961_init_for_open(struct net_device *dev)
 	write_rreg (dev->base_addr, CSR0, CSR0_IENA|CSR0_STRT);
 }
 
-static void am79c961_timer(unsigned long data)
+static void am79c961_timer(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *)data;
-	struct dev_priv *priv = netdev_priv(dev);
+	struct dev_priv *priv = from_timer(priv, t, timer);
+	struct net_device *dev = priv->dev;
 	unsigned int lnkstat, carrier;
 	unsigned long flags;
 
@@ -728,9 +728,8 @@ static int am79c961_probe(struct platform_device *pdev)
 	am79c961_banner();
 
 	spin_lock_init(&priv->chip_lock);
-	init_timer(&priv->timer);
-	priv->timer.data = (unsigned long)dev;
-	priv->timer.function = am79c961_timer;
+	priv->dev = dev;
+	timer_setup(&priv->timer, am79c961_timer, 0);
 
 	if (am79c961_hw_init(dev))
 		goto release;
diff --git a/drivers/net/ethernet/amd/am79c961a.h b/drivers/net/ethernet/amd/am79c961a.h
index 9f384b79507b..fc5088c70731 100644
--- a/drivers/net/ethernet/amd/am79c961a.h
+++ b/drivers/net/ethernet/amd/am79c961a.h
@@ -140,6 +140,7 @@ struct dev_priv {
     unsigned long	txhdr;
     spinlock_t		chip_lock;
     struct timer_list	timer;
+    struct net_device   *dev;
 };
 
 #endif
diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c
index 7b5df562f30f..358f7ab77c70 100644
--- a/drivers/net/ethernet/amd/amd8111e.c
+++ b/drivers/net/ethernet/amd/amd8111e.c
@@ -1669,9 +1669,9 @@ static int amd8111e_resume(struct pci_dev *pci_dev)
 	return 0;
 }
 
-static void amd8111e_config_ipg(struct net_device *dev)
+static void amd8111e_config_ipg(struct timer_list *t)
 {
-	struct amd8111e_priv *lp = netdev_priv(dev);
+	struct amd8111e_priv *lp = from_timer(lp, t, ipg_data.ipg_timer);
 	struct ipg_info *ipg_data = &lp->ipg_data;
 	void __iomem *mmio = lp->mmio;
 	unsigned int prev_col_cnt = ipg_data->col_cnt;
@@ -1883,9 +1883,7 @@ static int amd8111e_probe_one(struct pci_dev *pdev,
 
 	/* Initialize software ipg timer */
 	if(lp->options & OPTION_DYN_IPG_ENABLE){
-		init_timer(&lp->ipg_data.ipg_timer);
-		lp->ipg_data.ipg_timer.data = (unsigned long) dev;
-		lp->ipg_data.ipg_timer.function = (void *)&amd8111e_config_ipg;
+		timer_setup(&lp->ipg_data.ipg_timer, amd8111e_config_ipg, 0);
 		lp->ipg_data.ipg_timer.expires = jiffies +
 						 IPG_CONVERGE_JIFFIES;
 		lp->ipg_data.ipg = DEFAULT_IPG;
diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c
index 82cc81385033..116997a8b593 100644
--- a/drivers/net/ethernet/amd/declance.c
+++ b/drivers/net/ethernet/amd/declance.c
@@ -260,6 +260,7 @@ struct lance_private {
 	unsigned short busmaster_regval;
 
 	struct timer_list       multicast_timer;
+	struct net_device	*dev;
 
 	/* Pointers to the ring buffers as seen from the CPU */
 	char *rx_buf_ptr_cpu[RX_RING_SIZE];
@@ -1000,9 +1001,10 @@ static void lance_set_multicast(struct net_device *dev)
 	netif_wake_queue(dev);
 }
 
-static void lance_set_multicast_retry(unsigned long _opaque)
+static void lance_set_multicast_retry(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *) _opaque;
+	struct lance_private *lp = from_timer(lp, t, multicast_timer);
+	struct net_device *dev = lp->dev;
 
 	lance_set_multicast(dev);
 }
@@ -1246,9 +1248,9 @@ static int dec_lance_probe(struct device *bdev, const int type)
 	 * can occur from interrupts (ex. IPv6).  So we
 	 * use a timer to try again later when necessary. -DaveM
 	 */
-	init_timer(&lp->multicast_timer);
-	lp->multicast_timer.data = (unsigned long) dev;
-	lp->multicast_timer.function = lance_set_multicast_retry;
+	lp->dev = dev;
+	timer_setup(&lp->multicast_timer, lance_set_multicast_retry, 0);
+
 
 	ret = register_netdev(dev);
 	if (ret) {
diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c
index 7f60d17819ce..a561705f232c 100644
--- a/drivers/net/ethernet/amd/pcnet32.c
+++ b/drivers/net/ethernet/amd/pcnet32.c
@@ -321,7 +321,7 @@ static struct net_device_stats *pcnet32_get_stats(struct net_device *);
 static void pcnet32_load_multicast(struct net_device *dev);
 static void pcnet32_set_multicast_list(struct net_device *);
 static int pcnet32_ioctl(struct net_device *, struct ifreq *, int);
-static void pcnet32_watchdog(struct net_device *);
+static void pcnet32_watchdog(struct timer_list *);
 static int mdio_read(struct net_device *dev, int phy_id, int reg_num);
 static void mdio_write(struct net_device *dev, int phy_id, int reg_num,
 		       int val);
@@ -1970,9 +1970,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev)
 			lp->options |= PCNET32_PORT_MII;
 	}
 
-	init_timer(&lp->watchdog_timer);
-	lp->watchdog_timer.data = (unsigned long)dev;
-	lp->watchdog_timer.function = (void *)&pcnet32_watchdog;
+	timer_setup(&lp->watchdog_timer, pcnet32_watchdog, 0);
 
 	/* The PCNET32-specific entries in the device structure. */
 	dev->netdev_ops = &pcnet32_netdev_ops;
@@ -2902,9 +2900,10 @@ static void pcnet32_check_media(struct net_device *dev, int verbose)
  * Could possibly be changed to use mii_check_media instead.
  */
 
-static void pcnet32_watchdog(struct net_device *dev)
+static void pcnet32_watchdog(struct timer_list *t)
 {
-	struct pcnet32_private *lp = netdev_priv(dev);
+	struct pcnet32_private *lp = from_timer(lp, t, watchdog_timer);
+	struct net_device *dev = lp->dev;
 	unsigned long flags;
 
 	/* Print the link status if it has changed */
diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c
index 291ca5187f12..cdd7a611479b 100644
--- a/drivers/net/ethernet/amd/sunlance.c
+++ b/drivers/net/ethernet/amd/sunlance.c
@@ -1248,9 +1248,10 @@ static void lance_set_multicast(struct net_device *dev)
 	netif_wake_queue(dev);
 }
 
-static void lance_set_multicast_retry(unsigned long _opaque)
+static void lance_set_multicast_retry(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *) _opaque;
+	struct lance_private *lp = from_timer(lp, t, multicast_timer);
+	struct net_device *dev = lp->dev;
 
 	lance_set_multicast(dev);
 }
@@ -1459,9 +1460,7 @@ no_link_test:
 	 * can occur from interrupts (ex. IPv6).  So we
 	 * use a timer to try again later when necessary. -DaveM
 	 */
-	init_timer(&lp->multicast_timer);
-	lp->multicast_timer.data = (unsigned long) dev;
-	lp->multicast_timer.function = lance_set_multicast_retry;
+	timer_setup(&lp->multicast_timer, lance_set_multicast_retry, 0);
 
 	if (register_netdev(dev)) {
 		printk(KERN_ERR "SunLance: Cannot register device.\n");
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
index 45d92304068e..cc1e4f820e64 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
@@ -295,7 +295,7 @@ again:
 	order = alloc_order;
 
 	/* Try to obtain pages, decreasing order if necessary */
-	gfp = GFP_ATOMIC | __GFP_COLD | __GFP_COMP | __GFP_NOWARN;
+	gfp = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN;
 	while (order >= 0) {
 		pages = alloc_pages_node(node, gfp, order);
 		if (pages)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 608693d11bd7..a74a8fbad53a 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -642,9 +642,9 @@ static irqreturn_t xgbe_dma_isr(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static void xgbe_tx_timer(unsigned long data)
+static void xgbe_tx_timer(struct timer_list *t)
 {
-	struct xgbe_channel *channel = (struct xgbe_channel *)data;
+	struct xgbe_channel *channel = from_timer(channel, t, tx_timer);
 	struct xgbe_prv_data *pdata = channel->pdata;
 	struct napi_struct *napi;
 
@@ -680,9 +680,9 @@ static void xgbe_service(struct work_struct *work)
 	pdata->phy_if.phy_status(pdata);
 }
 
-static void xgbe_service_timer(unsigned long data)
+static void xgbe_service_timer(struct timer_list *t)
 {
-	struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
+	struct xgbe_prv_data *pdata = from_timer(pdata, t, service_timer);
 
 	queue_work(pdata->dev_workqueue, &pdata->service_work);
 
@@ -694,16 +694,14 @@ static void xgbe_init_timers(struct xgbe_prv_data *pdata)
 	struct xgbe_channel *channel;
 	unsigned int i;
 
-	setup_timer(&pdata->service_timer, xgbe_service_timer,
-		    (unsigned long)pdata);
+	timer_setup(&pdata->service_timer, xgbe_service_timer, 0);
 
 	for (i = 0; i < pdata->channel_count; i++) {
 		channel = pdata->channel[i];
 		if (!channel->tx_ring)
 			break;
 
-		setup_timer(&channel->tx_timer, xgbe_tx_timer,
-			    (unsigned long)channel);
+		timer_setup(&channel->tx_timer, xgbe_tx_timer, 0);
 	}
 }
 
@@ -2208,7 +2206,7 @@ static int xgbe_setup_tc(struct net_device *netdev, enum tc_setup_type type,
 	struct tc_mqprio_qopt *mqprio = type_data;
 	u8 tc;
 
-	if (type != TC_SETUP_MQPRIO)
+	if (type != TC_SETUP_QDISC_MQPRIO)
 		return -EOPNOTSUPP;
 
 	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c
index eac740c476ce..5a655d289dd5 100644
--- a/drivers/net/ethernet/apple/bmac.c
+++ b/drivers/net/ethernet/apple/bmac.c
@@ -157,7 +157,7 @@ static irqreturn_t bmac_misc_intr(int irq, void *dev_id);
 static irqreturn_t bmac_txdma_intr(int irq, void *dev_id);
 static irqreturn_t bmac_rxdma_intr(int irq, void *dev_id);
 static void bmac_set_timeout(struct net_device *dev);
-static void bmac_tx_timeout(unsigned long data);
+static void bmac_tx_timeout(struct timer_list *t);
 static int bmac_output(struct sk_buff *skb, struct net_device *dev);
 static void bmac_start(struct net_device *dev);
 
@@ -555,8 +555,6 @@ static inline void bmac_set_timeout(struct net_device *dev)
 	if (bp->timeout_active)
 		del_timer(&bp->tx_timeout);
 	bp->tx_timeout.expires = jiffies + TX_TIMEOUT;
-	bp->tx_timeout.function = bmac_tx_timeout;
-	bp->tx_timeout.data = (unsigned long) dev;
 	add_timer(&bp->tx_timeout);
 	bp->timeout_active = 1;
 	spin_unlock_irqrestore(&bp->lock, flags);
@@ -1321,7 +1319,7 @@ static int bmac_probe(struct macio_dev *mdev, const struct of_device_id *match)
 	bp->queue = (struct sk_buff_head *)(bp->rx_cmds + N_RX_RING + 1);
 	skb_queue_head_init(bp->queue);
 
-	init_timer(&bp->tx_timeout);
+	timer_setup(&bp->tx_timeout, bmac_tx_timeout, 0);
 
 	ret = request_irq(dev->irq, bmac_misc_intr, 0, "BMAC-misc", dev);
 	if (ret) {
@@ -1471,10 +1469,10 @@ bmac_output(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
-static void bmac_tx_timeout(unsigned long data)
+static void bmac_tx_timeout(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *) data;
-	struct bmac_data *bp = netdev_priv(dev);
+	struct bmac_data *bp = from_timer(bp, t, tx_timeout);
+	struct net_device *dev = macio_get_drvdata(bp->mdev);
 	volatile struct dbdma_regs __iomem *td = bp->tx_dma;
 	volatile struct dbdma_regs __iomem *rd = bp->rx_dma;
 	volatile struct dbdma_cmd *cp;
diff --git a/drivers/net/ethernet/apple/mace.c b/drivers/net/ethernet/apple/mace.c
index e58b157b7d7c..0b5429d76bcf 100644
--- a/drivers/net/ethernet/apple/mace.c
+++ b/drivers/net/ethernet/apple/mace.c
@@ -86,7 +86,7 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id);
 static irqreturn_t mace_txdma_intr(int irq, void *dev_id);
 static irqreturn_t mace_rxdma_intr(int irq, void *dev_id);
 static void mace_set_timeout(struct net_device *dev);
-static void mace_tx_timeout(unsigned long data);
+static void mace_tx_timeout(struct timer_list *t);
 static inline void dbdma_reset(volatile struct dbdma_regs __iomem *dma);
 static inline void mace_clean_rings(struct mace_data *mp);
 static void __mace_set_address(struct net_device *dev, void *addr);
@@ -196,7 +196,7 @@ static int mace_probe(struct macio_dev *mdev, const struct of_device_id *match)
 
 	memset((char *) mp->tx_cmds, 0,
 	       (NCMDS_TX*N_TX_RING + N_RX_RING + 2) * sizeof(struct dbdma_cmd));
-	init_timer(&mp->tx_timeout);
+	timer_setup(&mp->tx_timeout, mace_tx_timeout, 0);
 	spin_lock_init(&mp->lock);
 	mp->timeout_active = 0;
 
@@ -521,8 +521,6 @@ static inline void mace_set_timeout(struct net_device *dev)
     if (mp->timeout_active)
 	del_timer(&mp->tx_timeout);
     mp->tx_timeout.expires = jiffies + TX_TIMEOUT;
-    mp->tx_timeout.function = mace_tx_timeout;
-    mp->tx_timeout.data = (unsigned long) dev;
     add_timer(&mp->tx_timeout);
     mp->timeout_active = 1;
 }
@@ -801,10 +799,10 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id)
     return IRQ_HANDLED;
 }
 
-static void mace_tx_timeout(unsigned long data)
+static void mace_tx_timeout(struct timer_list *t)
 {
-    struct net_device *dev = (struct net_device *) data;
-    struct mace_data *mp = netdev_priv(dev);
+    struct mace_data *mp = from_timer(mp, t, tx_timeout);
+    struct net_device *dev = macio_get_drvdata(mp->mdev);
     volatile struct mace __iomem *mb = mp->mace;
     volatile struct dbdma_regs __iomem *td = mp->tx_dma;
     volatile struct dbdma_regs __iomem *rd = mp->rx_dma;
diff --git a/drivers/net/ethernet/aquantia/Kconfig b/drivers/net/ethernet/aquantia/Kconfig
index cdf78e069a39..7d623e90dc19 100644
--- a/drivers/net/ethernet/aquantia/Kconfig
+++ b/drivers/net/ethernet/aquantia/Kconfig
@@ -9,7 +9,7 @@ config NET_VENDOR_AQUANTIA
 	  Set this to y if you have an Ethernet network cards that uses the aQuantia
 	  AQC107/AQC108 chipset.
 
-	  This option does not build any drivers; it casues the aQuantia
+	  This option does not build any drivers; it causes the aQuantia
 	  drivers that can be built to appear in the list of Ethernet drivers.
 
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
index d5e99b468870..70efb7467bf3 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
@@ -221,8 +221,8 @@ static int aq_ethtool_get_rxnfc(struct net_device *ndev,
 	return err;
 }
 
-int aq_ethtool_get_coalesce(struct net_device *ndev,
-			    struct ethtool_coalesce *coal)
+static int aq_ethtool_get_coalesce(struct net_device *ndev,
+				   struct ethtool_coalesce *coal)
 {
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
 	struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic);
@@ -242,8 +242,8 @@ int aq_ethtool_get_coalesce(struct net_device *ndev,
 	return 0;
 }
 
-int aq_ethtool_set_coalesce(struct net_device *ndev,
-			    struct ethtool_coalesce *coal)
+static int aq_ethtool_set_coalesce(struct net_device *ndev,
+				   struct ethtool_coalesce *coal)
 {
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
 	struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic);
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index 0654e0c76bc2..519ca6534b85 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -304,8 +304,7 @@ int aq_ring_rx_fill(struct aq_ring_s *self)
 		buff->flags = 0U;
 		buff->len = AQ_CFG_RX_FRAME_MAX;
 
-		buff->page = alloc_pages(GFP_ATOMIC | __GFP_COLD |
-					 __GFP_COMP, pages_order);
+		buff->page = alloc_pages(GFP_ATOMIC | __GFP_COMP, pages_order);
 		if (!buff->page) {
 			err = -ENOMEM;
 			goto err_exit;
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index 67134ece1107..af75156919ed 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -184,6 +184,7 @@ config BGMAC_PLATFORM
 config SYSTEMPORT
 	tristate "Broadcom SYSTEMPORT internal MAC support"
 	depends on OF
+	depends on NET_DSA || !NET_DSA
 	select MII
 	select PHYLIB
 	select FIXED_PHY
diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index a1125d10c825..42e44fc03a18 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -1474,10 +1474,8 @@ static int b44_open(struct net_device *dev)
 		goto out;
 	}
 
-	init_timer(&bp->timer);
+	setup_timer(&bp->timer, b44_timer, (unsigned long)bp);
 	bp->timer.expires = jiffies + HZ;
-	bp->timer.data = (unsigned long) bp;
-	bp->timer.function = b44_timer;
 	add_timer(&bp->timer);
 
 	b44_enable_ints(bp);
diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index 4f3845a58126..d9346e2ac720 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -295,16 +295,13 @@ static int bcm_enet_refill_rx(struct net_device *dev)
 /*
  * timer callback to defer refill rx queue in case we're OOM
  */
-static void bcm_enet_refill_rx_timer(unsigned long data)
+static void bcm_enet_refill_rx_timer(struct timer_list *t)
 {
-	struct net_device *dev;
-	struct bcm_enet_priv *priv;
-
-	dev = (struct net_device *)data;
-	priv = netdev_priv(dev);
+	struct bcm_enet_priv *priv = from_timer(priv, t, rx_timeout);
+	struct net_device *dev = priv->net_dev;
 
 	spin_lock(&priv->rx_lock);
-	bcm_enet_refill_rx((struct net_device *)data);
+	bcm_enet_refill_rx(dev);
 	spin_unlock(&priv->rx_lock);
 }
 
@@ -1062,7 +1059,8 @@ static int bcm_enet_open(struct net_device *dev)
 	val = enet_readl(priv, ENET_CTL_REG);
 	val |= ENET_CTL_ENABLE_MASK;
 	enet_writel(priv, val, ENET_CTL_REG);
-	enet_dma_writel(priv, ENETDMA_CFG_EN_MASK, ENETDMA_CFG_REG);
+	if (priv->dma_has_sram)
+		enet_dma_writel(priv, ENETDMA_CFG_EN_MASK, ENETDMA_CFG_REG);
 	enet_dmac_writel(priv, priv->dma_chan_en_mask,
 			 ENETDMAC_CHANCFG, priv->rx_chan);
 
@@ -1721,10 +1719,8 @@ static int bcm_enet_probe(struct platform_device *pdev)
 	const char *clk_name;
 	int i, ret;
 
-	/* stop if shared driver failed, assume driver->probe will be
-	 * called in the same order we register devices (correct ?) */
 	if (!bcm_enet_shared_base[0])
-		return -ENODEV;
+		return -EPROBE_DEFER;
 
 	res_irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
 	res_irq_rx = platform_get_resource(pdev, IORESOURCE_IRQ, 1);
@@ -1768,12 +1764,14 @@ static int bcm_enet_probe(struct platform_device *pdev)
 		clk_name = "enet1";
 	}
 
-	priv->mac_clk = clk_get(&pdev->dev, clk_name);
+	priv->mac_clk = devm_clk_get(&pdev->dev, clk_name);
 	if (IS_ERR(priv->mac_clk)) {
 		ret = PTR_ERR(priv->mac_clk);
 		goto out;
 	}
-	clk_prepare_enable(priv->mac_clk);
+	ret = clk_prepare_enable(priv->mac_clk);
+	if (ret)
+		goto out;
 
 	/* initialize default and fetch platform data */
 	priv->rx_ring_size = BCMENET_DEF_RX_DESC;
@@ -1801,13 +1799,15 @@ static int bcm_enet_probe(struct platform_device *pdev)
 
 	if (priv->mac_id == 0 && priv->has_phy && !priv->use_external_mii) {
 		/* using internal PHY, enable clock */
-		priv->phy_clk = clk_get(&pdev->dev, "ephy");
+		priv->phy_clk = devm_clk_get(&pdev->dev, "ephy");
 		if (IS_ERR(priv->phy_clk)) {
 			ret = PTR_ERR(priv->phy_clk);
 			priv->phy_clk = NULL;
-			goto out_put_clk_mac;
+			goto out_disable_clk_mac;
 		}
-		clk_prepare_enable(priv->phy_clk);
+		ret = clk_prepare_enable(priv->phy_clk);
+		if (ret)
+			goto out_disable_clk_mac;
 	}
 
 	/* do minimal hardware init to be able to probe mii bus */
@@ -1857,9 +1857,7 @@ static int bcm_enet_probe(struct platform_device *pdev)
 	spin_lock_init(&priv->rx_lock);
 
 	/* init rx timeout (used for oom) */
-	init_timer(&priv->rx_timeout);
-	priv->rx_timeout.function = bcm_enet_refill_rx_timer;
-	priv->rx_timeout.data = (unsigned long)dev;
+	timer_setup(&priv->rx_timeout, bcm_enet_refill_rx_timer, 0);
 
 	/* init the mib update lock&work */
 	mutex_init(&priv->mib_update_lock);
@@ -1901,14 +1899,10 @@ out_free_mdio:
 out_uninit_hw:
 	/* turn off mdc clock */
 	enet_writel(priv, 0, ENET_MIISC_REG);
-	if (priv->phy_clk) {
-		clk_disable_unprepare(priv->phy_clk);
-		clk_put(priv->phy_clk);
-	}
+	clk_disable_unprepare(priv->phy_clk);
 
-out_put_clk_mac:
+out_disable_clk_mac:
 	clk_disable_unprepare(priv->mac_clk);
-	clk_put(priv->mac_clk);
 out:
 	free_netdev(dev);
 	return ret;
@@ -1944,12 +1938,8 @@ static int bcm_enet_remove(struct platform_device *pdev)
 	}
 
 	/* disable hw block clocks */
-	if (priv->phy_clk) {
-		clk_disable_unprepare(priv->phy_clk);
-		clk_put(priv->phy_clk);
-	}
+	clk_disable_unprepare(priv->phy_clk);
 	clk_disable_unprepare(priv->mac_clk);
-	clk_put(priv->mac_clk);
 
 	free_netdev(dev);
 	return 0;
@@ -2021,9 +2011,9 @@ static inline int bcm_enet_port_is_rgmii(int portid)
 /*
  * enet sw PHY polling
  */
-static void swphy_poll_timer(unsigned long data)
+static void swphy_poll_timer(struct timer_list *t)
 {
-	struct bcm_enet_priv *priv = (struct bcm_enet_priv *)data;
+	struct bcm_enet_priv *priv = from_timer(priv, t, swphy_poll);
 	unsigned int i;
 
 	for (i = 0; i < priv->num_ports; i++) {
@@ -2332,11 +2322,8 @@ static int bcm_enetsw_open(struct net_device *dev)
 	}
 
 	/* start phy polling timer */
-	init_timer(&priv->swphy_poll);
-	priv->swphy_poll.function = swphy_poll_timer;
-	priv->swphy_poll.data = (unsigned long)priv;
-	priv->swphy_poll.expires = jiffies;
-	add_timer(&priv->swphy_poll);
+	timer_setup(&priv->swphy_poll, swphy_poll_timer, 0);
+	mod_timer(&priv->swphy_poll, jiffies);
 	return 0;
 
 out:
@@ -2692,11 +2679,8 @@ static int bcm_enetsw_probe(struct platform_device *pdev)
 	struct resource *res_mem;
 	int ret, irq_rx, irq_tx;
 
-	/* stop if shared driver failed, assume driver->probe will be
-	 * called in the same order we register devices (correct ?)
-	 */
 	if (!bcm_enet_shared_base[0])
-		return -ENODEV;
+		return -EPROBE_DEFER;
 
 	res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	irq_rx = platform_get_irq(pdev, 0);
@@ -2735,33 +2719,27 @@ static int bcm_enetsw_probe(struct platform_device *pdev)
 	if (ret)
 		goto out;
 
-	if (!request_mem_region(res_mem->start, resource_size(res_mem),
-				"bcm63xx_enetsw")) {
-		ret = -EBUSY;
+	priv->base = devm_ioremap_resource(&pdev->dev, res_mem);
+	if (IS_ERR(priv->base)) {
+		ret = PTR_ERR(priv->base);
 		goto out;
 	}
 
-	priv->base = ioremap(res_mem->start, resource_size(res_mem));
-	if (priv->base == NULL) {
-		ret = -ENOMEM;
-		goto out_release_mem;
-	}
-
-	priv->mac_clk = clk_get(&pdev->dev, "enetsw");
+	priv->mac_clk = devm_clk_get(&pdev->dev, "enetsw");
 	if (IS_ERR(priv->mac_clk)) {
 		ret = PTR_ERR(priv->mac_clk);
-		goto out_unmap;
+		goto out;
 	}
-	clk_enable(priv->mac_clk);
+	ret = clk_prepare_enable(priv->mac_clk);
+	if (ret)
+		goto out;
 
 	priv->rx_chan = 0;
 	priv->tx_chan = 1;
 	spin_lock_init(&priv->rx_lock);
 
 	/* init rx timeout (used for oom) */
-	init_timer(&priv->rx_timeout);
-	priv->rx_timeout.function = bcm_enet_refill_rx_timer;
-	priv->rx_timeout.data = (unsigned long)dev;
+	timer_setup(&priv->rx_timeout, bcm_enet_refill_rx_timer, 0);
 
 	/* register netdevice */
 	dev->netdev_ops = &bcm_enetsw_ops;
@@ -2773,7 +2751,7 @@ static int bcm_enetsw_probe(struct platform_device *pdev)
 
 	ret = register_netdev(dev);
 	if (ret)
-		goto out_put_clk;
+		goto out_disable_clk;
 
 	netif_carrier_off(dev);
 	platform_set_drvdata(pdev, dev);
@@ -2782,14 +2760,8 @@ static int bcm_enetsw_probe(struct platform_device *pdev)
 
 	return 0;
 
-out_put_clk:
-	clk_put(priv->mac_clk);
-
-out_unmap:
-	iounmap(priv->base);
-
-out_release_mem:
-	release_mem_region(res_mem->start, resource_size(res_mem));
+out_disable_clk:
+	clk_disable_unprepare(priv->mac_clk);
 out:
 	free_netdev(dev);
 	return ret;
@@ -2801,17 +2773,13 @@ static int bcm_enetsw_remove(struct platform_device *pdev)
 {
 	struct bcm_enet_priv *priv;
 	struct net_device *dev;
-	struct resource *res;
 
 	/* stop netdevice */
 	dev = platform_get_drvdata(pdev);
 	priv = netdev_priv(dev);
 	unregister_netdev(dev);
 
-	/* release device resources */
-	iounmap(priv->base);
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	release_mem_region(res->start, resource_size(res));
+	clk_disable_unprepare(priv->mac_clk);
 
 	free_netdev(dev);
 	return 0;
diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.h b/drivers/net/ethernet/broadcom/bcm63xx_enet.h
index c6f6f14e87ca..5a66728d4776 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.h
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.h
@@ -9,7 +9,6 @@
 #include <linux/platform_device.h>
 
 #include <bcm63xx_regs.h>
-#include <bcm63xx_irq.h>
 #include <bcm63xx_io.h>
 #include <bcm63xx_iudma.h>
 
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index eb441e5e2cd8..087f01b4dc3a 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -1416,9 +1416,24 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv,
 	tdma_writel(priv, 0, TDMA_DESC_RING_COUNT(index));
 	tdma_writel(priv, 1, TDMA_DESC_RING_INTR_CONTROL(index));
 	tdma_writel(priv, 0, TDMA_DESC_RING_PROD_CONS_INDEX(index));
-	tdma_writel(priv, RING_IGNORE_STATUS, TDMA_DESC_RING_MAPPING(index));
+
+	/* Configure QID and port mapping */
+	reg = tdma_readl(priv, TDMA_DESC_RING_MAPPING(index));
+	reg &= ~(RING_QID_MASK | RING_PORT_ID_MASK << RING_PORT_ID_SHIFT);
+	if (ring->inspect) {
+		reg |= ring->switch_queue & RING_QID_MASK;
+		reg |= ring->switch_port << RING_PORT_ID_SHIFT;
+	} else {
+		reg |= RING_IGNORE_STATUS;
+	}
+	tdma_writel(priv, reg, TDMA_DESC_RING_MAPPING(index));
 	tdma_writel(priv, 0, TDMA_DESC_RING_PCP_DEI_VID(index));
 
+	/* Enable ACB algorithm 2 */
+	reg = tdma_readl(priv, TDMA_CONTROL);
+	reg |= tdma_control_bit(priv, ACB_ALGO);
+	tdma_writel(priv, reg, TDMA_CONTROL);
+
 	/* Do not use tdma_control_bit() here because TSB_SWAP1 collides
 	 * with the original definition of ACB_ALGO
 	 */
@@ -1447,8 +1462,9 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv,
 	napi_enable(&ring->napi);
 
 	netif_dbg(priv, hw, priv->netdev,
-		  "TDMA cfg, size=%d, desc_cpu=%p\n",
-		  ring->size, ring->desc_cpu);
+		  "TDMA cfg, size=%d, desc_cpu=%p switch q=%d,port=%d\n",
+		  ring->size, ring->desc_cpu, ring->switch_queue,
+		  ring->switch_port);
 
 	return 0;
 }
@@ -2013,6 +2029,29 @@ static const struct ethtool_ops bcm_sysport_ethtool_ops = {
 	.set_link_ksettings     = phy_ethtool_set_link_ksettings,
 };
 
+static u16 bcm_sysport_select_queue(struct net_device *dev, struct sk_buff *skb,
+				    void *accel_priv,
+				    select_queue_fallback_t fallback)
+{
+	struct bcm_sysport_priv *priv = netdev_priv(dev);
+	u16 queue = skb_get_queue_mapping(skb);
+	struct bcm_sysport_tx_ring *tx_ring;
+	unsigned int q, port;
+
+	if (!netdev_uses_dsa(dev))
+		return fallback(dev, skb);
+
+	/* DSA tagging layer will have configured the correct queue */
+	q = BRCM_TAG_GET_QUEUE(queue);
+	port = BRCM_TAG_GET_PORT(queue);
+	tx_ring = priv->ring_map[q + port * priv->per_port_num_tx_queues];
+
+	if (unlikely(!tx_ring))
+		return fallback(dev, skb);
+
+	return tx_ring->index;
+}
+
 static const struct net_device_ops bcm_sysport_netdev_ops = {
 	.ndo_start_xmit		= bcm_sysport_xmit,
 	.ndo_tx_timeout		= bcm_sysport_tx_timeout,
@@ -2025,8 +2064,79 @@ static const struct net_device_ops bcm_sysport_netdev_ops = {
 	.ndo_poll_controller	= bcm_sysport_poll_controller,
 #endif
 	.ndo_get_stats64	= bcm_sysport_get_stats64,
+	.ndo_select_queue	= bcm_sysport_select_queue,
 };
 
+static int bcm_sysport_map_queues(struct net_device *dev,
+				  struct dsa_notifier_register_info *info)
+{
+	struct bcm_sysport_priv *priv = netdev_priv(dev);
+	struct bcm_sysport_tx_ring *ring;
+	struct net_device *slave_dev;
+	unsigned int num_tx_queues;
+	unsigned int q, start, port;
+
+	/* We can't be setting up queue inspection for non directly attached
+	 * switches
+	 */
+	if (info->switch_number)
+		return 0;
+
+	if (dev->netdev_ops != &bcm_sysport_netdev_ops)
+		return 0;
+
+	port = info->port_number;
+	slave_dev = info->info.dev;
+
+	/* On SYSTEMPORT Lite we have twice as less queues, so we cannot do a
+	 * 1:1 mapping, we can only do a 2:1 mapping. By reducing the number of
+	 * per-port (slave_dev) network devices queue, we achieve just that.
+	 * This need to happen now before any slave network device is used such
+	 * it accurately reflects the number of real TX queues.
+	 */
+	if (priv->is_lite)
+		netif_set_real_num_tx_queues(slave_dev,
+					     slave_dev->num_tx_queues / 2);
+	num_tx_queues = slave_dev->real_num_tx_queues;
+
+	if (priv->per_port_num_tx_queues &&
+	    priv->per_port_num_tx_queues != num_tx_queues)
+		netdev_warn(slave_dev, "asymetric number of per-port queues\n");
+
+	priv->per_port_num_tx_queues = num_tx_queues;
+
+	start = find_first_zero_bit(&priv->queue_bitmap, dev->num_tx_queues);
+	for (q = 0; q < num_tx_queues; q++) {
+		ring = &priv->tx_rings[q + start];
+
+		/* Just remember the mapping actual programming done
+		 * during bcm_sysport_init_tx_ring
+		 */
+		ring->switch_queue = q;
+		ring->switch_port = port;
+		ring->inspect = true;
+		priv->ring_map[q + port * num_tx_queues] = ring;
+
+		/* Set all queues as being used now */
+		set_bit(q + start, &priv->queue_bitmap);
+	}
+
+	return 0;
+}
+
+static int bcm_sysport_dsa_notifier(struct notifier_block *unused,
+				    unsigned long event, void *ptr)
+{
+	struct dsa_notifier_register_info *info;
+
+	if (event != DSA_PORT_REGISTER)
+		return NOTIFY_DONE;
+
+	info = ptr;
+
+	return notifier_from_errno(bcm_sysport_map_queues(info->master, info));
+}
+
 #define REV_FMT	"v%2x.%02x"
 
 static const struct bcm_sysport_hw_params bcm_sysport_params[] = {
@@ -2174,10 +2284,18 @@ static int bcm_sysport_probe(struct platform_device *pdev)
 
 	u64_stats_init(&priv->syncp);
 
+	priv->dsa_notifier.notifier_call = bcm_sysport_dsa_notifier;
+
+	ret = register_dsa_notifier(&priv->dsa_notifier);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to register DSA notifier\n");
+		goto err_deregister_fixed_link;
+	}
+
 	ret = register_netdev(dev);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to register net_device\n");
-		goto err_deregister_fixed_link;
+		goto err_deregister_notifier;
 	}
 
 	priv->rev = topctrl_readl(priv, REV_CNTL) & REV_MASK;
@@ -2190,6 +2308,8 @@ static int bcm_sysport_probe(struct platform_device *pdev)
 
 	return 0;
 
+err_deregister_notifier:
+	unregister_dsa_notifier(&priv->dsa_notifier);
 err_deregister_fixed_link:
 	if (of_phy_is_fixed_link(dn))
 		of_phy_deregister_fixed_link(dn);
@@ -2201,11 +2321,13 @@ err_free_netdev:
 static int bcm_sysport_remove(struct platform_device *pdev)
 {
 	struct net_device *dev = dev_get_drvdata(&pdev->dev);
+	struct bcm_sysport_priv *priv = netdev_priv(dev);
 	struct device_node *dn = pdev->dev.of_node;
 
 	/* Not much to do, ndo_close has been called
 	 * and we use managed allocations
 	 */
+	unregister_dsa_notifier(&priv->dsa_notifier);
 	unregister_netdev(dev);
 	if (of_phy_is_fixed_link(dn))
 		of_phy_deregister_fixed_link(dn);
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h
index 82e401df199e..f5a984c1c986 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.h
+++ b/drivers/net/ethernet/broadcom/bcmsysport.h
@@ -404,7 +404,7 @@ struct bcm_rsb {
 #define  RING_CONS_INDEX_MASK		0xffff
 
 #define RING_MAPPING			0x14
-#define  RING_QID_MASK			0x3
+#define  RING_QID_MASK			0x7
 #define  RING_PORT_ID_SHIFT		3
 #define  RING_PORT_ID_MASK		0x7
 #define  RING_IGNORE_STATUS		(1 << 6)
@@ -712,6 +712,9 @@ struct bcm_sysport_tx_ring {
 	struct bcm_sysport_priv *priv;	/* private context backpointer */
 	unsigned long	packets;	/* packets statistics */
 	unsigned long	bytes;		/* bytes statistics */
+	unsigned int	switch_queue;	/* switch port queue number */
+	unsigned int	switch_port;	/* switch port queue number */
+	bool		inspect;	/* inspect switch port and queue */
 };
 
 /* Driver private structure */
@@ -765,5 +768,12 @@ struct bcm_sysport_priv {
 
 	/* For atomic update generic 64bit value on 32bit Machine */
 	struct u64_stats_sync	syncp;
+
+	/* map information between switch port queues and local queues */
+	struct notifier_block	dsa_notifier;
+	unsigned int		per_port_num_tx_queues;
+	unsigned long		queue_bitmap;
+	struct bcm_sysport_tx_ring *ring_map[DSA_MAX_PORTS * 8];
+
 };
 #endif /* __BCM_SYSPORT_H */
diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c
index 6322594ab260..6fe074c1588b 100644
--- a/drivers/net/ethernet/broadcom/bgmac-bcma.c
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c
@@ -184,13 +184,19 @@ static int bgmac_probe(struct bcma_device *core)
 
 	if (!bgmac_is_bcm4707_family(core) &&
 	    !(ci->id == BCMA_CHIP_ID_BCM53573 && core->core_unit == 1)) {
+		struct phy_device *phydev;
+
 		mii_bus = bcma_mdio_mii_register(bgmac);
 		if (IS_ERR(mii_bus)) {
 			err = PTR_ERR(mii_bus);
 			goto err;
 		}
-
 		bgmac->mii_bus = mii_bus;
+
+		phydev = mdiobus_get_phy(bgmac->mii_bus, bgmac->phyaddr);
+		if (ci->id == BCMA_CHIP_ID_BCM53573 && phydev &&
+		    (phydev->drv->phy_id & phydev->drv->phy_id_mask) == PHY_ID_BCM54210E)
+			phydev->dev_flags |= PHY_BRCM_EN_MASTER_MODE;
 	}
 
 	if (core->bus->hosttype == BCMA_HOSTTYPE_PCI) {
diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c
index d937083db9a4..894eda5b13cf 100644
--- a/drivers/net/ethernet/broadcom/bgmac-platform.c
+++ b/drivers/net/ethernet/broadcom/bgmac-platform.c
@@ -131,6 +131,7 @@ static void bgmac_nicpm_speed_set(struct net_device *net_dev)
 	switch (bgmac->net_dev->phydev->speed) {
 	default:
 		netdev_err(net_dev, "Unsupported speed. Defaulting to 1000Mb\n");
+		/* fall through */
 	case SPEED_1000:
 		val |= NICPM_IOMUX_CTRL_SPD_1000M << NICPM_IOMUX_CTRL_SPD_SHIFT;
 		break;
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 48d672b204a4..1d96cd594ade 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -15,6 +15,7 @@
 #include <linux/bcm47xx_nvram.h>
 #include <linux/phy.h>
 #include <linux/phy_fixed.h>
+#include <net/dsa.h>
 #include "bgmac.h"
 
 static bool bgmac_wait_value(struct bgmac *bgmac, u16 reg, u32 mask,
@@ -127,6 +128,8 @@ bgmac_dma_tx_add_buf(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
 	dma_desc->ctl1 = cpu_to_le32(ctl1);
 }
 
+#define ENET_BRCM_TAG_LEN	4
+
 static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
 				    struct bgmac_dma_ring *ring,
 				    struct sk_buff *skb)
@@ -139,6 +142,18 @@ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
 	u32 flags;
 	int i;
 
+	/* The Ethernet switch we are interfaced with needs packets to be at
+	 * least 64 bytes (including FCS) otherwise they will be discarded when
+	 * they enter the switch port logic. When Broadcom tags are enabled, we
+	 * need to make sure that packets are at least 68 bytes
+	 * (including FCS and tag) because the length verification is done after
+	 * the Broadcom tag is stripped off the ingress packet.
+	 */
+	if (netdev_uses_dsa(net_dev)) {
+		if (skb_put_padto(skb, ETH_ZLEN + ENET_BRCM_TAG_LEN))
+			goto err_stats;
+	}
+
 	if (skb->len > BGMAC_DESC_CTL1_LEN) {
 		netdev_err(bgmac->net_dev, "Too long skb (%d)\n", skb->len);
 		goto err_drop;
@@ -225,6 +240,7 @@ err_dma_head:
 
 err_drop:
 	dev_kfree_skb(skb);
+err_stats:
 	net_dev->stats.tx_dropped++;
 	net_dev->stats.tx_errors++;
 	return NETDEV_TX_OK;
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index e3af1f3cb61f..b3055a76dfbf 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -8462,10 +8462,8 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
 	bnx2_set_default_link(bp);
 	bp->req_flow_ctrl = FLOW_CTRL_RX | FLOW_CTRL_TX;
 
-	init_timer(&bp->timer);
+	setup_timer(&bp->timer, bnx2_timer, (unsigned long)bp);
 	bp->timer.expires = RUN_AT(BNX2_TIMER_INTERVAL);
-	bp->timer.data = (unsigned long) bp;
-	bp->timer.function = bnx2_timer;
 
 #ifdef BCM_CNIC
 	if (bnx2_shmem_rd(bp, BNX2_ISCSI_INITIATOR) & BNX2_ISCSI_INITIATOR_EN)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 1216c1f1e052..4c739d5355d2 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -4289,7 +4289,7 @@ int __bnx2x_setup_tc(struct net_device *dev, enum tc_setup_type type,
 {
 	struct tc_mqprio_qopt *mqprio = type_data;
 
-	if (type != TC_SETUP_MQPRIO)
+	if (type != TC_SETUP_QDISC_MQPRIO)
 		return -EOPNOTSUPP;
 
 	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index c12b4d3e946e..be9fd7d184d0 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -9332,7 +9332,7 @@ void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode, bool keep_link)
 	/* Schedule the rx_mode command */
 	if (test_bit(BNX2X_FILTER_RX_MODE_PENDING, &bp->sp_state))
 		set_bit(BNX2X_FILTER_RX_MODE_SCHED, &bp->sp_state);
-	else
+	else if (bp->slowpath)
 		bnx2x_set_storm_rx_mode(bp);
 
 	/* Cleanup multicast configuration */
@@ -10271,8 +10271,15 @@ static void bnx2x_sp_rtnl_task(struct work_struct *work)
 		smp_mb();
 
 		bnx2x_nic_unload(bp, UNLOAD_NORMAL, true);
-		bnx2x_nic_load(bp, LOAD_NORMAL);
-
+		/* When ret value shows failure of allocation failure,
+		 * the nic is rebooted again. If open still fails, a error
+		 * message to notify the user.
+		 */
+		if (bnx2x_nic_load(bp, LOAD_NORMAL) == -ENOMEM) {
+			bnx2x_nic_unload(bp, UNLOAD_NORMAL, true);
+			if (bnx2x_nic_load(bp, LOAD_NORMAL))
+				BNX2X_ERR("Open the NIC fails again!\n");
+		}
 		rtnl_unlock();
 		return;
 	}
@@ -12414,10 +12421,8 @@ static int bnx2x_init_bp(struct bnx2x *bp)
 
 	bp->current_interval = CHIP_REV_IS_SLOW(bp) ? 5*HZ : HZ;
 
-	init_timer(&bp->timer);
+	setup_timer(&bp->timer, bnx2x_timer, (unsigned long)bp);
 	bp->timer.expires = jiffies + bp->current_interval;
-	bp->timer.data = (unsigned long) bp;
-	bp->timer.function = bnx2x_timer;
 
 	if (SHMEM2_HAS(bp, dcbx_lldp_params_offset) &&
 	    SHMEM2_HAS(bp, dcbx_lldp_dcbx_stat_offset) &&
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 9ca994d0bab6..3591077a5f6b 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -1074,11 +1074,6 @@ static void bnx2x_vf_set_bars(struct bnx2x *bp, struct bnx2x_virtf *vf)
 	}
 }
 
-static int bnx2x_ari_enabled(struct pci_dev *dev)
-{
-	return dev->bus->self && dev->bus->self->ari_enabled;
-}
-
 static int
 bnx2x_get_vf_igu_cam_info(struct bnx2x *bp)
 {
@@ -1212,7 +1207,7 @@ int bnx2x_iov_init_one(struct bnx2x *bp, int int_mode_param,
 
 	err = -EIO;
 	/* verify ari is enabled */
-	if (!bnx2x_ari_enabled(bp->pdev)) {
+	if (!pci_ari_enabled(bp->pdev->bus)) {
 		BNX2X_ERR("ARI not supported (check pci bridge ARI forwarding), SRIOV can not be enabled\n");
 		return 0;
 	}
diff --git a/drivers/net/ethernet/broadcom/bnxt/Makefile b/drivers/net/ethernet/broadcom/bnxt/Makefile
index 4f0cb8e1ffc0..59c8ec9c1cad 100644
--- a/drivers/net/ethernet/broadcom/bnxt/Makefile
+++ b/drivers/net/ethernet/broadcom/bnxt/Makefile
@@ -1,3 +1,4 @@
 obj-$(CONFIG_BNXT) += bnxt_en.o
 
-bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_vfr.o bnxt_tc.o
+bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_vfr.o bnxt_devlink.o
+bnxt_en-$(CONFIG_BNXT_FLOWER_OFFLOAD) += bnxt_tc.o
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index dc5de275352a..33c49ad697e4 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -61,6 +61,7 @@
 #include "bnxt_xdp.h"
 #include "bnxt_vfr.h"
 #include "bnxt_tc.h"
+#include "bnxt_devlink.h"
 
 #define BNXT_TX_TIMEOUT		(5 * HZ)
 
@@ -107,9 +108,11 @@ enum board_idx {
 	BCM57452,
 	BCM57454,
 	BCM58802,
+	BCM58804,
 	BCM58808,
 	NETXTREME_E_VF,
 	NETXTREME_C_VF,
+	NETXTREME_S_VF,
 };
 
 /* indexed by enum above */
@@ -145,9 +148,11 @@ static const struct {
 	[BCM57452] = { "Broadcom BCM57452 NetXtreme-E 10Gb/25Gb/40Gb/50Gb Ethernet" },
 	[BCM57454] = { "Broadcom BCM57454 NetXtreme-E 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" },
 	[BCM58802] = { "Broadcom BCM58802 NetXtreme-S 10Gb/25Gb/40Gb/50Gb Ethernet" },
+	[BCM58804] = { "Broadcom BCM58804 NetXtreme-S 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" },
 	[BCM58808] = { "Broadcom BCM58808 NetXtreme-S 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" },
 	[NETXTREME_E_VF] = { "Broadcom NetXtreme-E Ethernet Virtual Function" },
 	[NETXTREME_C_VF] = { "Broadcom NetXtreme-C Ethernet Virtual Function" },
+	[NETXTREME_S_VF] = { "Broadcom NetXtreme-S Ethernet Virtual Function" },
 };
 
 static const struct pci_device_id bnxt_pci_tbl[] = {
@@ -185,6 +190,7 @@ static const struct pci_device_id bnxt_pci_tbl[] = {
 	{ PCI_VDEVICE(BROADCOM, 0x16f0), .driver_data = BCM58808 },
 	{ PCI_VDEVICE(BROADCOM, 0x16f1), .driver_data = BCM57452 },
 	{ PCI_VDEVICE(BROADCOM, 0xd802), .driver_data = BCM58802 },
+	{ PCI_VDEVICE(BROADCOM, 0xd804), .driver_data = BCM58804 },
 #ifdef CONFIG_BNXT_SRIOV
 	{ PCI_VDEVICE(BROADCOM, 0x1606), .driver_data = NETXTREME_E_VF },
 	{ PCI_VDEVICE(BROADCOM, 0x1609), .driver_data = NETXTREME_E_VF },
@@ -194,6 +200,7 @@ static const struct pci_device_id bnxt_pci_tbl[] = {
 	{ PCI_VDEVICE(BROADCOM, 0x16dc), .driver_data = NETXTREME_E_VF },
 	{ PCI_VDEVICE(BROADCOM, 0x16e1), .driver_data = NETXTREME_C_VF },
 	{ PCI_VDEVICE(BROADCOM, 0x16e5), .driver_data = NETXTREME_C_VF },
+	{ PCI_VDEVICE(BROADCOM, 0xd800), .driver_data = NETXTREME_S_VF },
 #endif
 	{ 0 }
 };
@@ -218,7 +225,8 @@ static struct workqueue_struct *bnxt_pf_wq;
 
 static bool bnxt_vf_pciid(enum board_idx idx)
 {
-	return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF);
+	return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF ||
+		idx == NETXTREME_S_VF);
 }
 
 #define DB_CP_REARM_FLAGS	(DB_KEY_CP | DB_IDX_VALID)
@@ -1509,7 +1517,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
 				   (struct rx_tpa_end_cmp *)rxcmp,
 				   (struct rx_tpa_end_cmp_ext *)rxcmp1, event);
 
-		if (unlikely(IS_ERR(skb)))
+		if (IS_ERR(skb))
 			return -EBUSY;
 
 		rc = -ENOMEM;
@@ -2827,7 +2835,8 @@ int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode)
 	if (page_mode) {
 		if (bp->dev->mtu > BNXT_MAX_PAGE_MODE_MTU)
 			return -EOPNOTSUPP;
-		bp->dev->max_mtu = BNXT_MAX_PAGE_MODE_MTU;
+		bp->dev->max_mtu =
+			min_t(u16, bp->max_mtu, BNXT_MAX_PAGE_MODE_MTU);
 		bp->flags &= ~BNXT_FLAG_AGG_RINGS;
 		bp->flags |= BNXT_FLAG_NO_AGG_RINGS | BNXT_FLAG_RX_PAGE_MODE;
 		bp->dev->hw_features &= ~NETIF_F_LRO;
@@ -2835,7 +2844,7 @@ int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode)
 		bp->rx_dir = DMA_BIDIRECTIONAL;
 		bp->rx_skb_func = bnxt_rx_page_skb;
 	} else {
-		bp->dev->max_mtu = BNXT_MAX_MTU;
+		bp->dev->max_mtu = bp->max_mtu;
 		bp->flags &= ~BNXT_FLAG_RX_PAGE_MODE;
 		bp->rx_dir = DMA_FROM_DEVICE;
 		bp->rx_skb_func = bnxt_rx_skb;
@@ -4528,19 +4537,46 @@ static int bnxt_hwrm_check_tx_rings(struct bnxt *bp, int tx_rings)
 	return 0;
 }
 
-static void bnxt_hwrm_set_coal_params(struct bnxt *bp, u32 max_bufs,
-	u32 buf_tmrs, u16 flags,
+static void bnxt_hwrm_set_coal_params(struct bnxt_coal *hw_coal,
 	struct hwrm_ring_cmpl_ring_cfg_aggint_params_input *req)
 {
+	u16 val, tmr, max, flags;
+
+	max = hw_coal->bufs_per_record * 128;
+	if (hw_coal->budget)
+		max = hw_coal->bufs_per_record * hw_coal->budget;
+
+	val = clamp_t(u16, hw_coal->coal_bufs, 1, max);
+	req->num_cmpl_aggr_int = cpu_to_le16(val);
+
+	/* This is a 6-bit value and must not be 0, or we'll get non stop IRQ */
+	val = min_t(u16, val, 63);
+	req->num_cmpl_dma_aggr = cpu_to_le16(val);
+
+	/* This is a 6-bit value and must not be 0, or we'll get non stop IRQ */
+	val = clamp_t(u16, hw_coal->coal_bufs_irq, 1, 63);
+	req->num_cmpl_dma_aggr_during_int = cpu_to_le16(val);
+
+	tmr = BNXT_USEC_TO_COAL_TIMER(hw_coal->coal_ticks);
+	tmr = max_t(u16, tmr, 1);
+	req->int_lat_tmr_max = cpu_to_le16(tmr);
+
+	/* min timer set to 1/2 of interrupt timer */
+	val = tmr / 2;
+	req->int_lat_tmr_min = cpu_to_le16(val);
+
+	/* buf timer set to 1/4 of interrupt timer */
+	val = max_t(u16, tmr / 4, 1);
+	req->cmpl_aggr_dma_tmr = cpu_to_le16(val);
+
+	tmr = BNXT_USEC_TO_COAL_TIMER(hw_coal->coal_ticks_irq);
+	tmr = max_t(u16, tmr, 1);
+	req->cmpl_aggr_dma_tmr_during_int = cpu_to_le16(tmr);
+
+	flags = RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_TIMER_RESET;
+	if (hw_coal->idle_thresh && hw_coal->coal_ticks < hw_coal->idle_thresh)
+		flags |= RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_RING_IDLE;
 	req->flags = cpu_to_le16(flags);
-	req->num_cmpl_dma_aggr = cpu_to_le16((u16)max_bufs);
-	req->num_cmpl_dma_aggr_during_int = cpu_to_le16(max_bufs >> 16);
-	req->cmpl_aggr_dma_tmr = cpu_to_le16((u16)buf_tmrs);
-	req->cmpl_aggr_dma_tmr_during_int = cpu_to_le16(buf_tmrs >> 16);
-	/* Minimum time between 2 interrupts set to buf_tmr x 2 */
-	req->int_lat_tmr_min = cpu_to_le16((u16)buf_tmrs * 2);
-	req->int_lat_tmr_max = cpu_to_le16((u16)buf_tmrs * 4);
-	req->num_cmpl_aggr_int = cpu_to_le16((u16)max_bufs * 4);
 }
 
 int bnxt_hwrm_set_coal(struct bnxt *bp)
@@ -4548,51 +4584,14 @@ int bnxt_hwrm_set_coal(struct bnxt *bp)
 	int i, rc = 0;
 	struct hwrm_ring_cmpl_ring_cfg_aggint_params_input req_rx = {0},
 							   req_tx = {0}, *req;
-	u16 max_buf, max_buf_irq;
-	u16 buf_tmr, buf_tmr_irq;
-	u32 flags;
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req_rx,
 			       HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS, -1, -1);
 	bnxt_hwrm_cmd_hdr_init(bp, &req_tx,
 			       HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS, -1, -1);
 
-	/* Each rx completion (2 records) should be DMAed immediately.
-	 * DMA 1/4 of the completion buffers at a time.
-	 */
-	max_buf = min_t(u16, bp->rx_coal_bufs / 4, 2);
-	/* max_buf must not be zero */
-	max_buf = clamp_t(u16, max_buf, 1, 63);
-	max_buf_irq = clamp_t(u16, bp->rx_coal_bufs_irq, 1, 63);
-	buf_tmr = BNXT_USEC_TO_COAL_TIMER(bp->rx_coal_ticks);
-	/* buf timer set to 1/4 of interrupt timer */
-	buf_tmr = max_t(u16, buf_tmr / 4, 1);
-	buf_tmr_irq = BNXT_USEC_TO_COAL_TIMER(bp->rx_coal_ticks_irq);
-	buf_tmr_irq = max_t(u16, buf_tmr_irq, 1);
-
-	flags = RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_TIMER_RESET;
-
-	/* RING_IDLE generates more IRQs for lower latency.  Enable it only
-	 * if coal_ticks is less than 25 us.
-	 */
-	if (bp->rx_coal_ticks < 25)
-		flags |= RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_RING_IDLE;
-
-	bnxt_hwrm_set_coal_params(bp, max_buf_irq << 16 | max_buf,
-				  buf_tmr_irq << 16 | buf_tmr, flags, &req_rx);
-
-	/* max_buf must not be zero */
-	max_buf = clamp_t(u16, bp->tx_coal_bufs, 1, 63);
-	max_buf_irq = clamp_t(u16, bp->tx_coal_bufs_irq, 1, 63);
-	buf_tmr = BNXT_USEC_TO_COAL_TIMER(bp->tx_coal_ticks);
-	/* buf timer set to 1/4 of interrupt timer */
-	buf_tmr = max_t(u16, buf_tmr / 4, 1);
-	buf_tmr_irq = BNXT_USEC_TO_COAL_TIMER(bp->tx_coal_ticks_irq);
-	buf_tmr_irq = max_t(u16, buf_tmr_irq, 1);
-
-	flags = RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_TIMER_RESET;
-	bnxt_hwrm_set_coal_params(bp, max_buf_irq << 16 | max_buf,
-				  buf_tmr_irq << 16 | buf_tmr, flags, &req_tx);
+	bnxt_hwrm_set_coal_params(&bp->rx_coal, &req_rx);
+	bnxt_hwrm_set_coal_params(&bp->tx_coal, &req_tx);
 
 	mutex_lock(&bp->hwrm_cmd_lock);
 	for (i = 0; i < bp->cp_nr_rings; i++) {
@@ -4724,6 +4723,10 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp)
 	else
 		bp->br_mode = BRIDGE_MODE_UNDEF;
 
+	bp->max_mtu = le16_to_cpu(resp->max_mtu_configured);
+	if (!bp->max_mtu)
+		bp->max_mtu = BNXT_MAX_MTU;
+
 func_qcfg_exit:
 	mutex_unlock(&bp->hwrm_cmd_lock);
 	return rc;
@@ -4884,9 +4887,9 @@ static int bnxt_hwrm_ver_get(struct bnxt *bp)
 			    resp->hwrm_intf_upd);
 		netdev_warn(bp->dev, "Please update firmware with HWRM interface 1.0.0 or newer.\n");
 	}
-	snprintf(bp->fw_ver_str, BC_HWRM_STR_LEN, "%d.%d.%d/%d.%d.%d",
+	snprintf(bp->fw_ver_str, BC_HWRM_STR_LEN, "%d.%d.%d.%d",
 		 resp->hwrm_fw_maj, resp->hwrm_fw_min, resp->hwrm_fw_bld,
-		 resp->hwrm_intf_maj, resp->hwrm_intf_min, resp->hwrm_intf_upd);
+		 resp->hwrm_fw_rsvd);
 
 	bp->hwrm_cmd_timeout = le16_to_cpu(resp->def_req_timeout);
 	if (!bp->hwrm_cmd_timeout)
@@ -4912,16 +4915,14 @@ hwrm_ver_get_exit:
 
 int bnxt_hwrm_fw_set_time(struct bnxt *bp)
 {
-#if IS_ENABLED(CONFIG_RTC_LIB)
 	struct hwrm_fw_set_time_input req = {0};
-	struct rtc_time tm;
-	struct timeval tv;
+	struct tm tm;
+	time64_t now = ktime_get_real_seconds();
 
 	if (bp->hwrm_spec_code < 0x10400)
 		return -EOPNOTSUPP;
 
-	do_gettimeofday(&tv);
-	rtc_time_to_tm(tv.tv_sec, &tm);
+	time64_to_tm(now, 0, &tm);
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_SET_TIME, -1, -1);
 	req.year = cpu_to_le16(1900 + tm.tm_year);
 	req.month = 1 + tm.tm_mon;
@@ -4930,9 +4931,6 @@ int bnxt_hwrm_fw_set_time(struct bnxt *bp)
 	req.minute = tm.tm_min;
 	req.second = tm.tm_sec;
 	return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-#else
-	return -EOPNOTSUPP;
-#endif
 }
 
 static int bnxt_hwrm_port_qstats(struct bnxt *bp)
@@ -6980,6 +6978,11 @@ static void bnxt_timer(unsigned long data)
 		set_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event);
 		bnxt_queue_sp_work(bp);
 	}
+
+	if (bnxt_tc_flower_enabled(bp)) {
+		set_bit(BNXT_FLOW_STATS_SP_EVENT, &bp->sp_event);
+		bnxt_queue_sp_work(bp);
+	}
 bnxt_restart_timer:
 	mod_timer(&bp->timer, jiffies + bp->current_interval);
 }
@@ -7070,6 +7073,10 @@ static void bnxt_sp_task(struct work_struct *work)
 		bnxt_get_port_module_status(bp);
 		mutex_unlock(&bp->link_lock);
 	}
+
+	if (test_and_clear_bit(BNXT_FLOW_STATS_SP_EVENT, &bp->sp_event))
+		bnxt_tc_flow_stats_work(bp);
+
 	/* These functions below will clear BNXT_STATE_IN_SP_TASK.  They
 	 * must be the last functions to be called before exiting.
 	 */
@@ -7133,6 +7140,32 @@ static void bnxt_cleanup_pci(struct bnxt *bp)
 	pci_disable_device(bp->pdev);
 }
 
+static void bnxt_init_dflt_coal(struct bnxt *bp)
+{
+	struct bnxt_coal *coal;
+
+	/* Tick values in micro seconds.
+	 * 1 coal_buf x bufs_per_record = 1 completion record.
+	 */
+	coal = &bp->rx_coal;
+	coal->coal_ticks = 14;
+	coal->coal_bufs = 30;
+	coal->coal_ticks_irq = 1;
+	coal->coal_bufs_irq = 2;
+	coal->idle_thresh = 25;
+	coal->bufs_per_record = 2;
+	coal->budget = 64;		/* NAPI budget */
+
+	coal = &bp->tx_coal;
+	coal->coal_ticks = 28;
+	coal->coal_bufs = 30;
+	coal->coal_ticks_irq = 2;
+	coal->coal_bufs_irq = 2;
+	coal->bufs_per_record = 1;
+
+	bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS;
+}
+
 static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev)
 {
 	int rc;
@@ -7201,22 +7234,9 @@ static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev)
 	bp->rx_ring_size = BNXT_DEFAULT_RX_RING_SIZE;
 	bp->tx_ring_size = BNXT_DEFAULT_TX_RING_SIZE;
 
-	/* tick values in micro seconds */
-	bp->rx_coal_ticks = 12;
-	bp->rx_coal_bufs = 30;
-	bp->rx_coal_ticks_irq = 1;
-	bp->rx_coal_bufs_irq = 2;
+	bnxt_init_dflt_coal(bp);
 
-	bp->tx_coal_ticks = 25;
-	bp->tx_coal_bufs = 30;
-	bp->tx_coal_ticks_irq = 2;
-	bp->tx_coal_bufs_irq = 2;
-
-	bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS;
-
-	init_timer(&bp->timer);
-	bp->timer.data = (unsigned long)bp;
-	bp->timer.function = bnxt_timer;
+	setup_timer(&bp->timer, bnxt_timer, (unsigned long)bp);
 	bp->current_interval = BNXT_TIMER_INTERVAL;
 
 	clear_bit(BNXT_STATE_OPEN, &bp->state);
@@ -7243,13 +7263,13 @@ static int bnxt_change_mac_addr(struct net_device *dev, void *p)
 	if (!is_valid_ether_addr(addr->sa_data))
 		return -EADDRNOTAVAIL;
 
+	if (ether_addr_equal(addr->sa_data, dev->dev_addr))
+		return 0;
+
 	rc = bnxt_approve_mac(bp, addr->sa_data);
 	if (rc)
 		return rc;
 
-	if (ether_addr_equal(addr->sa_data, dev->dev_addr))
-		return 0;
-
 	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
 	if (netif_running(dev)) {
 		bnxt_close_nic(bp, false, false);
@@ -7321,24 +7341,49 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc)
 	return 0;
 }
 
-static int bnxt_setup_flower(struct net_device *dev,
-			     struct tc_cls_flower_offload *cls_flower)
+static int bnxt_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+				  void *cb_priv)
+{
+	struct bnxt *bp = cb_priv;
+
+	if (!bnxt_tc_flower_enabled(bp) || !tc_can_offload(bp->dev))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return bnxt_tc_setup_flower(bp, bp->pf.fw_fid, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int bnxt_setup_tc_block(struct net_device *dev,
+			       struct tc_block_offload *f)
 {
 	struct bnxt *bp = netdev_priv(dev);
 
-	if (BNXT_VF(bp))
+	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
 		return -EOPNOTSUPP;
 
-	return bnxt_tc_setup_flower(bp, bp->pf.fw_fid, cls_flower);
+	switch (f->command) {
+	case TC_BLOCK_BIND:
+		return tcf_block_cb_register(f->block, bnxt_setup_tc_block_cb,
+					     bp, bp);
+	case TC_BLOCK_UNBIND:
+		tcf_block_cb_unregister(f->block, bnxt_setup_tc_block_cb, bp);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
 static int bnxt_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			 void *type_data)
 {
 	switch (type) {
-	case TC_SETUP_CLSFLOWER:
-		return bnxt_setup_flower(dev, type_data);
-	case TC_SETUP_MQPRIO: {
+	case TC_SETUP_BLOCK:
+		return bnxt_setup_tc_block(dev, type_data);
+	case TC_SETUP_QDISC_MQPRIO: {
 		struct tc_mqprio_qopt *mqprio = type_data;
 
 		mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
@@ -7725,7 +7770,7 @@ static const struct net_device_ops bnxt_netdev_ops = {
 #endif
 	.ndo_udp_tunnel_add	= bnxt_udp_tunnel_add,
 	.ndo_udp_tunnel_del	= bnxt_udp_tunnel_del,
-	.ndo_xdp		= bnxt_xdp,
+	.ndo_bpf		= bnxt_xdp,
 	.ndo_bridge_getlink	= bnxt_bridge_getlink,
 	.ndo_bridge_setlink	= bnxt_bridge_setlink,
 	.ndo_get_phys_port_name = bnxt_get_phys_port_name
@@ -8064,10 +8109,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	dev->features |= dev->hw_features | NETIF_F_HIGHDMA;
 	dev->priv_flags |= IFF_UNICAST_FLT;
 
-	/* MTU range: 60 - 9500 */
-	dev->min_mtu = ETH_ZLEN;
-	dev->max_mtu = BNXT_MAX_MTU;
-
 #ifdef CONFIG_BNXT_SRIOV
 	init_waitqueue_head(&bp->sriov_cfg_wait);
 	mutex_init(&bp->sriov_lock);
@@ -8115,6 +8156,10 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	bnxt_ethtool_init(bp);
 	bnxt_dcb_init(bp);
 
+	/* MTU range: 60 - FW defined max */
+	dev->min_mtu = ETH_ZLEN;
+	dev->max_mtu = bp->max_mtu;
+
 	rc = bnxt_probe_phy(bp);
 	if (rc)
 		goto init_err_pci_clean;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index c911e69ff25f..5359a1f0045f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -944,6 +944,22 @@ struct bnxt_test_info {
 #define BNXT_CAG_REG_LEGACY_INT_STATUS	0x4014
 #define BNXT_CAG_REG_BASE		0x300000
 
+struct bnxt_coal {
+	u16			coal_ticks;
+	u16			coal_ticks_irq;
+	u16			coal_bufs;
+	u16			coal_bufs_irq;
+			/* RING_IDLE enabled when coal ticks < idle_thresh  */
+	u16			idle_thresh;
+	u8			bufs_per_record;
+	u8			budget;
+};
+
+struct bnxt_tc_flow_stats {
+	u64		packets;
+	u64		bytes;
+};
+
 struct bnxt_tc_info {
 	bool				enabled;
 
@@ -954,12 +970,29 @@ struct bnxt_tc_info {
 	/* hash table to store L2 keys of TC flows */
 	struct rhashtable		l2_table;
 	struct rhashtable_params	l2_ht_params;
+	/* hash table to store L2 keys for TC tunnel decap */
+	struct rhashtable		decap_l2_table;
+	struct rhashtable_params	decap_l2_ht_params;
+	/* hash table to store tunnel decap entries */
+	struct rhashtable		decap_table;
+	struct rhashtable_params	decap_ht_params;
+	/* hash table to store tunnel encap entries */
+	struct rhashtable		encap_table;
+	struct rhashtable_params	encap_ht_params;
 
 	/* lock to atomically add/del an l2 node when a flow is
 	 * added or deleted.
 	 */
 	struct mutex			lock;
 
+	/* Fields used for batching stats query */
+	struct rhashtable_iter		iter;
+#define BNXT_FLOW_STATS_BATCH_MAX	10
+	struct bnxt_tc_stats_batch {
+		void			  *flow_node;
+		struct bnxt_tc_flow_stats hw_stats;
+	} stats_batch[BNXT_FLOW_STATS_BATCH_MAX];
+
 	/* Stat counter mask (width) */
 	u64				bytes_mask;
 	u64				packets_mask;
@@ -1013,6 +1046,7 @@ struct bnxt {
 #define CHIP_NUM_5745X		0xd730
 
 #define CHIP_NUM_58802		0xd802
+#define CHIP_NUM_58804		0xd804
 #define CHIP_NUM_58808		0xd808
 
 #define BNXT_CHIP_NUM_5730X(chip_num)		\
@@ -1048,6 +1082,7 @@ struct bnxt {
 
 #define BNXT_CHIP_NUM_588XX(chip_num)		\
 	((chip_num) == CHIP_NUM_58802 ||	\
+	 (chip_num) == CHIP_NUM_58804 ||        \
 	 (chip_num) == CHIP_NUM_58808)
 
 	struct net_device	*dev;
@@ -1170,6 +1205,7 @@ struct bnxt {
 	int			nr_vnics;
 	u32			rss_hash_cfg;
 
+	u16			max_mtu;
 	u8			max_tc;
 	u8			max_lltc;	/* lossless TCs */
 	struct bnxt_queue_info	q_info[BNXT_MAX_QUEUE];
@@ -1232,14 +1268,8 @@ struct bnxt {
 	u8			port_count;
 	u16			br_mode;
 
-	u16			rx_coal_ticks;
-	u16			rx_coal_ticks_irq;
-	u16			rx_coal_bufs;
-	u16			rx_coal_bufs_irq;
-	u16			tx_coal_ticks;
-	u16			tx_coal_ticks_irq;
-	u16			tx_coal_bufs;
-	u16			tx_coal_bufs_irq;
+	struct bnxt_coal	rx_coal;
+	struct bnxt_coal	tx_coal;
 
 #define BNXT_USEC_TO_COAL_TIMER(x)	((x) * 25 / 2)
 
@@ -1265,6 +1295,7 @@ struct bnxt {
 #define BNXT_GENEVE_ADD_PORT_SP_EVENT	12
 #define BNXT_GENEVE_DEL_PORT_SP_EVENT	13
 #define BNXT_LINK_SPEED_CHNG_SP_EVENT	14
+#define BNXT_FLOW_STATS_SP_EVENT	15
 
 	struct bnxt_pf_info	pf;
 #ifdef CONFIG_BNXT_SRIOV
@@ -1315,7 +1346,7 @@ struct bnxt {
 	enum devlink_eswitch_mode eswitch_mode;
 	struct bnxt_vf_rep	**vf_reps; /* array of vf-rep ptrs */
 	u16			*cfa_code_map; /* cfa_code -> vf_idx map */
-	struct bnxt_tc_info	tc_info;
+	struct bnxt_tc_info	*tc_info;
 };
 
 #define BNXT_RX_STATS_OFFSET(counter)			\
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
new file mode 100644
index 000000000000..402fa32f7a88
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -0,0 +1,65 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2017 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include "bnxt_hsi.h"
+#include "bnxt.h"
+#include "bnxt_vfr.h"
+#include "bnxt_devlink.h"
+
+static const struct devlink_ops bnxt_dl_ops = {
+#ifdef CONFIG_BNXT_SRIOV
+	.eswitch_mode_set = bnxt_dl_eswitch_mode_set,
+	.eswitch_mode_get = bnxt_dl_eswitch_mode_get,
+#endif /* CONFIG_BNXT_SRIOV */
+};
+
+int bnxt_dl_register(struct bnxt *bp)
+{
+	struct devlink *dl;
+	int rc;
+
+	if (!pci_find_ext_capability(bp->pdev, PCI_EXT_CAP_ID_SRIOV))
+		return 0;
+
+	if (bp->hwrm_spec_code < 0x10803) {
+		netdev_warn(bp->dev, "Firmware does not support SR-IOV E-Switch SWITCHDEV mode.\n");
+		return -ENOTSUPP;
+	}
+
+	dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl));
+	if (!dl) {
+		netdev_warn(bp->dev, "devlink_alloc failed");
+		return -ENOMEM;
+	}
+
+	bnxt_link_bp_to_dl(bp, dl);
+	bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
+	rc = devlink_register(dl, &bp->pdev->dev);
+	if (rc) {
+		bnxt_link_bp_to_dl(bp, NULL);
+		devlink_free(dl);
+		netdev_warn(bp->dev, "devlink_register failed. rc=%d", rc);
+		return rc;
+	}
+
+	return 0;
+}
+
+void bnxt_dl_unregister(struct bnxt *bp)
+{
+	struct devlink *dl = bp->dl;
+
+	if (!dl)
+		return;
+
+	devlink_unregister(dl);
+	devlink_free(dl);
+}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
new file mode 100644
index 000000000000..e92a35d8b642
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
@@ -0,0 +1,39 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2017 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#ifndef BNXT_DEVLINK_H
+#define BNXT_DEVLINK_H
+
+/* Struct to hold housekeeping info needed by devlink interface */
+struct bnxt_dl {
+	struct bnxt *bp;	/* back ptr to the controlling dev */
+};
+
+static inline struct bnxt *bnxt_get_bp_from_dl(struct devlink *dl)
+{
+	return ((struct bnxt_dl *)devlink_priv(dl))->bp;
+}
+
+/* To clear devlink pointer from bp, pass NULL dl */
+static inline void bnxt_link_bp_to_dl(struct bnxt *bp, struct devlink *dl)
+{
+	bp->dl = dl;
+
+	/* add a back pointer in dl to bp */
+	if (dl) {
+		struct bnxt_dl *bp_dl = devlink_priv(dl);
+
+		bp_dl->bp = bp;
+	}
+}
+
+int bnxt_dl_register(struct bnxt *bp);
+void bnxt_dl_unregister(struct bnxt *bp);
+
+#endif /* BNXT_DEVLINK_H */
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 3cbe771b3352..7ce1d4b7e67d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -26,8 +26,6 @@
 #define FLASH_PACKAGE_TIMEOUT	((HWRM_CMD_TIMEOUT) * 200)
 #define INSTALL_PACKAGE_TIMEOUT	((HWRM_CMD_TIMEOUT) * 200)
 
-static char *bnxt_get_pkgver(struct net_device *dev, char *buf, size_t buflen);
-
 static u32 bnxt_get_msglevel(struct net_device *dev)
 {
 	struct bnxt *bp = netdev_priv(dev);
@@ -46,19 +44,24 @@ static int bnxt_get_coalesce(struct net_device *dev,
 			     struct ethtool_coalesce *coal)
 {
 	struct bnxt *bp = netdev_priv(dev);
+	struct bnxt_coal *hw_coal;
+	u16 mult;
 
 	memset(coal, 0, sizeof(*coal));
 
-	coal->rx_coalesce_usecs = bp->rx_coal_ticks;
-	/* 2 completion records per rx packet */
-	coal->rx_max_coalesced_frames = bp->rx_coal_bufs / 2;
-	coal->rx_coalesce_usecs_irq = bp->rx_coal_ticks_irq;
-	coal->rx_max_coalesced_frames_irq = bp->rx_coal_bufs_irq / 2;
+	hw_coal = &bp->rx_coal;
+	mult = hw_coal->bufs_per_record;
+	coal->rx_coalesce_usecs = hw_coal->coal_ticks;
+	coal->rx_max_coalesced_frames = hw_coal->coal_bufs / mult;
+	coal->rx_coalesce_usecs_irq = hw_coal->coal_ticks_irq;
+	coal->rx_max_coalesced_frames_irq = hw_coal->coal_bufs_irq / mult;
 
-	coal->tx_coalesce_usecs = bp->tx_coal_ticks;
-	coal->tx_max_coalesced_frames = bp->tx_coal_bufs;
-	coal->tx_coalesce_usecs_irq = bp->tx_coal_ticks_irq;
-	coal->tx_max_coalesced_frames_irq = bp->tx_coal_bufs_irq;
+	hw_coal = &bp->tx_coal;
+	mult = hw_coal->bufs_per_record;
+	coal->tx_coalesce_usecs = hw_coal->coal_ticks;
+	coal->tx_max_coalesced_frames = hw_coal->coal_bufs / mult;
+	coal->tx_coalesce_usecs_irq = hw_coal->coal_ticks_irq;
+	coal->tx_max_coalesced_frames_irq = hw_coal->coal_bufs_irq / mult;
 
 	coal->stats_block_coalesce_usecs = bp->stats_coal_ticks;
 
@@ -70,18 +73,23 @@ static int bnxt_set_coalesce(struct net_device *dev,
 {
 	struct bnxt *bp = netdev_priv(dev);
 	bool update_stats = false;
+	struct bnxt_coal *hw_coal;
 	int rc = 0;
-
-	bp->rx_coal_ticks = coal->rx_coalesce_usecs;
-	/* 2 completion records per rx packet */
-	bp->rx_coal_bufs = coal->rx_max_coalesced_frames * 2;
-	bp->rx_coal_ticks_irq = coal->rx_coalesce_usecs_irq;
-	bp->rx_coal_bufs_irq = coal->rx_max_coalesced_frames_irq * 2;
-
-	bp->tx_coal_ticks = coal->tx_coalesce_usecs;
-	bp->tx_coal_bufs = coal->tx_max_coalesced_frames;
-	bp->tx_coal_ticks_irq = coal->tx_coalesce_usecs_irq;
-	bp->tx_coal_bufs_irq = coal->tx_max_coalesced_frames_irq;
+	u16 mult;
+
+	hw_coal = &bp->rx_coal;
+	mult = hw_coal->bufs_per_record;
+	hw_coal->coal_ticks = coal->rx_coalesce_usecs;
+	hw_coal->coal_bufs = coal->rx_max_coalesced_frames * mult;
+	hw_coal->coal_ticks_irq = coal->rx_coalesce_usecs_irq;
+	hw_coal->coal_bufs_irq = coal->rx_max_coalesced_frames_irq * mult;
+
+	hw_coal = &bp->tx_coal;
+	mult = hw_coal->bufs_per_record;
+	hw_coal->coal_ticks = coal->tx_coalesce_usecs;
+	hw_coal->coal_bufs = coal->tx_max_coalesced_frames * mult;
+	hw_coal->coal_ticks_irq = coal->tx_coalesce_usecs_irq;
+	hw_coal->coal_bufs_irq = coal->tx_max_coalesced_frames_irq * mult;
 
 	if (bp->stats_coal_ticks != coal->stats_block_coalesce_usecs) {
 		u32 stats_ticks = coal->stats_block_coalesce_usecs;
@@ -822,20 +830,10 @@ static void bnxt_get_drvinfo(struct net_device *dev,
 			     struct ethtool_drvinfo *info)
 {
 	struct bnxt *bp = netdev_priv(dev);
-	char *pkglog;
-	char *pkgver = NULL;
 
-	pkglog = kmalloc(BNX_PKG_LOG_MAX_LENGTH, GFP_KERNEL);
-	if (pkglog)
-		pkgver = bnxt_get_pkgver(dev, pkglog, BNX_PKG_LOG_MAX_LENGTH);
 	strlcpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver));
 	strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version));
-	if (pkgver && *pkgver != 0 && isdigit(*pkgver))
-		snprintf(info->fw_version, sizeof(info->fw_version) - 1,
-			 "%s pkg %s", bp->fw_ver_str, pkgver);
-	else
-		strlcpy(info->fw_version, bp->fw_ver_str,
-			sizeof(info->fw_version));
+	strlcpy(info->fw_version, bp->fw_ver_str, sizeof(info->fw_version));
 	strlcpy(info->bus_info, pci_name(bp->pdev), sizeof(info->bus_info));
 	info->n_stats = bnxt_get_num_stats(bp);
 	info->testinfo_len = bp->num_tests;
@@ -843,7 +841,6 @@ static void bnxt_get_drvinfo(struct net_device *dev,
 	info->eedump_len = 0;
 	/* TODO CHIMP FW: reg dump details */
 	info->regdump_len = 0;
-	kfree(pkglog);
 }
 
 static void bnxt_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
@@ -1350,7 +1347,6 @@ static int bnxt_firmware_reset(struct net_device *dev,
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_RESET, -1, -1);
 
-	/* TODO: Support ASAP ChiMP self-reset (e.g. upon PF driver unload) */
 	/* TODO: Address self-reset of APE/KONG/BONO/TANG or ungraceful reset */
 	/*       (e.g. when firmware isn't already running) */
 	switch (dir_type) {
@@ -1376,6 +1372,10 @@ static int bnxt_firmware_reset(struct net_device *dev,
 	case BNX_DIR_TYPE_BONO_PATCH:
 		req.embedded_proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE;
 		break;
+	case BNXT_FW_RESET_CHIP:
+		req.embedded_proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP;
+		req.selfrst_status = FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -1773,6 +1773,9 @@ static int bnxt_get_nvram_item(struct net_device *dev, u32 index, u32 offset,
 	dma_addr_t dma_handle;
 	struct hwrm_nvm_read_input req = {0};
 
+	if (!length)
+		return -EINVAL;
+
 	buf = dma_alloc_coherent(&bp->pdev->dev, length, &dma_handle,
 				 GFP_KERNEL);
 	if (!buf) {
@@ -2495,13 +2498,59 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest,
 	}
 }
 
+static int bnxt_reset(struct net_device *dev, u32 *flags)
+{
+	struct bnxt *bp = netdev_priv(dev);
+	int rc = 0;
+
+	if (!BNXT_PF(bp)) {
+		netdev_err(dev, "Reset is not supported from a VF\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (pci_vfs_assigned(bp->pdev)) {
+		netdev_err(dev,
+			   "Reset not allowed when VFs are assigned to VMs\n");
+		return -EBUSY;
+	}
+
+	if (*flags == ETH_RESET_ALL) {
+		/* This feature is not supported in older firmware versions */
+		if (bp->hwrm_spec_code < 0x10803)
+			return -EOPNOTSUPP;
+
+		rc = bnxt_firmware_reset(dev, BNXT_FW_RESET_CHIP);
+		if (!rc)
+			netdev_info(dev, "Reset request successful. Reload driver to complete reset\n");
+	} else {
+		rc = -EINVAL;
+	}
+
+	return rc;
+}
+
 void bnxt_ethtool_init(struct bnxt *bp)
 {
 	struct hwrm_selftest_qlist_output *resp = bp->hwrm_cmd_resp_addr;
 	struct hwrm_selftest_qlist_input req = {0};
 	struct bnxt_test_info *test_info;
+	struct net_device *dev = bp->dev;
+	char *pkglog;
 	int i, rc;
 
+	pkglog = kzalloc(BNX_PKG_LOG_MAX_LENGTH, GFP_KERNEL);
+	if (pkglog) {
+		char *pkgver;
+		int len;
+
+		pkgver = bnxt_get_pkgver(dev, pkglog, BNX_PKG_LOG_MAX_LENGTH);
+		if (pkgver && *pkgver != 0 && isdigit(*pkgver)) {
+			len = strlen(bp->fw_ver_str);
+			snprintf(bp->fw_ver_str + len, FW_VER_STR_LEN - len - 1,
+				 "/pkg %s", pkgver);
+		}
+		kfree(pkglog);
+	}
 	if (bp->hwrm_spec_code < 0x10704 || !BNXT_SINGLE_PF(bp))
 		return;
 
@@ -2592,4 +2641,5 @@ const struct ethtool_ops bnxt_ethtool_ops = {
 	.nway_reset		= bnxt_nway_reset,
 	.set_phys_id		= bnxt_set_phys_id,
 	.self_test		= bnxt_self_test,
+	.reset			= bnxt_reset,
 };
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
index f1bc90b6fb5b..ff601b42fcc8 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
@@ -34,6 +34,8 @@ struct bnxt_led_cfg {
 #define BNXT_LED_DFLT_ENABLES(x)			\
 	cpu_to_le32(BNXT_LED_DFLT_ENA << (BNXT_LED_DFLT_ENA_SHIFT * (x)))
 
+#define BNXT_FW_RESET_CHIP	0xffff
+
 extern const struct ethtool_ops bnxt_ethtool_ops;
 
 u32 _bnxt_fw_to_ethtool_adv_spds(u16, u8);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
index cb04cc76e8ad..c99f4d0880e4 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
@@ -11,21 +11,21 @@
 #ifndef BNXT_HSI_H
 #define BNXT_HSI_H
 
-/* HSI and HWRM Specification 1.8.1 */
+/* HSI and HWRM Specification 1.8.3 */
 #define HWRM_VERSION_MAJOR	1
 #define HWRM_VERSION_MINOR	8
-#define HWRM_VERSION_UPDATE	1
+#define HWRM_VERSION_UPDATE	3
 
-#define HWRM_VERSION_RSVD	4 /* non-zero means beta version */
+#define HWRM_VERSION_RSVD	1 /* non-zero means beta version */
 
-#define HWRM_VERSION_STR	"1.8.1.4"
+#define HWRM_VERSION_STR	"1.8.3.1"
 /*
  * Following is the signature for HWRM message field that indicates not
  * applicable (All F's). Need to cast it the size of the field if needed.
  */
 #define HWRM_NA_SIGNATURE	((__le32)(-1))
 #define HWRM_MAX_REQ_LEN    (128)  /* hwrm_func_buf_rgtr */
-#define HWRM_MAX_RESP_LEN    (248)  /* hwrm_selftest_qlist */
+#define HWRM_MAX_RESP_LEN    (280)  /* hwrm_selftest_qlist */
 #define HW_HASH_INDEX_SIZE      0x80    /* 7 bit indirection table index. */
 #define HW_HASH_KEY_SIZE	40
 #define HWRM_RESP_VALID_KEY      1 /* valid key for HWRM response */
@@ -111,6 +111,7 @@ struct hwrm_async_event_cmpl {
 	#define ASYNC_EVENT_CMPL_EVENT_ID_VF_MAC_ADDR_CHANGE      0x31UL
 	#define ASYNC_EVENT_CMPL_EVENT_ID_PF_VF_COMM_STATUS_CHANGE 0x32UL
 	#define ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE	   0x33UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_LLFC_PFC_CHANGE	   0x34UL
 	#define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR		   0xffUL
 	__le32 event_data2;
 	u8 opaque_v;
@@ -835,8 +836,7 @@ struct hwrm_func_qcfg_output {
 	u8 port_pf_cnt;
 	#define FUNC_QCFG_RESP_PORT_PF_CNT_UNAVAIL		   0x0UL
 	__le16 dflt_vnic_id;
-	u8 unused_0;
-	u8 unused_1;
+	__le16 max_mtu_configured;
 	__le32 min_bw;
 	#define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_MASK		    0xfffffffUL
 	#define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_SFT		    0
@@ -873,12 +873,12 @@ struct hwrm_func_qcfg_output {
 	#define FUNC_QCFG_RESP_EVB_MODE_NO_EVB			   0x0UL
 	#define FUNC_QCFG_RESP_EVB_MODE_VEB			   0x1UL
 	#define FUNC_QCFG_RESP_EVB_MODE_VEPA			   0x2UL
-	u8 unused_2;
+	u8 unused_0;
 	__le16 alloc_vfs;
 	__le32 alloc_mcast_filters;
 	__le32 alloc_hw_ring_grps;
 	__le16 alloc_sp_tx_rings;
-	u8 unused_3;
+	u8 unused_1;
 	u8 valid;
 };
 
@@ -3407,6 +3407,7 @@ struct hwrm_vnic_cfg_input {
 	#define VNIC_CFG_REQ_FLAGS_ROCE_DUAL_VNIC_MODE		    0x8UL
 	#define VNIC_CFG_REQ_FLAGS_ROCE_ONLY_VNIC_MODE		    0x10UL
 	#define VNIC_CFG_REQ_FLAGS_RSS_DFLT_CR_MODE		    0x20UL
+	#define VNIC_CFG_REQ_FLAGS_ROCE_MIRRORING_CAPABLE_VNIC_MODE 0x40UL
 	__le32 enables;
 	#define VNIC_CFG_REQ_ENABLES_DFLT_RING_GRP		    0x1UL
 	#define VNIC_CFG_REQ_ENABLES_RSS_RULE			    0x2UL
@@ -3463,6 +3464,7 @@ struct hwrm_vnic_qcaps_output {
 	#define VNIC_QCAPS_RESP_FLAGS_ROCE_DUAL_VNIC_CAP	    0x8UL
 	#define VNIC_QCAPS_RESP_FLAGS_ROCE_ONLY_VNIC_CAP	    0x10UL
 	#define VNIC_QCAPS_RESP_FLAGS_RSS_DFLT_CR_CAP		    0x20UL
+	#define VNIC_QCAPS_RESP_FLAGS_ROCE_MIRROING_CAPABLE_VNIC_CAP 0x40UL
 	__le32 unused_2;
 	u8 unused_3;
 	u8 unused_4;
@@ -3994,6 +3996,7 @@ struct hwrm_cfa_l2_filter_alloc_input {
 	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS	   0x6UL
 	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT	   0x7UL
 	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE	   0x8UL
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4      0x9UL
 	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL     0xffUL
 	u8 unused_7;
 	__le16 dst_id;
@@ -4122,6 +4125,14 @@ struct hwrm_cfa_l2_set_rx_mask_output {
 	u8 valid;
 };
 
+/* Command specific Error Codes (8 bytes) */
+struct hwrm_cfa_l2_set_rx_mask_cmd_err {
+	u8 code;
+	#define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_UNKNOWN	   0x0UL
+	#define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_NTUPLE_FILTER_CONFLICT_ERR 0x1UL
+	u8 unused_0[7];
+};
+
 /* hwrm_cfa_tunnel_filter_alloc */
 /* Input (88 bytes) */
 struct hwrm_cfa_tunnel_filter_alloc_input {
@@ -4161,6 +4172,7 @@ struct hwrm_cfa_tunnel_filter_alloc_input {
 	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS      0x6UL
 	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT       0x7UL
 	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE     0x8UL
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4  0x9UL
 	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL
 	u8 unused_0;
 	__le32 vni;
@@ -4323,6 +4335,7 @@ struct hwrm_cfa_ntuple_filter_alloc_input {
 	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS      0x6UL
 	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT       0x7UL
 	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE     0x8UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4  0x9UL
 	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL
 	u8 pri_hint;
 	#define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER    0x0UL
@@ -4355,6 +4368,14 @@ struct hwrm_cfa_ntuple_filter_alloc_output {
 	u8 valid;
 };
 
+/* Command specific Error Codes (8 bytes) */
+struct hwrm_cfa_ntuple_filter_alloc_cmd_err {
+	u8 code;
+	#define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_UNKNOWN      0x0UL
+	#define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_RX_MASK_VLAN_CONFLICT_ERR 0x1UL
+	u8 unused_0[7];
+};
+
 /* hwrm_cfa_ntuple_filter_free */
 /* Input (24 bytes) */
 struct hwrm_cfa_ntuple_filter_free_input {
@@ -4413,6 +4434,116 @@ struct hwrm_cfa_ntuple_filter_cfg_output {
 	u8 valid;
 };
 
+/* hwrm_cfa_decap_filter_alloc */
+/* Input (104 bytes) */
+struct hwrm_cfa_decap_filter_alloc_input {
+	__le16 req_type;
+	__le16 cmpl_ring;
+	__le16 seq_id;
+	__le16 target_id;
+	__le64 resp_addr;
+	__le32 flags;
+	#define CFA_DECAP_FILTER_ALLOC_REQ_FLAGS_OVS_TUNNEL	    0x1UL
+	__le32 enables;
+	#define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_TYPE     0x1UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_ID       0x2UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_MACADDR     0x4UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR     0x8UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_OVLAN_VID       0x10UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IVLAN_VID       0x20UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_T_OVLAN_VID     0x40UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_T_IVLAN_VID     0x80UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_ETHERTYPE       0x100UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_IPADDR      0x200UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_IPADDR      0x400UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IPADDR_TYPE     0x800UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IP_PROTOCOL     0x1000UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_PORT	    0x2000UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_PORT	    0x4000UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_ID	    0x8000UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_MIRROR_VNIC_ID  0x10000UL
+	__be32 tunnel_id;
+	u8 tunnel_type;
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL  0x0UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN      0x1UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE      0x2UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE      0x3UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP       0x4UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE     0x5UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS       0x6UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT	   0x7UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE      0x8UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4   0x9UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL  0xffUL
+	u8 unused_0;
+	__le16 unused_1;
+	u8 src_macaddr[6];
+	u8 unused_2;
+	u8 unused_3;
+	u8 dst_macaddr[6];
+	__be16 ovlan_vid;
+	__be16 ivlan_vid;
+	__be16 t_ovlan_vid;
+	__be16 t_ivlan_vid;
+	__be16 ethertype;
+	u8 ip_addr_type;
+	#define CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_UNKNOWN   0x0UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4      0x4UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV6      0x6UL
+	u8 ip_protocol;
+	#define CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_UNKNOWN    0x0UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_TCP	   0x6UL
+	#define CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP	   0x11UL
+	u8 unused_4;
+	u8 unused_5;
+	u8 unused_6[3];
+	u8 unused_7;
+	__be32 src_ipaddr[4];
+	__be32 dst_ipaddr[4];
+	__be16 src_port;
+	__be16 dst_port;
+	__le16 dst_id;
+	__le16 l2_ctxt_ref_id;
+};
+
+/* Output (16 bytes) */
+struct hwrm_cfa_decap_filter_alloc_output {
+	__le16 error_code;
+	__le16 req_type;
+	__le16 seq_id;
+	__le16 resp_len;
+	__le32 decap_filter_id;
+	u8 unused_0;
+	u8 unused_1;
+	u8 unused_2;
+	u8 valid;
+};
+
+/* hwrm_cfa_decap_filter_free */
+/* Input (24 bytes) */
+struct hwrm_cfa_decap_filter_free_input {
+	__le16 req_type;
+	__le16 cmpl_ring;
+	__le16 seq_id;
+	__le16 target_id;
+	__le64 resp_addr;
+	__le32 decap_filter_id;
+	__le32 unused_0;
+};
+
+/* Output (16 bytes) */
+struct hwrm_cfa_decap_filter_free_output {
+	__le16 error_code;
+	__le16 req_type;
+	__le16 seq_id;
+	__le16 resp_len;
+	__le32 unused_0;
+	u8 unused_1;
+	u8 unused_2;
+	u8 unused_3;
+	u8 valid;
+};
+
 /* hwrm_cfa_flow_alloc */
 /* Input (128 bytes) */
 struct hwrm_cfa_flow_alloc_input {
@@ -4634,6 +4765,7 @@ struct hwrm_tunnel_dst_port_query_input {
 	u8 tunnel_type;
 	#define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN       0x1UL
 	#define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GENEVE      0x5UL
+	#define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_V4    0x9UL
 	u8 unused_0[7];
 };
 
@@ -4662,9 +4794,10 @@ struct hwrm_tunnel_dst_port_alloc_input {
 	u8 tunnel_type;
 	#define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN       0x1UL
 	#define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE      0x5UL
+	#define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4    0x9UL
 	u8 unused_0;
 	__be16 tunnel_dst_port_val;
-	__le32 unused_1;
+	__be32 unused_1;
 };
 
 /* Output (16 bytes) */
@@ -4693,6 +4826,7 @@ struct hwrm_tunnel_dst_port_free_input {
 	u8 tunnel_type;
 	#define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN	   0x1UL
 	#define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE       0x5UL
+	#define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_V4     0x9UL
 	u8 unused_0;
 	__le16 tunnel_dst_port_id;
 	__le32 unused_1;
@@ -4848,6 +4982,8 @@ struct hwrm_fw_reset_input {
 	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_NETCTRL	   0x2UL
 	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE		   0x3UL
 	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_HOST		   0x4UL
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_AP		   0x5UL
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP		   0x6UL
 	u8 selfrst_status;
 	#define FW_RESET_REQ_SELFRST_STATUS_SELFRSTNONE	   0x0UL
 	#define FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP	   0x1UL
@@ -4888,6 +5024,8 @@ struct hwrm_fw_qstatus_input {
 	#define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_NETCTRL	   0x2UL
 	#define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_ROCE		   0x3UL
 	#define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_HOST		   0x4UL
+	#define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_AP		   0x5UL
+	#define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_CHIP		   0x6UL
 	u8 unused_0[7];
 };
 
@@ -5324,6 +5462,32 @@ struct hwrm_wol_reason_qcfg_output {
 	u8 valid;
 };
 
+/* hwrm_dbg_read_direct */
+/* Input (32 bytes) */
+struct hwrm_dbg_read_direct_input {
+	__le16 req_type;
+	__le16 cmpl_ring;
+	__le16 seq_id;
+	__le16 target_id;
+	__le64 resp_addr;
+	__le64 host_dest_addr;
+	__le32 read_addr;
+	__le32 read_len32;
+};
+
+/* Output (16 bytes) */
+struct hwrm_dbg_read_direct_output {
+	__le16 error_code;
+	__le16 req_type;
+	__le16 seq_id;
+	__le16 resp_len;
+	__le32 unused_0;
+	u8 unused_1;
+	u8 unused_2;
+	u8 unused_3;
+	u8 valid;
+};
+
 /* hwrm_nvm_read */
 /* Input (40 bytes) */
 struct hwrm_nvm_read_input {
@@ -5676,6 +5840,105 @@ struct hwrm_nvm_install_update_cmd_err {
 	u8 unused_0[7];
 };
 
+/* hwrm_nvm_get_variable */
+/* Input (40 bytes) */
+struct hwrm_nvm_get_variable_input {
+	__le16 req_type;
+	__le16 cmpl_ring;
+	__le16 seq_id;
+	__le16 target_id;
+	__le64 resp_addr;
+	__le64 dest_data_addr;
+	__le16 data_len;
+	__le16 option_num;
+	#define NVM_GET_VARIABLE_REQ_OPTION_NUM_RSVD_0		   0x0UL
+	#define NVM_GET_VARIABLE_REQ_OPTION_NUM_RSVD_FFFF	   0xffffUL
+	__le16 dimensions;
+	__le16 index_0;
+	__le16 index_1;
+	__le16 index_2;
+	__le16 index_3;
+	u8 flags;
+	#define NVM_GET_VARIABLE_REQ_FLAGS_FACTORY_DFLT	    0x1UL
+	u8 unused_0;
+};
+
+/* Output (16 bytes) */
+struct hwrm_nvm_get_variable_output {
+	__le16 error_code;
+	__le16 req_type;
+	__le16 seq_id;
+	__le16 resp_len;
+	__le16 data_len;
+	__le16 option_num;
+	#define NVM_GET_VARIABLE_RESP_OPTION_NUM_RSVD_0	   0x0UL
+	#define NVM_GET_VARIABLE_RESP_OPTION_NUM_RSVD_FFFF	   0xffffUL
+	u8 unused_0;
+	u8 unused_1;
+	u8 unused_2;
+	u8 valid;
+};
+
+/* Command specific Error Codes (8 bytes) */
+struct hwrm_nvm_get_variable_cmd_err {
+	u8 code;
+	#define NVM_GET_VARIABLE_CMD_ERR_CODE_UNKNOWN		   0x0UL
+	#define NVM_GET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST       0x1UL
+	#define NVM_GET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR	   0x2UL
+	#define NVM_GET_VARIABLE_CMD_ERR_CODE_LEN_TOO_SHORT       0x3UL
+	u8 unused_0[7];
+};
+
+/* hwrm_nvm_set_variable */
+/* Input (40 bytes) */
+struct hwrm_nvm_set_variable_input {
+	__le16 req_type;
+	__le16 cmpl_ring;
+	__le16 seq_id;
+	__le16 target_id;
+	__le64 resp_addr;
+	__le64 src_data_addr;
+	__le16 data_len;
+	__le16 option_num;
+	#define NVM_SET_VARIABLE_REQ_OPTION_NUM_RSVD_0		   0x0UL
+	#define NVM_SET_VARIABLE_REQ_OPTION_NUM_RSVD_FFFF	   0xffffUL
+	__le16 dimensions;
+	__le16 index_0;
+	__le16 index_1;
+	__le16 index_2;
+	__le16 index_3;
+	u8 flags;
+	#define NVM_SET_VARIABLE_REQ_FLAGS_FORCE_FLUSH		    0x1UL
+	#define NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_MASK       0xeUL
+	#define NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_SFT	    1
+	#define NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_NONE      (0x0UL << 1)
+	#define NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_HMAC_SHA1 (0x1UL << 1)
+	#define NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_LAST    NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_HMAC_SHA1
+	u8 unused_0;
+};
+
+/* Output (16 bytes) */
+struct hwrm_nvm_set_variable_output {
+	__le16 error_code;
+	__le16 req_type;
+	__le16 seq_id;
+	__le16 resp_len;
+	__le32 unused_0;
+	u8 unused_1;
+	u8 unused_2;
+	u8 unused_3;
+	u8 valid;
+};
+
+/* Command specific Error Codes (8 bytes) */
+struct hwrm_nvm_set_variable_cmd_err {
+	u8 code;
+	#define NVM_SET_VARIABLE_CMD_ERR_CODE_UNKNOWN		   0x0UL
+	#define NVM_SET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST       0x1UL
+	#define NVM_SET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR	   0x2UL
+	u8 unused_0[7];
+};
+
 /* hwrm_selftest_qlist */
 /* Input (16 bytes) */
 struct hwrm_selftest_qlist_input {
@@ -5686,7 +5949,7 @@ struct hwrm_selftest_qlist_input {
 	__le64 resp_addr;
 };
 
-/* Output (248 bytes) */
+/* Output (280 bytes) */
 struct hwrm_selftest_qlist_output {
 	__le16 error_code;
 	__le16 req_type;
@@ -5698,15 +5961,15 @@ struct hwrm_selftest_qlist_output {
 	#define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_LINK_TEST      0x2UL
 	#define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_REGISTER_TEST  0x4UL
 	#define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_MEMORY_TEST    0x8UL
-	#define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_PCIE_EYE_TEST  0x10UL
-	#define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_ETHERNET_EYE_TEST 0x20UL
+	#define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_PCIE_SERDES_TEST 0x10UL
+	#define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_ETHERNET_SERDES_TEST 0x20UL
 	u8 offline_tests;
 	#define SELFTEST_QLIST_RESP_OFFLINE_TESTS_NVM_TEST	    0x1UL
 	#define SELFTEST_QLIST_RESP_OFFLINE_TESTS_LINK_TEST	    0x2UL
 	#define SELFTEST_QLIST_RESP_OFFLINE_TESTS_REGISTER_TEST    0x4UL
 	#define SELFTEST_QLIST_RESP_OFFLINE_TESTS_MEMORY_TEST      0x8UL
-	#define SELFTEST_QLIST_RESP_OFFLINE_TESTS_PCIE_EYE_TEST    0x10UL
-	#define SELFTEST_QLIST_RESP_OFFLINE_TESTS_ETHERNET_EYE_TEST 0x20UL
+	#define SELFTEST_QLIST_RESP_OFFLINE_TESTS_PCIE_SERDES_TEST 0x10UL
+	#define SELFTEST_QLIST_RESP_OFFLINE_TESTS_ETHERNET_SERDES_TEST 0x20UL
 	u8 unused_0;
 	__le16 test_timeout;
 	u8 unused_1;
@@ -5719,6 +5982,11 @@ struct hwrm_selftest_qlist_output {
 	char test5_name[32];
 	char test6_name[32];
 	char test7_name[32];
+	__le32 unused_3;
+	u8 unused_4;
+	u8 unused_5;
+	u8 unused_6;
+	u8 valid;
 };
 
 /* hwrm_selftest_exec */
@@ -5734,8 +6002,8 @@ struct hwrm_selftest_exec_input {
 	#define SELFTEST_EXEC_REQ_FLAGS_LINK_TEST		    0x2UL
 	#define SELFTEST_EXEC_REQ_FLAGS_REGISTER_TEST		    0x4UL
 	#define SELFTEST_EXEC_REQ_FLAGS_MEMORY_TEST		    0x8UL
-	#define SELFTEST_EXEC_REQ_FLAGS_PCIE_EYE_TEST		    0x10UL
-	#define SELFTEST_EXEC_REQ_FLAGS_ETHERNET_EYE_TEST	    0x20UL
+	#define SELFTEST_EXEC_REQ_FLAGS_PCIE_SERDES_TEST	    0x10UL
+	#define SELFTEST_EXEC_REQ_FLAGS_ETHERNET_SERDES_TEST       0x20UL
 	u8 unused_0[7];
 };
 
@@ -5750,16 +6018,21 @@ struct hwrm_selftest_exec_output {
 	#define SELFTEST_EXEC_RESP_REQUESTED_TESTS_LINK_TEST       0x2UL
 	#define SELFTEST_EXEC_RESP_REQUESTED_TESTS_REGISTER_TEST   0x4UL
 	#define SELFTEST_EXEC_RESP_REQUESTED_TESTS_MEMORY_TEST     0x8UL
-	#define SELFTEST_EXEC_RESP_REQUESTED_TESTS_PCIE_EYE_TEST   0x10UL
-	#define SELFTEST_EXEC_RESP_REQUESTED_TESTS_ETHERNET_EYE_TEST 0x20UL
+	#define SELFTEST_EXEC_RESP_REQUESTED_TESTS_PCIE_SERDES_TEST 0x10UL
+	#define SELFTEST_EXEC_RESP_REQUESTED_TESTS_ETHERNET_SERDES_TEST 0x20UL
 	u8 test_success;
 	#define SELFTEST_EXEC_RESP_TEST_SUCCESS_NVM_TEST	    0x1UL
 	#define SELFTEST_EXEC_RESP_TEST_SUCCESS_LINK_TEST	    0x2UL
 	#define SELFTEST_EXEC_RESP_TEST_SUCCESS_REGISTER_TEST      0x4UL
 	#define SELFTEST_EXEC_RESP_TEST_SUCCESS_MEMORY_TEST	    0x8UL
-	#define SELFTEST_EXEC_RESP_TEST_SUCCESS_PCIE_EYE_TEST      0x10UL
-	#define SELFTEST_EXEC_RESP_TEST_SUCCESS_ETHERNET_EYE_TEST  0x20UL
-	__le16 unused_0[3];
+	#define SELFTEST_EXEC_RESP_TEST_SUCCESS_PCIE_SERDES_TEST   0x10UL
+	#define SELFTEST_EXEC_RESP_TEST_SUCCESS_ETHERNET_SERDES_TEST 0x20UL
+	u8 unused_0;
+	u8 unused_1;
+	u8 unused_2;
+	u8 unused_3;
+	u8 unused_4;
+	u8 valid;
 };
 
 /* hwrm_selftest_irq */
@@ -5772,12 +6045,50 @@ struct hwrm_selftest_irq_input {
 	__le64 resp_addr;
 };
 
-/* Output (8 bytes) */
+/* Output (16 bytes) */
 struct hwrm_selftest_irq_output {
 	__le16 error_code;
 	__le16 req_type;
 	__le16 seq_id;
 	__le16 resp_len;
+	__le32 unused_0;
+	u8 unused_1;
+	u8 unused_2;
+	u8 unused_3;
+	u8 valid;
+};
+
+/* hwrm_selftest_retrieve_serdes_data */
+/* Input (32 bytes) */
+struct hwrm_selftest_retrieve_serdes_data_input {
+	__le16 req_type;
+	__le16 cmpl_ring;
+	__le16 seq_id;
+	__le16 target_id;
+	__le64 resp_addr;
+	__le64 resp_data_addr;
+	__le32 resp_data_offset;
+	__le16 data_len;
+	u8 flags;
+	#define SELFTEST_RETRIEVE_SERDES_DATA_REQ_FLAGS_UNUSED_TEST_MASK 0xfUL
+	#define SELFTEST_RETRIEVE_SERDES_DATA_REQ_FLAGS_UNUSED_TEST_SFT 0
+	#define SELFTEST_RETRIEVE_SERDES_DATA_REQ_FLAGS_PCIE_SERDES_TEST 0x10UL
+	#define SELFTEST_RETRIEVE_SERDES_DATA_REQ_FLAGS_ETHERNET_SERDES_TEST 0x20UL
+	u8 unused_0;
+};
+
+/* Output (16 bytes) */
+struct hwrm_selftest_retrieve_serdes_data_output {
+	__le16 error_code;
+	__le16 req_type;
+	__le16 seq_id;
+	__le16 resp_len;
+	__le16 total_data_len;
+	__le16 copied_data_len;
+	u8 unused_0;
+	u8 unused_1;
+	u8 unused_2;
+	u8 valid;
 };
 
 /* Hardware Resource Manager Specification */
@@ -5938,10 +6249,16 @@ struct cmd_nums {
 	#define HWRM_CFA_DECAP_FILTER_ALLOC			   (0x108UL)
 	#define HWRM_CFA_DECAP_FILTER_FREE			   (0x109UL)
 	#define HWRM_CFA_VLAN_ANTISPOOF_QCFG			   (0x10aUL)
+	#define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC		   (0x10bUL)
+	#define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE		   (0x10cUL)
+	#define HWRM_CFA_PAIR_ALLOC				   (0x10dUL)
+	#define HWRM_CFA_PAIR_FREE				   (0x10eUL)
+	#define HWRM_CFA_PAIR_INFO				   (0x10fUL)
+	#define HWRM_FW_IPC_MSG				   (0x110UL)
 	#define HWRM_SELFTEST_QLIST				   (0x200UL)
 	#define HWRM_SELFTEST_EXEC				   (0x201UL)
 	#define HWRM_SELFTEST_IRQ				   (0x202UL)
-	#define HWRM_SELFTEST_RETREIVE_EYE_DATA		   (0x203UL)
+	#define HWRM_SELFTEST_RETRIEVE_SERDES_DATA		   (0x203UL)
 	#define HWRM_DBG_READ_DIRECT				   (0xff10UL)
 	#define HWRM_DBG_READ_INDIRECT				   (0xff11UL)
 	#define HWRM_DBG_WRITE_DIRECT				   (0xff12UL)
@@ -5949,6 +6266,9 @@ struct cmd_nums {
 	#define HWRM_DBG_DUMP					   (0xff14UL)
 	#define HWRM_DBG_ERASE_NVM				   (0xff15UL)
 	#define HWRM_DBG_CFG					   (0xff16UL)
+	#define HWRM_DBG_COREDUMP_LIST				   (0xff17UL)
+	#define HWRM_DBG_COREDUMP_INITIATE			   (0xff18UL)
+	#define HWRM_DBG_COREDUMP_RETRIEVE			   (0xff19UL)
 	#define HWRM_NVM_FACTORY_DEFAULTS			   (0xffeeUL)
 	#define HWRM_NVM_VALIDATE_OPTION			   (0xffefUL)
 	#define HWRM_NVM_FLUSH					   (0xfff0UL)
@@ -6123,6 +6443,58 @@ struct rx_port_stats {
 	__le64 rx_stat_err;
 };
 
+/* VXLAN IPv4 encapsulation structure (16 bytes) */
+struct hwrm_vxlan_ipv4_hdr {
+	u8 ver_hlen;
+	#define VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_MASK	    0xfUL
+	#define VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_SFT	    0
+	#define VXLAN_IPV4_HDR_VER_HLEN_VERSION_MASK		    0xf0UL
+	#define VXLAN_IPV4_HDR_VER_HLEN_VERSION_SFT		    4
+	u8 tos;
+	__be16 ip_id;
+	__be16 flags_frag_offset;
+	u8 ttl;
+	u8 protocol;
+	__be32 src_ip_addr;
+	__be32 dest_ip_addr;
+};
+
+/* VXLAN IPv6 encapsulation structure (32 bytes) */
+struct hwrm_vxlan_ipv6_hdr {
+	__be32 ver_tc_flow_label;
+	#define VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_VER_SFT	   0x1cUL
+	#define VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_VER_MASK	   0xf0000000UL
+	#define VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_TC_SFT	   0x14UL
+	#define VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_TC_MASK	   0xff00000UL
+	#define VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_FLOW_LABEL_SFT   0x0UL
+	#define VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_FLOW_LABEL_MASK  0xfffffUL
+	__be16 payload_len;
+	u8 next_hdr;
+	u8 ttl;
+	__be32 src_ip_addr[4];
+	__be32 dest_ip_addr[4];
+};
+
+/* VXLAN encapsulation structure (72 bytes) */
+struct hwrm_cfa_encap_data_vxlan {
+	u8 src_mac_addr[6];
+	__le16 unused_0;
+	u8 dst_mac_addr[6];
+	u8 num_vlan_tags;
+	u8 unused_1;
+	__be16 ovlan_tpid;
+	__be16 ovlan_tci;
+	__be16 ivlan_tpid;
+	__be16 ivlan_tci;
+	__le32 l3[10];
+	#define CFA_ENCAP_DATA_VXLAN_L3_VER_MASK		   0xfUL
+	#define CFA_ENCAP_DATA_VXLAN_L3_VER_IPV4		   0x4UL
+	#define CFA_ENCAP_DATA_VXLAN_L3_VER_IPV6		   0x6UL
+	__be16 src_port;
+	__be16 dst_port;
+	__be32 vni;
+};
+
 /* Periodic Statistics Context DMA to host (160 bytes) */
 struct ctx_hw_stats {
 	__le64 rx_ucast_pkts;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index 7dd3d131043a..d5031f436f83 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -16,6 +16,7 @@
 #include <net/tc_act/tc_skbedit.h>
 #include <net/tc_act/tc_mirred.h>
 #include <net/tc_act/tc_vlan.h>
+#include <net/tc_act/tc_tunnel_key.h>
 
 #include "bnxt_hsi.h"
 #include "bnxt.h"
@@ -23,8 +24,6 @@
 #include "bnxt_tc.h"
 #include "bnxt_vfr.h"
 
-#ifdef CONFIG_BNXT_FLOWER_OFFLOAD
-
 #define BNXT_FID_INVALID			0xffff
 #define VLAN_TCI(vid, prio)	((vid) | ((prio) << VLAN_PRIO_SHIFT))
 
@@ -91,6 +90,23 @@ static void bnxt_tc_parse_vlan(struct bnxt *bp,
 	}
 }
 
+static int bnxt_tc_parse_tunnel_set(struct bnxt *bp,
+				    struct bnxt_tc_actions *actions,
+				    const struct tc_action *tc_act)
+{
+	struct ip_tunnel_info *tun_info = tcf_tunnel_info(tc_act);
+	struct ip_tunnel_key *tun_key = &tun_info->key;
+
+	if (ip_tunnel_info_af(tun_info) != AF_INET) {
+		netdev_info(bp->dev, "only IPv4 tunnel-encap is supported");
+		return -EOPNOTSUPP;
+	}
+
+	actions->tun_encap_key = *tun_key;
+	actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP;
+	return 0;
+}
+
 static int bnxt_tc_parse_actions(struct bnxt *bp,
 				 struct bnxt_tc_actions *actions,
 				 struct tcf_exts *tc_exts)
@@ -125,9 +141,35 @@ static int bnxt_tc_parse_actions(struct bnxt *bp,
 			bnxt_tc_parse_vlan(bp, actions, tc_act);
 			continue;
 		}
+
+		/* Tunnel encap */
+		if (is_tcf_tunnel_set(tc_act)) {
+			rc = bnxt_tc_parse_tunnel_set(bp, actions, tc_act);
+			if (rc)
+				return rc;
+			continue;
+		}
+
+		/* Tunnel decap */
+		if (is_tcf_tunnel_release(tc_act)) {
+			actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_DECAP;
+			continue;
+		}
 	}
 
-	return 0;
+	if (rc)
+		return rc;
+
+	/* Tunnel encap/decap action must be accompanied by a redirect action */
+	if ((actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP ||
+	     actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP) &&
+	    !(actions->flags & BNXT_TC_ACTION_FLAG_FWD)) {
+		netdev_info(bp->dev,
+			    "error: no redir action along with encap/decap");
+		return -EINVAL;
+	}
+
+	return rc;
 }
 
 #define GET_KEY(flow_cmd, key_type)					\
@@ -254,6 +296,54 @@ static int bnxt_tc_parse_flow(struct bnxt *bp,
 		flow->l4_mask.icmp.code = mask->code;
 	}
 
+	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+		struct flow_dissector_key_control *key =
+			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_CONTROL);
+
+		addr_type = key->addr_type;
+	}
+
+	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
+		struct flow_dissector_key_ipv4_addrs *key =
+			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
+		struct flow_dissector_key_ipv4_addrs *mask =
+				GET_MASK(tc_flow_cmd,
+					 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
+
+		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS;
+		flow->tun_key.u.ipv4.dst = key->dst;
+		flow->tun_mask.u.ipv4.dst = mask->dst;
+		flow->tun_key.u.ipv4.src = key->src;
+		flow->tun_mask.u.ipv4.src = mask->src;
+	} else if (dissector_uses_key(dissector,
+				      FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
+		return -EOPNOTSUPP;
+	}
+
+	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		struct flow_dissector_key_keyid *key =
+			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_KEYID);
+		struct flow_dissector_key_keyid *mask =
+			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_KEYID);
+
+		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ID;
+		flow->tun_key.tun_id = key32_to_tunnel_id(key->keyid);
+		flow->tun_mask.tun_id = key32_to_tunnel_id(mask->keyid);
+	}
+
+	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
+		struct flow_dissector_key_ports *key =
+			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_PORTS);
+		struct flow_dissector_key_ports *mask =
+			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_PORTS);
+
+		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_PORTS;
+		flow->tun_key.tp_dst = key->dst;
+		flow->tun_mask.tp_dst = mask->dst;
+		flow->tun_key.tp_src = key->src;
+		flow->tun_mask.tp_src = mask->src;
+	}
+
 	return bnxt_tc_parse_actions(bp, &flow->actions, tc_flow_cmd->exts);
 }
 
@@ -295,7 +385,8 @@ static bool is_wildcard(void *mask, int len)
 }
 
 static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
-				    __le16 ref_flow_handle, __le16 *flow_handle)
+				    __le16 ref_flow_handle,
+				    __le32 tunnel_handle, __le16 *flow_handle)
 {
 	struct hwrm_cfa_flow_alloc_output *resp = bp->hwrm_cmd_resp_addr;
 	struct bnxt_tc_actions *actions = &flow->actions;
@@ -309,6 +400,14 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
 
 	req.src_fid = cpu_to_le16(flow->src_fid);
 	req.ref_flow_handle = ref_flow_handle;
+
+	if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP ||
+	    actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
+		req.tunnel_handle = tunnel_handle;
+		flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_TUNNEL;
+		action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_TUNNEL;
+	}
+
 	req.ethertype = flow->l2_key.ether_type;
 	req.ip_proto = flow->l4_key.ip_proto;
 
@@ -405,78 +504,153 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
 	return rc;
 }
 
-/* Add val to accum while handling a possible wraparound
- * of val. Eventhough val is of type u64, its actual width
- * is denoted by mask and will wrap-around beyond that width.
- */
-static void accumulate_val(u64 *accum, u64 val, u64 mask)
+static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp,
+				       struct bnxt_tc_flow *flow,
+				       struct bnxt_tc_l2_key *l2_info,
+				       __le32 ref_decap_handle,
+				       __le32 *decap_filter_handle)
 {
-#define low_bits(x, mask)		((x) & (mask))
-#define high_bits(x, mask)		((x) & ~(mask))
-	bool wrapped = val < low_bits(*accum, mask);
+	struct hwrm_cfa_decap_filter_alloc_output *resp =
+						bp->hwrm_cmd_resp_addr;
+	struct hwrm_cfa_decap_filter_alloc_input req = { 0 };
+	struct ip_tunnel_key *tun_key = &flow->tun_key;
+	u32 enables = 0;
+	int rc;
 
-	*accum = high_bits(*accum, mask) + val;
-	if (wrapped)
-		*accum += (mask + 1);
-}
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_DECAP_FILTER_ALLOC, -1, -1);
 
-/* The HW counters' width is much less than 64bits.
- * Handle possible wrap-around while updating the stat counters
- */
-static void bnxt_flow_stats_fix_wraparound(struct bnxt_tc_info *tc_info,
-					   struct bnxt_tc_flow_stats *stats,
-					   struct bnxt_tc_flow_stats *hw_stats)
-{
-	accumulate_val(&stats->bytes, hw_stats->bytes, tc_info->bytes_mask);
-	accumulate_val(&stats->packets, hw_stats->packets,
-		       tc_info->packets_mask);
+	req.flags = cpu_to_le32(CFA_DECAP_FILTER_ALLOC_REQ_FLAGS_OVS_TUNNEL);
+	enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_TYPE |
+		   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IP_PROTOCOL;
+	req.tunnel_type = CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN;
+	req.ip_protocol = CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP;
+
+	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ID) {
+		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_ID;
+		/* tunnel_id is wrongly defined in hsi defn. as __le32 */
+		req.tunnel_id = tunnel_id_to_key32(tun_key->tun_id);
+	}
+
+	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS) {
+		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR |
+			   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_MACADDR;
+		ether_addr_copy(req.dst_macaddr, l2_info->dmac);
+		ether_addr_copy(req.src_macaddr, l2_info->smac);
+	}
+	if (l2_info->num_vlans) {
+		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_T_IVLAN_VID;
+		req.t_ivlan_vid = l2_info->inner_vlan_tci;
+	}
+
+	enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_ETHERTYPE;
+	req.ethertype = htons(ETH_P_IP);
+
+	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS) {
+		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_IPADDR |
+			   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_IPADDR |
+			   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IPADDR_TYPE;
+		req.ip_addr_type = CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4;
+		req.dst_ipaddr[0] = tun_key->u.ipv4.dst;
+		req.src_ipaddr[0] = tun_key->u.ipv4.src;
+	}
+
+	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_PORTS) {
+		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_PORT;
+		req.dst_port = tun_key->tp_dst;
+	}
+
+	/* Eventhough the decap_handle returned by hwrm_cfa_decap_filter_alloc
+	 * is defined as __le32, l2_ctxt_ref_id is defined in HSI as __le16.
+	 */
+	req.l2_ctxt_ref_id = (__force __le16)ref_decap_handle;
+	req.enables = cpu_to_le32(enables);
+
+	mutex_lock(&bp->hwrm_cmd_lock);
+	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (!rc)
+		*decap_filter_handle = resp->decap_filter_id;
+	else
+		netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
+	mutex_unlock(&bp->hwrm_cmd_lock);
+
+	return rc;
 }
 
-/* Fix possible wraparound of the stats queried from HW, calculate
- * the delta from prev_stats, and also update the prev_stats.
- * The HW flow stats are fetched under the hwrm_cmd_lock mutex.
- * This routine is best called while under the mutex so that the
- * stats processing happens atomically.
- */
-static void bnxt_flow_stats_calc(struct bnxt_tc_info *tc_info,
-				 struct bnxt_tc_flow *flow,
-				 struct bnxt_tc_flow_stats *stats)
+static int hwrm_cfa_decap_filter_free(struct bnxt *bp,
+				      __le32 decap_filter_handle)
 {
-	struct bnxt_tc_flow_stats *acc_stats, *prev_stats;
+	struct hwrm_cfa_decap_filter_free_input req = { 0 };
+	int rc;
 
-	acc_stats = &flow->stats;
-	bnxt_flow_stats_fix_wraparound(tc_info, acc_stats, stats);
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_DECAP_FILTER_FREE, -1, -1);
+	req.decap_filter_id = decap_filter_handle;
 
-	prev_stats = &flow->prev_stats;
-	stats->bytes = acc_stats->bytes - prev_stats->bytes;
-	stats->packets = acc_stats->packets - prev_stats->packets;
-	*prev_stats = *acc_stats;
+	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (rc)
+		netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
+	return rc;
 }
 
-static int bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp,
-					__le16 flow_handle,
-					struct bnxt_tc_flow *flow,
-					struct bnxt_tc_flow_stats *stats)
+static int hwrm_cfa_encap_record_alloc(struct bnxt *bp,
+				       struct ip_tunnel_key *encap_key,
+				       struct bnxt_tc_l2_key *l2_info,
+				       __le32 *encap_record_handle)
 {
-	struct hwrm_cfa_flow_stats_output *resp = bp->hwrm_cmd_resp_addr;
-	struct hwrm_cfa_flow_stats_input req = { 0 };
+	struct hwrm_cfa_encap_record_alloc_output *resp =
+						bp->hwrm_cmd_resp_addr;
+	struct hwrm_cfa_encap_record_alloc_input req = { 0 };
+	struct hwrm_cfa_encap_data_vxlan *encap =
+			(struct hwrm_cfa_encap_data_vxlan *)&req.encap_data;
+	struct hwrm_vxlan_ipv4_hdr *encap_ipv4 =
+				(struct hwrm_vxlan_ipv4_hdr *)encap->l3;
 	int rc;
 
-	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_STATS, -1, -1);
-	req.num_flows = cpu_to_le16(1);
-	req.flow_handle_0 = flow_handle;
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_ENCAP_RECORD_ALLOC, -1, -1);
 
-	mutex_lock(&bp->hwrm_cmd_lock);
-	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-	if (!rc) {
-		stats->packets = le64_to_cpu(resp->packet_0);
-		stats->bytes = le64_to_cpu(resp->byte_0);
-		bnxt_flow_stats_calc(&bp->tc_info, flow, stats);
-	} else {
-		netdev_info(bp->dev, "error rc=%d", rc);
+	req.encap_type = CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN;
+
+	ether_addr_copy(encap->dst_mac_addr, l2_info->dmac);
+	ether_addr_copy(encap->src_mac_addr, l2_info->smac);
+	if (l2_info->num_vlans) {
+		encap->num_vlan_tags = l2_info->num_vlans;
+		encap->ovlan_tci = l2_info->inner_vlan_tci;
+		encap->ovlan_tpid = l2_info->inner_vlan_tpid;
 	}
 
+	encap_ipv4->ver_hlen = 4 << VXLAN_IPV4_HDR_VER_HLEN_VERSION_SFT;
+	encap_ipv4->ver_hlen |= 5 << VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_SFT;
+	encap_ipv4->ttl = encap_key->ttl;
+
+	encap_ipv4->dest_ip_addr = encap_key->u.ipv4.dst;
+	encap_ipv4->src_ip_addr = encap_key->u.ipv4.src;
+	encap_ipv4->protocol = IPPROTO_UDP;
+
+	encap->dst_port = encap_key->tp_dst;
+	encap->vni = tunnel_id_to_key32(encap_key->tun_id);
+
+	mutex_lock(&bp->hwrm_cmd_lock);
+	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (!rc)
+		*encap_record_handle = resp->encap_record_id;
+	else
+		netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
 	mutex_unlock(&bp->hwrm_cmd_lock);
+
+	return rc;
+}
+
+static int hwrm_cfa_encap_record_free(struct bnxt *bp,
+				      __le32 encap_record_handle)
+{
+	struct hwrm_cfa_encap_record_free_input req = { 0 };
+	int rc;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_ENCAP_RECORD_FREE, -1, -1);
+	req.encap_record_id = encap_record_handle;
+
+	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (rc)
+		netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
 	return rc;
 }
 
@@ -484,7 +658,7 @@ static int bnxt_tc_put_l2_node(struct bnxt *bp,
 			       struct bnxt_tc_flow_node *flow_node)
 {
 	struct bnxt_tc_l2_node *l2_node = flow_node->l2_node;
-	struct bnxt_tc_info *tc_info = &bp->tc_info;
+	struct bnxt_tc_info *tc_info = bp->tc_info;
 	int rc;
 
 	/* remove flow_node from the L2 shared flow list */
@@ -521,7 +695,7 @@ bnxt_tc_get_l2_node(struct bnxt *bp, struct rhashtable *l2_table,
 		rc = rhashtable_insert_fast(l2_table, &l2_node->node,
 					    ht_params);
 		if (rc) {
-			kfree(l2_node);
+			kfree_rcu(l2_node, rcu);
 			netdev_err(bp->dev,
 				   "Error: %s: rhashtable_insert_fast: %d",
 				   __func__, rc);
@@ -540,7 +714,7 @@ bnxt_tc_get_ref_flow_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
 			    struct bnxt_tc_flow_node *flow_node,
 			    __le16 *ref_flow_handle)
 {
-	struct bnxt_tc_info *tc_info = &bp->tc_info;
+	struct bnxt_tc_info *tc_info = bp->tc_info;
 	struct bnxt_tc_flow_node *ref_flow_node;
 	struct bnxt_tc_l2_node *l2_node;
 
@@ -590,10 +764,386 @@ static bool bnxt_tc_can_offload(struct bnxt *bp, struct bnxt_tc_flow *flow)
 	return true;
 }
 
+/* Returns the final refcount of the node on success
+ * or a -ve error code on failure
+ */
+static int bnxt_tc_put_tunnel_node(struct bnxt *bp,
+				   struct rhashtable *tunnel_table,
+				   struct rhashtable_params *ht_params,
+				   struct bnxt_tc_tunnel_node *tunnel_node)
+{
+	int rc;
+
+	if (--tunnel_node->refcount == 0) {
+		rc =  rhashtable_remove_fast(tunnel_table, &tunnel_node->node,
+					     *ht_params);
+		if (rc) {
+			netdev_err(bp->dev, "rhashtable_remove_fast rc=%d", rc);
+			rc = -1;
+		}
+		kfree_rcu(tunnel_node, rcu);
+		return rc;
+	} else {
+		return tunnel_node->refcount;
+	}
+}
+
+/* Get (or add) either encap or decap tunnel node from/to the supplied
+ * hash table.
+ */
+static struct bnxt_tc_tunnel_node *
+bnxt_tc_get_tunnel_node(struct bnxt *bp, struct rhashtable *tunnel_table,
+			struct rhashtable_params *ht_params,
+			struct ip_tunnel_key *tun_key)
+{
+	struct bnxt_tc_tunnel_node *tunnel_node;
+	int rc;
+
+	tunnel_node = rhashtable_lookup_fast(tunnel_table, tun_key, *ht_params);
+	if (!tunnel_node) {
+		tunnel_node = kzalloc(sizeof(*tunnel_node), GFP_KERNEL);
+		if (!tunnel_node) {
+			rc = -ENOMEM;
+			goto err;
+		}
+
+		tunnel_node->key = *tun_key;
+		tunnel_node->tunnel_handle = INVALID_TUNNEL_HANDLE;
+		rc = rhashtable_insert_fast(tunnel_table, &tunnel_node->node,
+					    *ht_params);
+		if (rc) {
+			kfree_rcu(tunnel_node, rcu);
+			goto err;
+		}
+	}
+	tunnel_node->refcount++;
+	return tunnel_node;
+err:
+	netdev_info(bp->dev, "error rc=%d", rc);
+	return NULL;
+}
+
+static int bnxt_tc_get_ref_decap_handle(struct bnxt *bp,
+					struct bnxt_tc_flow *flow,
+					struct bnxt_tc_l2_key *l2_key,
+					struct bnxt_tc_flow_node *flow_node,
+					__le32 *ref_decap_handle)
+{
+	struct bnxt_tc_info *tc_info = bp->tc_info;
+	struct bnxt_tc_flow_node *ref_flow_node;
+	struct bnxt_tc_l2_node *decap_l2_node;
+
+	decap_l2_node = bnxt_tc_get_l2_node(bp, &tc_info->decap_l2_table,
+					    tc_info->decap_l2_ht_params,
+					    l2_key);
+	if (!decap_l2_node)
+		return -1;
+
+	/* If any other flow is using this decap_l2_node, use it's decap_handle
+	 * as the ref_decap_handle
+	 */
+	if (decap_l2_node->refcount > 0) {
+		ref_flow_node =
+			list_first_entry(&decap_l2_node->common_l2_flows,
+					 struct bnxt_tc_flow_node,
+					 decap_l2_list_node);
+		*ref_decap_handle = ref_flow_node->decap_node->tunnel_handle;
+	} else {
+		*ref_decap_handle = INVALID_TUNNEL_HANDLE;
+	}
+
+	/* Insert the l2_node into the flow_node so that subsequent flows
+	 * with a matching decap l2 key can use the decap_filter_handle of
+	 * this flow as their ref_decap_handle
+	 */
+	flow_node->decap_l2_node = decap_l2_node;
+	list_add(&flow_node->decap_l2_list_node,
+		 &decap_l2_node->common_l2_flows);
+	decap_l2_node->refcount++;
+	return 0;
+}
+
+static void bnxt_tc_put_decap_l2_node(struct bnxt *bp,
+				      struct bnxt_tc_flow_node *flow_node)
+{
+	struct bnxt_tc_l2_node *decap_l2_node = flow_node->decap_l2_node;
+	struct bnxt_tc_info *tc_info = bp->tc_info;
+	int rc;
+
+	/* remove flow_node from the decap L2 sharing flow list */
+	list_del(&flow_node->decap_l2_list_node);
+	if (--decap_l2_node->refcount == 0) {
+		rc =  rhashtable_remove_fast(&tc_info->decap_l2_table,
+					     &decap_l2_node->node,
+					     tc_info->decap_l2_ht_params);
+		if (rc)
+			netdev_err(bp->dev, "rhashtable_remove_fast rc=%d", rc);
+		kfree_rcu(decap_l2_node, rcu);
+	}
+}
+
+static void bnxt_tc_put_decap_handle(struct bnxt *bp,
+				     struct bnxt_tc_flow_node *flow_node)
+{
+	__le32 decap_handle = flow_node->decap_node->tunnel_handle;
+	struct bnxt_tc_info *tc_info = bp->tc_info;
+	int rc;
+
+	if (flow_node->decap_l2_node)
+		bnxt_tc_put_decap_l2_node(bp, flow_node);
+
+	rc = bnxt_tc_put_tunnel_node(bp, &tc_info->decap_table,
+				     &tc_info->decap_ht_params,
+				     flow_node->decap_node);
+	if (!rc && decap_handle != INVALID_TUNNEL_HANDLE)
+		hwrm_cfa_decap_filter_free(bp, decap_handle);
+}
+
+static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp,
+				       struct ip_tunnel_key *tun_key,
+				       struct bnxt_tc_l2_key *l2_info,
+				       struct net_device *real_dst_dev)
+{
+#ifdef CONFIG_INET
+	struct flowi4 flow = { {0} };
+	struct net_device *dst_dev;
+	struct neighbour *nbr;
+	struct rtable *rt;
+	int rc;
+
+	flow.flowi4_proto = IPPROTO_UDP;
+	flow.fl4_dport = tun_key->tp_dst;
+	flow.daddr = tun_key->u.ipv4.dst;
+
+	rt = ip_route_output_key(dev_net(real_dst_dev), &flow);
+	if (IS_ERR(rt)) {
+		netdev_info(bp->dev, "no route to %pI4b", &flow.daddr);
+		return -EOPNOTSUPP;
+	}
+
+	/* The route must either point to the real_dst_dev or a dst_dev that
+	 * uses the real_dst_dev.
+	 */
+	dst_dev = rt->dst.dev;
+	if (is_vlan_dev(dst_dev)) {
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+		struct vlan_dev_priv *vlan = vlan_dev_priv(dst_dev);
+
+		if (vlan->real_dev != real_dst_dev) {
+			netdev_info(bp->dev,
+				    "dst_dev(%s) doesn't use PF-if(%s)",
+				    netdev_name(dst_dev),
+				    netdev_name(real_dst_dev));
+			rc = -EOPNOTSUPP;
+			goto put_rt;
+		}
+		l2_info->inner_vlan_tci = htons(vlan->vlan_id);
+		l2_info->inner_vlan_tpid = vlan->vlan_proto;
+		l2_info->num_vlans = 1;
+#endif
+	} else if (dst_dev != real_dst_dev) {
+		netdev_info(bp->dev,
+			    "dst_dev(%s) for %pI4b is not PF-if(%s)",
+			    netdev_name(dst_dev), &flow.daddr,
+			    netdev_name(real_dst_dev));
+		rc = -EOPNOTSUPP;
+		goto put_rt;
+	}
+
+	nbr = dst_neigh_lookup(&rt->dst, &flow.daddr);
+	if (!nbr) {
+		netdev_info(bp->dev, "can't lookup neighbor for %pI4b",
+			    &flow.daddr);
+		rc = -EOPNOTSUPP;
+		goto put_rt;
+	}
+
+	tun_key->u.ipv4.src = flow.saddr;
+	tun_key->ttl = ip4_dst_hoplimit(&rt->dst);
+	neigh_ha_snapshot(l2_info->dmac, nbr, dst_dev);
+	ether_addr_copy(l2_info->smac, dst_dev->dev_addr);
+	neigh_release(nbr);
+	ip_rt_put(rt);
+
+	return 0;
+put_rt:
+	ip_rt_put(rt);
+	return rc;
+#else
+	return -EOPNOTSUPP;
+#endif
+}
+
+static int bnxt_tc_get_decap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
+				    struct bnxt_tc_flow_node *flow_node,
+				    __le32 *decap_filter_handle)
+{
+	struct ip_tunnel_key *decap_key = &flow->tun_key;
+	struct bnxt_tc_info *tc_info = bp->tc_info;
+	struct bnxt_tc_l2_key l2_info = { {0} };
+	struct bnxt_tc_tunnel_node *decap_node;
+	struct ip_tunnel_key tun_key = { 0 };
+	struct bnxt_tc_l2_key *decap_l2_info;
+	__le32 ref_decap_handle;
+	int rc;
+
+	/* Check if there's another flow using the same tunnel decap.
+	 * If not, add this tunnel to the table and resolve the other
+	 * tunnel header fileds
+	 */
+	decap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->decap_table,
+					     &tc_info->decap_ht_params,
+					     decap_key);
+	if (!decap_node)
+		return -ENOMEM;
+
+	flow_node->decap_node = decap_node;
+
+	if (decap_node->tunnel_handle != INVALID_TUNNEL_HANDLE)
+		goto done;
+
+	/* Resolve the L2 fields for tunnel decap
+	 * Resolve the route for remote vtep (saddr) of the decap key
+	 * Find it's next-hop mac addrs
+	 */
+	tun_key.u.ipv4.dst = flow->tun_key.u.ipv4.src;
+	tun_key.tp_dst = flow->tun_key.tp_dst;
+	rc = bnxt_tc_resolve_tunnel_hdrs(bp, &tun_key, &l2_info, bp->dev);
+	if (rc)
+		goto put_decap;
+
+	decap_key->ttl = tun_key.ttl;
+	decap_l2_info = &decap_node->l2_info;
+	ether_addr_copy(decap_l2_info->dmac, l2_info.smac);
+	ether_addr_copy(decap_l2_info->smac, l2_info.dmac);
+	if (l2_info.num_vlans) {
+		decap_l2_info->num_vlans = l2_info.num_vlans;
+		decap_l2_info->inner_vlan_tpid = l2_info.inner_vlan_tpid;
+		decap_l2_info->inner_vlan_tci = l2_info.inner_vlan_tci;
+	}
+	flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS;
+
+	/* For getting a decap_filter_handle we first need to check if
+	 * there are any other decap flows that share the same tunnel L2
+	 * key and if so, pass that flow's decap_filter_handle as the
+	 * ref_decap_handle for this flow.
+	 */
+	rc = bnxt_tc_get_ref_decap_handle(bp, flow, decap_l2_info, flow_node,
+					  &ref_decap_handle);
+	if (rc)
+		goto put_decap;
+
+	/* Issue the hwrm cmd to allocate a decap filter handle */
+	rc = hwrm_cfa_decap_filter_alloc(bp, flow, decap_l2_info,
+					 ref_decap_handle,
+					 &decap_node->tunnel_handle);
+	if (rc)
+		goto put_decap_l2;
+
+done:
+	*decap_filter_handle = decap_node->tunnel_handle;
+	return 0;
+
+put_decap_l2:
+	bnxt_tc_put_decap_l2_node(bp, flow_node);
+put_decap:
+	bnxt_tc_put_tunnel_node(bp, &tc_info->decap_table,
+				&tc_info->decap_ht_params,
+				flow_node->decap_node);
+	return rc;
+}
+
+static void bnxt_tc_put_encap_handle(struct bnxt *bp,
+				     struct bnxt_tc_tunnel_node *encap_node)
+{
+	__le32 encap_handle = encap_node->tunnel_handle;
+	struct bnxt_tc_info *tc_info = bp->tc_info;
+	int rc;
+
+	rc = bnxt_tc_put_tunnel_node(bp, &tc_info->encap_table,
+				     &tc_info->encap_ht_params, encap_node);
+	if (!rc && encap_handle != INVALID_TUNNEL_HANDLE)
+		hwrm_cfa_encap_record_free(bp, encap_handle);
+}
+
+/* Lookup the tunnel encap table and check if there's an encap_handle
+ * alloc'd already.
+ * If not, query L2 info via a route lookup and issue an encap_record_alloc
+ * cmd to FW.
+ */
+static int bnxt_tc_get_encap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
+				    struct bnxt_tc_flow_node *flow_node,
+				    __le32 *encap_handle)
+{
+	struct ip_tunnel_key *encap_key = &flow->actions.tun_encap_key;
+	struct bnxt_tc_info *tc_info = bp->tc_info;
+	struct bnxt_tc_tunnel_node *encap_node;
+	int rc;
+
+	/* Check if there's another flow using the same tunnel encap.
+	 * If not, add this tunnel to the table and resolve the other
+	 * tunnel header fileds
+	 */
+	encap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->encap_table,
+					     &tc_info->encap_ht_params,
+					     encap_key);
+	if (!encap_node)
+		return -ENOMEM;
+
+	flow_node->encap_node = encap_node;
+
+	if (encap_node->tunnel_handle != INVALID_TUNNEL_HANDLE)
+		goto done;
+
+	rc = bnxt_tc_resolve_tunnel_hdrs(bp, encap_key, &encap_node->l2_info,
+					 flow->actions.dst_dev);
+	if (rc)
+		goto put_encap;
+
+	/* Allocate a new tunnel encap record */
+	rc = hwrm_cfa_encap_record_alloc(bp, encap_key, &encap_node->l2_info,
+					 &encap_node->tunnel_handle);
+	if (rc)
+		goto put_encap;
+
+done:
+	*encap_handle = encap_node->tunnel_handle;
+	return 0;
+
+put_encap:
+	bnxt_tc_put_tunnel_node(bp, &tc_info->encap_table,
+				&tc_info->encap_ht_params, encap_node);
+	return rc;
+}
+
+static void bnxt_tc_put_tunnel_handle(struct bnxt *bp,
+				      struct bnxt_tc_flow *flow,
+				      struct bnxt_tc_flow_node *flow_node)
+{
+	if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
+		bnxt_tc_put_decap_handle(bp, flow_node);
+	else if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP)
+		bnxt_tc_put_encap_handle(bp, flow_node->encap_node);
+}
+
+static int bnxt_tc_get_tunnel_handle(struct bnxt *bp,
+				     struct bnxt_tc_flow *flow,
+				     struct bnxt_tc_flow_node *flow_node,
+				     __le32 *tunnel_handle)
+{
+	if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
+		return bnxt_tc_get_decap_handle(bp, flow, flow_node,
+						tunnel_handle);
+	else if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP)
+		return bnxt_tc_get_encap_handle(bp, flow, flow_node,
+						tunnel_handle);
+	else
+		return 0;
+}
 static int __bnxt_tc_del_flow(struct bnxt *bp,
 			      struct bnxt_tc_flow_node *flow_node)
 {
-	struct bnxt_tc_info *tc_info = &bp->tc_info;
+	struct bnxt_tc_info *tc_info = bp->tc_info;
 	int rc;
 
 	/* send HWRM cmd to free the flow-id */
@@ -601,6 +1151,9 @@ static int __bnxt_tc_del_flow(struct bnxt *bp,
 
 	mutex_lock(&tc_info->lock);
 
+	/* release references to any tunnel encap/decap nodes */
+	bnxt_tc_put_tunnel_handle(bp, &flow_node->flow, flow_node);
+
 	/* release reference to l2 node */
 	bnxt_tc_put_l2_node(bp, flow_node);
 
@@ -633,8 +1186,9 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid,
 			    struct tc_cls_flower_offload *tc_flow_cmd)
 {
 	struct bnxt_tc_flow_node *new_node, *old_node;
-	struct bnxt_tc_info *tc_info = &bp->tc_info;
+	struct bnxt_tc_info *tc_info = bp->tc_info;
 	struct bnxt_tc_flow *flow;
+	__le32 tunnel_handle = 0;
 	__le16 ref_flow_handle;
 	int rc;
 
@@ -672,12 +1226,19 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid,
 	if (rc)
 		goto unlock;
 
+	/* If the flow involves tunnel encap/decap, get tunnel_handle */
+	rc = bnxt_tc_get_tunnel_handle(bp, flow, new_node, &tunnel_handle);
+	if (rc)
+		goto put_l2;
+
 	/* send HWRM cmd to alloc the flow */
 	rc = bnxt_hwrm_cfa_flow_alloc(bp, flow, ref_flow_handle,
-				      &new_node->flow_handle);
+				      tunnel_handle, &new_node->flow_handle);
 	if (rc)
-		goto put_l2;
+		goto put_tunnel;
 
+	flow->lastused = jiffies;
+	spin_lock_init(&flow->stats_lock);
 	/* add new flow to flow-table */
 	rc = rhashtable_insert_fast(&tc_info->flow_table, &new_node->node,
 				    tc_info->flow_ht_params);
@@ -689,12 +1250,14 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid,
 
 hwrm_flow_free:
 	bnxt_hwrm_cfa_flow_free(bp, new_node->flow_handle);
+put_tunnel:
+	bnxt_tc_put_tunnel_handle(bp, flow, new_node);
 put_l2:
 	bnxt_tc_put_l2_node(bp, new_node);
 unlock:
 	mutex_unlock(&tc_info->lock);
 free_node:
-	kfree(new_node);
+	kfree_rcu(new_node, rcu);
 done:
 	netdev_err(bp->dev, "Error: %s: cookie=0x%lx error=%d",
 		   __func__, tc_flow_cmd->cookie, rc);
@@ -704,7 +1267,7 @@ done:
 static int bnxt_tc_del_flow(struct bnxt *bp,
 			    struct tc_cls_flower_offload *tc_flow_cmd)
 {
-	struct bnxt_tc_info *tc_info = &bp->tc_info;
+	struct bnxt_tc_info *tc_info = bp->tc_info;
 	struct bnxt_tc_flow_node *flow_node;
 
 	flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
@@ -722,10 +1285,11 @@ static int bnxt_tc_del_flow(struct bnxt *bp,
 static int bnxt_tc_get_flow_stats(struct bnxt *bp,
 				  struct tc_cls_flower_offload *tc_flow_cmd)
 {
-	struct bnxt_tc_info *tc_info = &bp->tc_info;
+	struct bnxt_tc_flow_stats stats, *curr_stats, *prev_stats;
+	struct bnxt_tc_info *tc_info = bp->tc_info;
 	struct bnxt_tc_flow_node *flow_node;
-	struct bnxt_tc_flow_stats stats;
-	int rc;
+	struct bnxt_tc_flow *flow;
+	unsigned long lastused;
 
 	flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
 					   &tc_flow_cmd->cookie,
@@ -736,22 +1300,189 @@ static int bnxt_tc_get_flow_stats(struct bnxt *bp,
 		return -1;
 	}
 
-	rc = bnxt_hwrm_cfa_flow_stats_get(bp, flow_node->flow_handle,
-					  &flow_node->flow, &stats);
+	flow = &flow_node->flow;
+	curr_stats = &flow->stats;
+	prev_stats = &flow->prev_stats;
+
+	spin_lock(&flow->stats_lock);
+	stats.packets = curr_stats->packets - prev_stats->packets;
+	stats.bytes = curr_stats->bytes - prev_stats->bytes;
+	*prev_stats = *curr_stats;
+	lastused = flow->lastused;
+	spin_unlock(&flow->stats_lock);
+
+	tcf_exts_stats_update(tc_flow_cmd->exts, stats.bytes, stats.packets,
+			      lastused);
+	return 0;
+}
+
+static int
+bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows,
+			     struct bnxt_tc_stats_batch stats_batch[])
+{
+	struct hwrm_cfa_flow_stats_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_cfa_flow_stats_input req = { 0 };
+	__le16 *req_flow_handles = &req.flow_handle_0;
+	int rc, i;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_STATS, -1, -1);
+	req.num_flows = cpu_to_le16(num_flows);
+	for (i = 0; i < num_flows; i++) {
+		struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node;
+
+		req_flow_handles[i] = flow_node->flow_handle;
+	}
+
+	mutex_lock(&bp->hwrm_cmd_lock);
+	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (!rc) {
+		__le64 *resp_packets = &resp->packet_0;
+		__le64 *resp_bytes = &resp->byte_0;
+
+		for (i = 0; i < num_flows; i++) {
+			stats_batch[i].hw_stats.packets =
+						le64_to_cpu(resp_packets[i]);
+			stats_batch[i].hw_stats.bytes =
+						le64_to_cpu(resp_bytes[i]);
+		}
+	} else {
+		netdev_info(bp->dev, "error rc=%d", rc);
+	}
+
+	mutex_unlock(&bp->hwrm_cmd_lock);
+	return rc;
+}
+
+/* Add val to accum while handling a possible wraparound
+ * of val. Eventhough val is of type u64, its actual width
+ * is denoted by mask and will wrap-around beyond that width.
+ */
+static void accumulate_val(u64 *accum, u64 val, u64 mask)
+{
+#define low_bits(x, mask)		((x) & (mask))
+#define high_bits(x, mask)		((x) & ~(mask))
+	bool wrapped = val < low_bits(*accum, mask);
+
+	*accum = high_bits(*accum, mask) + val;
+	if (wrapped)
+		*accum += (mask + 1);
+}
+
+/* The HW counters' width is much less than 64bits.
+ * Handle possible wrap-around while updating the stat counters
+ */
+static void bnxt_flow_stats_accum(struct bnxt_tc_info *tc_info,
+				  struct bnxt_tc_flow_stats *acc_stats,
+				  struct bnxt_tc_flow_stats *hw_stats)
+{
+	accumulate_val(&acc_stats->bytes, hw_stats->bytes, tc_info->bytes_mask);
+	accumulate_val(&acc_stats->packets, hw_stats->packets,
+		       tc_info->packets_mask);
+}
+
+static int
+bnxt_tc_flow_stats_batch_update(struct bnxt *bp, int num_flows,
+				struct bnxt_tc_stats_batch stats_batch[])
+{
+	struct bnxt_tc_info *tc_info = bp->tc_info;
+	int rc, i;
+
+	rc = bnxt_hwrm_cfa_flow_stats_get(bp, num_flows, stats_batch);
 	if (rc)
 		return rc;
 
-	tcf_exts_stats_update(tc_flow_cmd->exts, stats.bytes, stats.packets, 0);
+	for (i = 0; i < num_flows; i++) {
+		struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node;
+		struct bnxt_tc_flow *flow = &flow_node->flow;
+
+		spin_lock(&flow->stats_lock);
+		bnxt_flow_stats_accum(tc_info, &flow->stats,
+				      &stats_batch[i].hw_stats);
+		if (flow->stats.packets != flow->prev_stats.packets)
+			flow->lastused = jiffies;
+		spin_unlock(&flow->stats_lock);
+	}
+
 	return 0;
 }
 
+static int
+bnxt_tc_flow_stats_batch_prep(struct bnxt *bp,
+			      struct bnxt_tc_stats_batch stats_batch[],
+			      int *num_flows)
+{
+	struct bnxt_tc_info *tc_info = bp->tc_info;
+	struct rhashtable_iter *iter = &tc_info->iter;
+	void *flow_node;
+	int rc, i;
+
+	rc = rhashtable_walk_start(iter);
+	if (rc && rc != -EAGAIN) {
+		i = 0;
+		goto done;
+	}
+
+	rc = 0;
+	for (i = 0; i < BNXT_FLOW_STATS_BATCH_MAX; i++) {
+		flow_node = rhashtable_walk_next(iter);
+		if (IS_ERR(flow_node)) {
+			i = 0;
+			if (PTR_ERR(flow_node) == -EAGAIN) {
+				continue;
+			} else {
+				rc = PTR_ERR(flow_node);
+				goto done;
+			}
+		}
+
+		/* No more flows */
+		if (!flow_node)
+			goto done;
+
+		stats_batch[i].flow_node = flow_node;
+	}
+done:
+	rhashtable_walk_stop(iter);
+	*num_flows = i;
+	return rc;
+}
+
+void bnxt_tc_flow_stats_work(struct bnxt *bp)
+{
+	struct bnxt_tc_info *tc_info = bp->tc_info;
+	int num_flows, rc;
+
+	num_flows = atomic_read(&tc_info->flow_table.nelems);
+	if (!num_flows)
+		return;
+
+	rhashtable_walk_enter(&tc_info->flow_table, &tc_info->iter);
+
+	for (;;) {
+		rc = bnxt_tc_flow_stats_batch_prep(bp, tc_info->stats_batch,
+						   &num_flows);
+		if (rc) {
+			if (rc == -EAGAIN)
+				continue;
+			break;
+		}
+
+		if (!num_flows)
+			break;
+
+		bnxt_tc_flow_stats_batch_update(bp, num_flows,
+						tc_info->stats_batch);
+	}
+
+	rhashtable_walk_exit(&tc_info->iter);
+}
+
 int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
 			 struct tc_cls_flower_offload *cls_flower)
 {
 	int rc = 0;
 
-	if (!is_classid_clsact_ingress(cls_flower->common.classid) ||
-	    cls_flower->common.chain_index)
+	if (cls_flower->common.chain_index)
 		return -EOPNOTSUPP;
 
 	switch (cls_flower->command) {
@@ -784,19 +1515,37 @@ static const struct rhashtable_params bnxt_tc_l2_ht_params = {
 	.automatic_shrinking = true
 };
 
+static const struct rhashtable_params bnxt_tc_decap_l2_ht_params = {
+	.head_offset = offsetof(struct bnxt_tc_l2_node, node),
+	.key_offset = offsetof(struct bnxt_tc_l2_node, key),
+	.key_len = BNXT_TC_L2_KEY_LEN,
+	.automatic_shrinking = true
+};
+
+static const struct rhashtable_params bnxt_tc_tunnel_ht_params = {
+	.head_offset = offsetof(struct bnxt_tc_tunnel_node, node),
+	.key_offset = offsetof(struct bnxt_tc_tunnel_node, key),
+	.key_len = sizeof(struct ip_tunnel_key),
+	.automatic_shrinking = true
+};
+
 /* convert counter width in bits to a mask */
 #define mask(width)		((u64)~0 >> (64 - (width)))
 
 int bnxt_init_tc(struct bnxt *bp)
 {
-	struct bnxt_tc_info *tc_info = &bp->tc_info;
+	struct bnxt_tc_info *tc_info;
 	int rc;
 
-	if (bp->hwrm_spec_code < 0x10800) {
+	if (bp->hwrm_spec_code < 0x10803) {
 		netdev_warn(bp->dev,
 			    "Firmware does not support TC flower offload.\n");
 		return -ENOTSUPP;
 	}
+
+	tc_info = kzalloc(sizeof(*tc_info), GFP_KERNEL);
+	if (!tc_info)
+		return -ENOMEM;
 	mutex_init(&tc_info->lock);
 
 	/* Counter widths are programmed by FW */
@@ -806,33 +1555,62 @@ int bnxt_init_tc(struct bnxt *bp)
 	tc_info->flow_ht_params = bnxt_tc_flow_ht_params;
 	rc = rhashtable_init(&tc_info->flow_table, &tc_info->flow_ht_params);
 	if (rc)
-		return rc;
+		goto free_tc_info;
 
 	tc_info->l2_ht_params = bnxt_tc_l2_ht_params;
 	rc = rhashtable_init(&tc_info->l2_table, &tc_info->l2_ht_params);
 	if (rc)
 		goto destroy_flow_table;
 
+	tc_info->decap_l2_ht_params = bnxt_tc_decap_l2_ht_params;
+	rc = rhashtable_init(&tc_info->decap_l2_table,
+			     &tc_info->decap_l2_ht_params);
+	if (rc)
+		goto destroy_l2_table;
+
+	tc_info->decap_ht_params = bnxt_tc_tunnel_ht_params;
+	rc = rhashtable_init(&tc_info->decap_table,
+			     &tc_info->decap_ht_params);
+	if (rc)
+		goto destroy_decap_l2_table;
+
+	tc_info->encap_ht_params = bnxt_tc_tunnel_ht_params;
+	rc = rhashtable_init(&tc_info->encap_table,
+			     &tc_info->encap_ht_params);
+	if (rc)
+		goto destroy_decap_table;
+
 	tc_info->enabled = true;
 	bp->dev->hw_features |= NETIF_F_HW_TC;
 	bp->dev->features |= NETIF_F_HW_TC;
+	bp->tc_info = tc_info;
 	return 0;
 
+destroy_decap_table:
+	rhashtable_destroy(&tc_info->decap_table);
+destroy_decap_l2_table:
+	rhashtable_destroy(&tc_info->decap_l2_table);
+destroy_l2_table:
+	rhashtable_destroy(&tc_info->l2_table);
 destroy_flow_table:
 	rhashtable_destroy(&tc_info->flow_table);
+free_tc_info:
+	kfree(tc_info);
 	return rc;
 }
 
 void bnxt_shutdown_tc(struct bnxt *bp)
 {
-	struct bnxt_tc_info *tc_info = &bp->tc_info;
+	struct bnxt_tc_info *tc_info = bp->tc_info;
 
-	if (!tc_info->enabled)
+	if (!bnxt_tc_flower_enabled(bp))
 		return;
 
 	rhashtable_destroy(&tc_info->flow_table);
 	rhashtable_destroy(&tc_info->l2_table);
+	rhashtable_destroy(&tc_info->decap_l2_table);
+	rhashtable_destroy(&tc_info->decap_table);
+	rhashtable_destroy(&tc_info->encap_table);
+	kfree(tc_info);
+	bp->tc_info = NULL;
 }
-
-#else
-#endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h
index 6c4c1ed279ef..97e09a880693 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h
@@ -12,6 +12,8 @@
 
 #ifdef CONFIG_BNXT_FLOWER_OFFLOAD
 
+#include <net/ip_tunnels.h>
+
 /* Structs used for storing the filter/actions of the TC cmd.
  */
 struct bnxt_tc_l2_key {
@@ -50,6 +52,13 @@ struct bnxt_tc_l4_key {
 	};
 };
 
+struct bnxt_tc_tunnel_key {
+	struct bnxt_tc_l2_key	l2;
+	struct bnxt_tc_l3_key	l3;
+	struct bnxt_tc_l4_key	l4;
+	__be32			id;
+};
+
 struct bnxt_tc_actions {
 	u32				flags;
 #define BNXT_TC_ACTION_FLAG_FWD			BIT(0)
@@ -57,16 +66,16 @@ struct bnxt_tc_actions {
 #define BNXT_TC_ACTION_FLAG_PUSH_VLAN		BIT(3)
 #define BNXT_TC_ACTION_FLAG_POP_VLAN		BIT(4)
 #define BNXT_TC_ACTION_FLAG_DROP		BIT(5)
+#define BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP	BIT(6)
+#define BNXT_TC_ACTION_FLAG_TUNNEL_DECAP	BIT(7)
 
 	u16				dst_fid;
 	struct net_device		*dst_dev;
 	__be16				push_vlan_tpid;
 	__be16				push_vlan_tci;
-};
 
-struct bnxt_tc_flow_stats {
-	u64		packets;
-	u64		bytes;
+	/* tunnel encap */
+	struct ip_tunnel_key		tun_encap_key;
 };
 
 struct bnxt_tc_flow {
@@ -76,6 +85,16 @@ struct bnxt_tc_flow {
 #define BNXT_TC_FLOW_FLAGS_IPV6_ADDRS		BIT(3)
 #define BNXT_TC_FLOW_FLAGS_PORTS		BIT(4)
 #define BNXT_TC_FLOW_FLAGS_ICMP			BIT(5)
+#define BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS	BIT(6)
+#define BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS	BIT(7)
+#define BNXT_TC_FLOW_FLAGS_TUNL_IPV6_ADDRS	BIT(8)
+#define BNXT_TC_FLOW_FLAGS_TUNL_PORTS		BIT(9)
+#define BNXT_TC_FLOW_FLAGS_TUNL_ID		BIT(10)
+#define BNXT_TC_FLOW_FLAGS_TUNNEL	(BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS | \
+					 BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS | \
+					 BNXT_TC_FLOW_FLAGS_TUNL_IPV6_ADDRS |\
+					 BNXT_TC_FLOW_FLAGS_TUNL_PORTS |\
+					 BNXT_TC_FLOW_FLAGS_TUNL_ID)
 
 	/* flow applicable to pkts ingressing on this fid */
 	u16				src_fid;
@@ -85,6 +104,8 @@ struct bnxt_tc_flow {
 	struct bnxt_tc_l3_key		l3_mask;
 	struct bnxt_tc_l4_key		l4_key;
 	struct bnxt_tc_l4_key		l4_mask;
+	struct ip_tunnel_key		tun_key;
+	struct ip_tunnel_key		tun_mask;
 
 	struct bnxt_tc_actions		actions;
 
@@ -93,13 +114,39 @@ struct bnxt_tc_flow {
 	/* previous snap-shot of stats */
 	struct bnxt_tc_flow_stats	prev_stats;
 	unsigned long			lastused; /* jiffies */
+	/* for calculating delta from prev_stats and
+	 * updating prev_stats atomically.
+	 */
+	spinlock_t			stats_lock;
+};
+
+/* Tunnel encap/decap hash table
+ * This table is used to maintain a list of flows that use
+ * the same tunnel encap/decap params (ip_daddrs, vni, udp_dport)
+ * and the FW returned handle.
+ * A separate table is maintained for encap and decap
+ */
+struct bnxt_tc_tunnel_node {
+	struct ip_tunnel_key		key;
+	struct rhash_head		node;
+
+	/* tunnel l2 info */
+	struct bnxt_tc_l2_key		l2_info;
+
+#define	INVALID_TUNNEL_HANDLE		cpu_to_le32(0xffffffff)
+	/* tunnel handle returned by FW */
+	__le32				tunnel_handle;
+
+	u32				refcount;
+	struct rcu_head			rcu;
 };
 
 /* L2 hash table
- * This data-struct is used for L2-flow table.
- * The L2 part of a flow is stored in a hash table.
+ * The same data-struct is used for L2-flow table and L2-tunnel table.
+ * The L2 part of a flow or tunnel is stored in a hash table.
  * A flow that shares the same L2 key/mask with an
- * already existing flow must refer to it's flow handle.
+ * already existing flow/tunnel must refer to it's flow handle or
+ * decap_filter_id respectively.
  */
 struct bnxt_tc_l2_node {
 	/* hash key: first 16b of key */
@@ -110,7 +157,7 @@ struct bnxt_tc_l2_node {
 	/* a linked list of flows that share the same l2 key */
 	struct list_head	common_l2_flows;
 
-	/* number of flows sharing the l2 key */
+	/* number of flows/tunnels sharing the l2 key */
 	u16			refcount;
 
 	struct rcu_head		rcu;
@@ -130,6 +177,16 @@ struct bnxt_tc_flow_node {
 	/* for the shared_flows list maintained in l2_node */
 	struct list_head		l2_list_node;
 
+	/* tunnel encap related */
+	struct bnxt_tc_tunnel_node	*encap_node;
+
+	/* tunnel decap related */
+	struct bnxt_tc_tunnel_node	*decap_node;
+	/* L2 node in tunnel-l2 hashtable that shares flow's tunnel l2 key */
+	struct bnxt_tc_l2_node		*decap_l2_node;
+	/* for the shared_flows list maintained in tunnel decap l2_node */
+	struct list_head		decap_l2_list_node;
+
 	struct rcu_head			rcu;
 };
 
@@ -137,6 +194,12 @@ int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
 			 struct tc_cls_flower_offload *cls_flower);
 int bnxt_init_tc(struct bnxt *bp);
 void bnxt_shutdown_tc(struct bnxt *bp);
+void bnxt_tc_flow_stats_work(struct bnxt *bp);
+
+static inline bool bnxt_tc_flower_enabled(struct bnxt *bp)
+{
+	return bp->tc_info && bp->tc_info->enabled;
+}
 
 #else /* CONFIG_BNXT_FLOWER_OFFLOAD */
 
@@ -154,5 +217,14 @@ static inline int bnxt_init_tc(struct bnxt *bp)
 static inline void bnxt_shutdown_tc(struct bnxt *bp)
 {
 }
+
+static inline void bnxt_tc_flow_stats_work(struct bnxt *bp)
+{
+}
+
+static inline bool bnxt_tc_flower_enabled(struct bnxt *bp)
+{
+	return false;
+}
 #endif /* CONFIG_BNXT_FLOWER_OFFLOAD */
 #endif /* BNXT_TC_H */
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
index e75db04c6cdc..69186d188c43 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
@@ -16,6 +16,7 @@
 #include "bnxt_hsi.h"
 #include "bnxt.h"
 #include "bnxt_vfr.h"
+#include "bnxt_devlink.h"
 #include "bnxt_tc.h"
 
 #ifdef CONFIG_BNXT_SRIOV
@@ -115,13 +116,17 @@ bnxt_vf_rep_get_stats64(struct net_device *dev,
 	stats->tx_bytes = vf_rep->tx_stats.bytes;
 }
 
-static int bnxt_vf_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
-				void *type_data)
+static int bnxt_vf_rep_setup_tc_block_cb(enum tc_setup_type type,
+					 void *type_data,
+					 void *cb_priv)
 {
-	struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
+	struct bnxt_vf_rep *vf_rep = cb_priv;
 	struct bnxt *bp = vf_rep->bp;
 	int vf_fid = bp->pf.vf[vf_rep->vf_idx].fw_fid;
 
+	if (!bnxt_tc_flower_enabled(vf_rep->bp) || !tc_can_offload(bp->dev))
+		return -EOPNOTSUPP;
+
 	switch (type) {
 	case TC_SETUP_CLSFLOWER:
 		return bnxt_tc_setup_flower(bp, vf_fid, type_data);
@@ -130,6 +135,39 @@ static int bnxt_vf_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
 	}
 }
 
+static int bnxt_vf_rep_setup_tc_block(struct net_device *dev,
+				      struct tc_block_offload *f)
+{
+	struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
+
+	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+		return -EOPNOTSUPP;
+
+	switch (f->command) {
+	case TC_BLOCK_BIND:
+		return tcf_block_cb_register(f->block,
+					     bnxt_vf_rep_setup_tc_block_cb,
+					     vf_rep, vf_rep);
+	case TC_BLOCK_UNBIND:
+		tcf_block_cb_unregister(f->block,
+					bnxt_vf_rep_setup_tc_block_cb, vf_rep);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int bnxt_vf_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+				void *type_data)
+{
+	switch (type) {
+	case TC_SETUP_BLOCK:
+		return bnxt_vf_rep_setup_tc_block(dev, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 struct net_device *bnxt_get_vf_rep(struct bnxt *bp, u16 cfa_code)
 {
 	u16 vf_idx;
@@ -416,7 +454,7 @@ err:
 }
 
 /* Devlink related routines */
-static int bnxt_dl_eswitch_mode_get(struct devlink *devlink, u16 *mode)
+int bnxt_dl_eswitch_mode_get(struct devlink *devlink, u16 *mode)
 {
 	struct bnxt *bp = bnxt_get_bp_from_dl(devlink);
 
@@ -424,7 +462,7 @@ static int bnxt_dl_eswitch_mode_get(struct devlink *devlink, u16 *mode)
 	return 0;
 }
 
-static int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode)
+int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode)
 {
 	struct bnxt *bp = bnxt_get_bp_from_dl(devlink);
 	int rc = 0;
@@ -462,52 +500,4 @@ done:
 	return rc;
 }
 
-static const struct devlink_ops bnxt_dl_ops = {
-	.eswitch_mode_set = bnxt_dl_eswitch_mode_set,
-	.eswitch_mode_get = bnxt_dl_eswitch_mode_get
-};
-
-int bnxt_dl_register(struct bnxt *bp)
-{
-	struct devlink *dl;
-	int rc;
-
-	if (!pci_find_ext_capability(bp->pdev, PCI_EXT_CAP_ID_SRIOV))
-		return 0;
-
-	if (bp->hwrm_spec_code < 0x10800) {
-		netdev_warn(bp->dev, "Firmware does not support SR-IOV E-Switch SWITCHDEV mode.\n");
-		return -ENOTSUPP;
-	}
-
-	dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl));
-	if (!dl) {
-		netdev_warn(bp->dev, "devlink_alloc failed");
-		return -ENOMEM;
-	}
-
-	bnxt_link_bp_to_dl(bp, dl);
-	bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
-	rc = devlink_register(dl, &bp->pdev->dev);
-	if (rc) {
-		bnxt_link_bp_to_dl(bp, NULL);
-		devlink_free(dl);
-		netdev_warn(bp->dev, "devlink_register failed. rc=%d", rc);
-		return rc;
-	}
-
-	return 0;
-}
-
-void bnxt_dl_unregister(struct bnxt *bp)
-{
-	struct devlink *dl = bp->dl;
-
-	if (!dl)
-		return;
-
-	devlink_unregister(dl);
-	devlink_free(dl);
-}
-
 #endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h
index 7787cd24606a..fb06bbe70e42 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h
@@ -14,31 +14,6 @@
 
 #define	MAX_CFA_CODE			65536
 
-/* Struct to hold housekeeping info needed by devlink interface */
-struct bnxt_dl {
-	struct bnxt *bp;	/* back ptr to the controlling dev */
-};
-
-static inline struct bnxt *bnxt_get_bp_from_dl(struct devlink *dl)
-{
-	return ((struct bnxt_dl *)devlink_priv(dl))->bp;
-}
-
-/* To clear devlink pointer from bp, pass NULL dl */
-static inline void bnxt_link_bp_to_dl(struct bnxt *bp, struct devlink *dl)
-{
-	bp->dl = dl;
-
-	/* add a back pointer in dl to bp */
-	if (dl) {
-		struct bnxt_dl *bp_dl = devlink_priv(dl);
-
-		bp_dl->bp = bp;
-	}
-}
-
-int bnxt_dl_register(struct bnxt *bp);
-void bnxt_dl_unregister(struct bnxt *bp);
 void bnxt_vf_reps_destroy(struct bnxt *bp);
 void bnxt_vf_reps_close(struct bnxt *bp);
 void bnxt_vf_reps_open(struct bnxt *bp);
@@ -53,16 +28,10 @@ static inline u16 bnxt_vf_rep_get_fid(struct net_device *dev)
 	return bp->pf.vf[vf_rep->vf_idx].fw_fid;
 }
 
-#else
-
-static inline int bnxt_dl_register(struct bnxt *bp)
-{
-	return 0;
-}
+int bnxt_dl_eswitch_mode_get(struct devlink *devlink, u16 *mode);
+int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode);
 
-static inline void bnxt_dl_unregister(struct bnxt *bp)
-{
-}
+#else
 
 static inline void bnxt_vf_reps_close(struct bnxt *bp)
 {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index d8f0c837b72c..261e5847557a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -94,6 +94,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
 
 	xdp.data_hard_start = *data_ptr - offset;
 	xdp.data = *data_ptr;
+	xdp_set_data_meta_invalid(&xdp);
 	xdp.data_end = *data_ptr + *len;
 	orig_data = xdp.data;
 	mapping = rx_buf->mapping - bp->rx_dma_offset;
@@ -207,7 +208,7 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog)
 	return 0;
 }
 
-int bnxt_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 {
 	struct bnxt *bp = netdev_priv(dev);
 	int rc;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
index 12a5ad66b564..414b748038ca 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
@@ -16,6 +16,6 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts);
 bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
 		 struct page *page, u8 **data_ptr, unsigned int *len,
 		 u8 *event);
-int bnxt_xdp(struct net_device *dev, struct netdev_xdp *xdp);
+int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp);
 
 #endif
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 9cebca896913..24b4f4ceceef 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -488,15 +488,13 @@ static void bcmgenet_complete(struct net_device *dev)
 static int bcmgenet_get_link_ksettings(struct net_device *dev,
 				       struct ethtool_link_ksettings *cmd)
 {
-	struct bcmgenet_priv *priv = netdev_priv(dev);
-
 	if (!netif_running(dev))
 		return -EINVAL;
 
-	if (!priv->phydev)
+	if (!dev->phydev)
 		return -ENODEV;
 
-	phy_ethtool_ksettings_get(priv->phydev, cmd);
+	phy_ethtool_ksettings_get(dev->phydev, cmd);
 
 	return 0;
 }
@@ -504,15 +502,13 @@ static int bcmgenet_get_link_ksettings(struct net_device *dev,
 static int bcmgenet_set_link_ksettings(struct net_device *dev,
 				       const struct ethtool_link_ksettings *cmd)
 {
-	struct bcmgenet_priv *priv = netdev_priv(dev);
-
 	if (!netif_running(dev))
 		return -EINVAL;
 
-	if (!priv->phydev)
+	if (!dev->phydev)
 		return -ENODEV;
 
-	return phy_ethtool_ksettings_set(priv->phydev, cmd);
+	return phy_ethtool_ksettings_set(dev->phydev, cmd);
 }
 
 static int bcmgenet_set_rx_csum(struct net_device *dev,
@@ -1042,11 +1038,14 @@ static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_eee *e)
 	if (GENET_IS_V1(priv))
 		return -EOPNOTSUPP;
 
+	if (!dev->phydev)
+		return -ENODEV;
+
 	e->eee_enabled = p->eee_enabled;
 	e->eee_active = p->eee_active;
 	e->tx_lpi_timer = bcmgenet_umac_readl(priv, UMAC_EEE_LPI_TIMER);
 
-	return phy_ethtool_get_eee(priv->phydev, e);
+	return phy_ethtool_get_eee(dev->phydev, e);
 }
 
 static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
@@ -1058,12 +1057,15 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
 	if (GENET_IS_V1(priv))
 		return -EOPNOTSUPP;
 
+	if (!dev->phydev)
+		return -ENODEV;
+
 	p->eee_enabled = e->eee_enabled;
 
 	if (!p->eee_enabled) {
 		bcmgenet_eee_enable_set(dev, false);
 	} else {
-		ret = phy_init_eee(priv->phydev, 0);
+		ret = phy_init_eee(dev->phydev, 0);
 		if (ret) {
 			netif_err(priv, hw, dev, "EEE initialization failed\n");
 			return ret;
@@ -1073,7 +1075,7 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
 		bcmgenet_eee_enable_set(dev, true);
 	}
 
-	return phy_ethtool_set_eee(priv->phydev, e);
+	return phy_ethtool_set_eee(dev->phydev, e);
 }
 
 /* standard ethtool support functions. */
@@ -1107,7 +1109,7 @@ static int bcmgenet_power_down(struct bcmgenet_priv *priv,
 
 	switch (mode) {
 	case GENET_POWER_CABLE_SENSE:
-		phy_detach(priv->phydev);
+		phy_detach(priv->dev->phydev);
 		break;
 
 	case GENET_POWER_WOL_MAGIC:
@@ -1172,7 +1174,6 @@ static void bcmgenet_power_up(struct bcmgenet_priv *priv,
 		}
 		bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT);
 		bcmgenet_phy_power_set(priv->dev, true);
-		bcmgenet_mii_reset(priv->dev);
 		break;
 
 	case GENET_POWER_CABLE_SENSE:
@@ -1193,15 +1194,13 @@ static void bcmgenet_power_up(struct bcmgenet_priv *priv,
 /* ioctl handle special commands that are not present in ethtool. */
 static int bcmgenet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
-	struct bcmgenet_priv *priv = netdev_priv(dev);
-
 	if (!netif_running(dev))
 		return -EINVAL;
 
-	if (!priv->phydev)
+	if (!dev->phydev)
 		return -ENODEV;
 
-	return phy_mii_ioctl(priv->phydev, rq, cmd);
+	return phy_mii_ioctl(dev->phydev, rq, cmd);
 }
 
 static struct enet_cb *bcmgenet_get_txcb(struct bcmgenet_priv *priv,
@@ -1405,11 +1404,10 @@ static unsigned int bcmgenet_tx_reclaim(struct net_device *dev,
 				struct bcmgenet_tx_ring *ring)
 {
 	unsigned int released;
-	unsigned long flags;
 
-	spin_lock_irqsave(&ring->lock, flags);
+	spin_lock_bh(&ring->lock);
 	released = __bcmgenet_tx_reclaim(dev, ring);
-	spin_unlock_irqrestore(&ring->lock, flags);
+	spin_unlock_bh(&ring->lock);
 
 	return released;
 }
@@ -1420,15 +1418,14 @@ static int bcmgenet_tx_poll(struct napi_struct *napi, int budget)
 		container_of(napi, struct bcmgenet_tx_ring, napi);
 	unsigned int work_done = 0;
 	struct netdev_queue *txq;
-	unsigned long flags;
 
-	spin_lock_irqsave(&ring->lock, flags);
+	spin_lock(&ring->lock);
 	work_done = __bcmgenet_tx_reclaim(ring->priv->dev, ring);
 	if (ring->free_bds > (MAX_SKB_FRAGS + 1)) {
 		txq = netdev_get_tx_queue(ring->priv->dev, ring->queue);
 		netif_tx_wake_queue(txq);
 	}
-	spin_unlock_irqrestore(&ring->lock, flags);
+	spin_unlock(&ring->lock);
 
 	if (work_done == 0) {
 		napi_complete(napi);
@@ -1523,7 +1520,6 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct bcmgenet_tx_ring *ring = NULL;
 	struct enet_cb *tx_cb_ptr;
 	struct netdev_queue *txq;
-	unsigned long flags = 0;
 	int nr_frags, index;
 	dma_addr_t mapping;
 	unsigned int size;
@@ -1550,7 +1546,7 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	nr_frags = skb_shinfo(skb)->nr_frags;
 
-	spin_lock_irqsave(&ring->lock, flags);
+	spin_lock(&ring->lock);
 	if (ring->free_bds <= (nr_frags + 1)) {
 		if (!netif_tx_queue_stopped(txq)) {
 			netif_tx_stop_queue(txq);
@@ -1584,8 +1580,7 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev)
 	for (i = 0; i <= nr_frags; i++) {
 		tx_cb_ptr = bcmgenet_get_txcb(priv, ring);
 
-		if (unlikely(!tx_cb_ptr))
-			BUG();
+		BUG_ON(!tx_cb_ptr);
 
 		if (!i) {
 			/* Transmit single SKB or head of fragment list */
@@ -1645,7 +1640,7 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev)
 		bcmgenet_tdma_ring_writel(priv, ring->index,
 					  ring->prod_index, TDMA_PROD_INDEX);
 out:
-	spin_unlock_irqrestore(&ring->lock, flags);
+	spin_unlock(&ring->lock);
 
 	return ret;
 
@@ -1935,12 +1930,8 @@ static void umac_enable_set(struct bcmgenet_priv *priv, u32 mask, bool enable)
 		usleep_range(1000, 2000);
 }
 
-static int reset_umac(struct bcmgenet_priv *priv)
+static void reset_umac(struct bcmgenet_priv *priv)
 {
-	struct device *kdev = &priv->pdev->dev;
-	unsigned int timeout = 0;
-	u32 reg;
-
 	/* 7358a0/7552a0: bad default in RBUF_FLUSH_CTRL.umac_sw_rst */
 	bcmgenet_rbuf_ctrl_set(priv, 0);
 	udelay(10);
@@ -1948,23 +1939,10 @@ static int reset_umac(struct bcmgenet_priv *priv)
 	/* disable MAC while updating its registers */
 	bcmgenet_umac_writel(priv, 0, UMAC_CMD);
 
-	/* issue soft reset, wait for it to complete */
-	bcmgenet_umac_writel(priv, CMD_SW_RESET, UMAC_CMD);
-	while (timeout++ < 1000) {
-		reg = bcmgenet_umac_readl(priv, UMAC_CMD);
-		if (!(reg & CMD_SW_RESET))
-			return 0;
-
-		udelay(1);
-	}
-
-	if (timeout == 1000) {
-		dev_err(kdev,
-			"timeout waiting for MAC to come out of reset\n");
-		return -ETIMEDOUT;
-	}
-
-	return 0;
+	/* issue soft reset with (rg)mii loopback to ensure a stable rxclk */
+	bcmgenet_umac_writel(priv, CMD_SW_RESET | CMD_LCL_LOOP_EN, UMAC_CMD);
+	udelay(2);
+	bcmgenet_umac_writel(priv, 0, UMAC_CMD);
 }
 
 static void bcmgenet_intr_disable(struct bcmgenet_priv *priv)
@@ -1994,20 +1972,16 @@ static void bcmgenet_link_intr_enable(struct bcmgenet_priv *priv)
 	bcmgenet_intrl2_0_writel(priv, int0_enable, INTRL2_CPU_MASK_CLEAR);
 }
 
-static int init_umac(struct bcmgenet_priv *priv)
+static void init_umac(struct bcmgenet_priv *priv)
 {
 	struct device *kdev = &priv->pdev->dev;
-	int ret;
 	u32 reg;
 	u32 int0_enable = 0;
 
 	dev_dbg(&priv->pdev->dev, "bcmgenet: init_umac\n");
 
-	ret = reset_umac(priv);
-	if (ret)
-		return ret;
+	reset_umac(priv);
 
-	bcmgenet_umac_writel(priv, 0, UMAC_CMD);
 	/* clear tx/rx counter */
 	bcmgenet_umac_writel(priv,
 			     MIB_RESET_RX | MIB_RESET_TX | MIB_RESET_RUNT,
@@ -2046,8 +2020,6 @@ static int init_umac(struct bcmgenet_priv *priv)
 	bcmgenet_intrl2_0_writel(priv, int0_enable, INTRL2_CPU_MASK_CLEAR);
 
 	dev_dbg(kdev, "done init umac\n");
-
-	return 0;
 }
 
 /* Initialize a Tx ring along with corresponding hardware registers */
@@ -2104,6 +2076,10 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv,
 				  TDMA_WRITE_PTR);
 	bcmgenet_tdma_ring_writel(priv, index, end_ptr * words_per_bd - 1,
 				  DMA_END_ADDR);
+
+	/* Initialize Tx NAPI */
+	netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll,
+		       NAPI_POLL_WEIGHT);
 }
 
 /* Initialize a RDMA ring */
@@ -2135,6 +2111,10 @@ static int bcmgenet_init_rx_ring(struct bcmgenet_priv *priv,
 	if (ret)
 		return ret;
 
+	/* Initialize Rx NAPI */
+	netif_napi_add(priv->dev, &ring->napi, bcmgenet_rx_poll,
+		       NAPI_POLL_WEIGHT);
+
 	bcmgenet_rdma_ring_writel(priv, index, 0, RDMA_PROD_INDEX);
 	bcmgenet_rdma_ring_writel(priv, index, 0, RDMA_CONS_INDEX);
 	bcmgenet_rdma_ring_writel(priv, index, 1, DMA_MBUF_DONE_THRESH);
@@ -2159,50 +2139,27 @@ static int bcmgenet_init_rx_ring(struct bcmgenet_priv *priv,
 	return ret;
 }
 
-static void bcmgenet_init_tx_napi(struct bcmgenet_priv *priv)
-{
-	unsigned int i;
-	struct bcmgenet_tx_ring *ring;
-
-	for (i = 0; i < priv->hw_params->tx_queues; ++i) {
-		ring = &priv->tx_rings[i];
-		netif_tx_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64);
-	}
-
-	ring = &priv->tx_rings[DESC_INDEX];
-	netif_tx_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64);
-}
-
 static void bcmgenet_enable_tx_napi(struct bcmgenet_priv *priv)
 {
 	unsigned int i;
-	u32 int0_enable = UMAC_IRQ_TXDMA_DONE;
-	u32 int1_enable = 0;
 	struct bcmgenet_tx_ring *ring;
 
 	for (i = 0; i < priv->hw_params->tx_queues; ++i) {
 		ring = &priv->tx_rings[i];
 		napi_enable(&ring->napi);
-		int1_enable |= (1 << i);
+		ring->int_enable(ring);
 	}
 
 	ring = &priv->tx_rings[DESC_INDEX];
 	napi_enable(&ring->napi);
-
-	bcmgenet_intrl2_0_writel(priv, int0_enable, INTRL2_CPU_MASK_CLEAR);
-	bcmgenet_intrl2_1_writel(priv, int1_enable, INTRL2_CPU_MASK_CLEAR);
+	ring->int_enable(ring);
 }
 
 static void bcmgenet_disable_tx_napi(struct bcmgenet_priv *priv)
 {
 	unsigned int i;
-	u32 int0_disable = UMAC_IRQ_TXDMA_DONE;
-	u32 int1_disable = 0xffff;
 	struct bcmgenet_tx_ring *ring;
 
-	bcmgenet_intrl2_0_writel(priv, int0_disable, INTRL2_CPU_MASK_SET);
-	bcmgenet_intrl2_1_writel(priv, int1_disable, INTRL2_CPU_MASK_SET);
-
 	for (i = 0; i < priv->hw_params->tx_queues; ++i) {
 		ring = &priv->tx_rings[i];
 		napi_disable(&ring->napi);
@@ -2286,9 +2243,6 @@ static void bcmgenet_init_tx_queues(struct net_device *dev)
 	bcmgenet_tdma_writel(priv, dma_priority[1], DMA_PRIORITY_1);
 	bcmgenet_tdma_writel(priv, dma_priority[2], DMA_PRIORITY_2);
 
-	/* Initialize Tx NAPI */
-	bcmgenet_init_tx_napi(priv);
-
 	/* Enable Tx queues */
 	bcmgenet_tdma_writel(priv, ring_cfg, DMA_RING_CFG);
 
@@ -2298,50 +2252,27 @@ static void bcmgenet_init_tx_queues(struct net_device *dev)
 	bcmgenet_tdma_writel(priv, dma_ctrl, DMA_CTRL);
 }
 
-static void bcmgenet_init_rx_napi(struct bcmgenet_priv *priv)
-{
-	unsigned int i;
-	struct bcmgenet_rx_ring *ring;
-
-	for (i = 0; i < priv->hw_params->rx_queues; ++i) {
-		ring = &priv->rx_rings[i];
-		netif_napi_add(priv->dev, &ring->napi, bcmgenet_rx_poll, 64);
-	}
-
-	ring = &priv->rx_rings[DESC_INDEX];
-	netif_napi_add(priv->dev, &ring->napi, bcmgenet_rx_poll, 64);
-}
-
 static void bcmgenet_enable_rx_napi(struct bcmgenet_priv *priv)
 {
 	unsigned int i;
-	u32 int0_enable = UMAC_IRQ_RXDMA_DONE;
-	u32 int1_enable = 0;
 	struct bcmgenet_rx_ring *ring;
 
 	for (i = 0; i < priv->hw_params->rx_queues; ++i) {
 		ring = &priv->rx_rings[i];
 		napi_enable(&ring->napi);
-		int1_enable |= (1 << (UMAC_IRQ1_RX_INTR_SHIFT + i));
+		ring->int_enable(ring);
 	}
 
 	ring = &priv->rx_rings[DESC_INDEX];
 	napi_enable(&ring->napi);
-
-	bcmgenet_intrl2_0_writel(priv, int0_enable, INTRL2_CPU_MASK_CLEAR);
-	bcmgenet_intrl2_1_writel(priv, int1_enable, INTRL2_CPU_MASK_CLEAR);
+	ring->int_enable(ring);
 }
 
 static void bcmgenet_disable_rx_napi(struct bcmgenet_priv *priv)
 {
 	unsigned int i;
-	u32 int0_disable = UMAC_IRQ_RXDMA_DONE;
-	u32 int1_disable = 0xffff << UMAC_IRQ1_RX_INTR_SHIFT;
 	struct bcmgenet_rx_ring *ring;
 
-	bcmgenet_intrl2_0_writel(priv, int0_disable, INTRL2_CPU_MASK_SET);
-	bcmgenet_intrl2_1_writel(priv, int1_disable, INTRL2_CPU_MASK_SET);
-
 	for (i = 0; i < priv->hw_params->rx_queues; ++i) {
 		ring = &priv->rx_rings[i];
 		napi_disable(&ring->napi);
@@ -2414,9 +2345,6 @@ static int bcmgenet_init_rx_queues(struct net_device *dev)
 	ring_cfg |= (1 << DESC_INDEX);
 	dma_ctrl |= (1 << (DESC_INDEX + DMA_RING_BUF_EN_SHIFT));
 
-	/* Initialize Rx NAPI */
-	bcmgenet_init_rx_napi(priv);
-
 	/* Enable rings */
 	bcmgenet_rdma_writel(priv, ring_cfg, DMA_RING_CFG);
 
@@ -2505,9 +2433,6 @@ static void bcmgenet_fini_dma(struct bcmgenet_priv *priv)
 	bcmgenet_fini_rx_napi(priv);
 	bcmgenet_fini_tx_napi(priv);
 
-	/* disable DMA */
-	bcmgenet_dma_teardown(priv);
-
 	for (i = 0; i < priv->num_tx_bds; i++) {
 		cb = priv->tx_cbs + i;
 		skb = bcmgenet_free_tx_cb(&priv->pdev->dev, cb);
@@ -2590,27 +2515,20 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv)
 /* Interrupt bottom half */
 static void bcmgenet_irq_task(struct work_struct *work)
 {
-	unsigned long flags;
 	unsigned int status;
 	struct bcmgenet_priv *priv = container_of(
 			work, struct bcmgenet_priv, bcmgenet_irq_work);
 
 	netif_dbg(priv, intr, priv->dev, "%s\n", __func__);
 
-	spin_lock_irqsave(&priv->lock, flags);
+	spin_lock_irq(&priv->lock);
 	status = priv->irq0_stat;
 	priv->irq0_stat = 0;
-	spin_unlock_irqrestore(&priv->lock, flags);
-
-	if (status & UMAC_IRQ_MPD_R) {
-		netif_dbg(priv, wol, priv->dev,
-			  "magic packet detected, waking up\n");
-		bcmgenet_power_up(priv, GENET_POWER_WOL_MAGIC);
-	}
+	spin_unlock_irq(&priv->lock);
 
 	/* Link UP/DOWN event */
 	if (status & UMAC_IRQ_LINK_EVENT)
-		phy_mac_interrupt(priv->phydev,
+		phy_mac_interrupt(priv->dev->phydev,
 				  !!(status & UMAC_IRQ_LINK_UP));
 }
 
@@ -2698,23 +2616,13 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
 		}
 	}
 
-	if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R |
-				UMAC_IRQ_PHY_DET_F |
-				UMAC_IRQ_LINK_EVENT |
-				UMAC_IRQ_HFB_SM |
-				UMAC_IRQ_HFB_MM)) {
-		/* all other interested interrupts handled in bottom half */
-		schedule_work(&priv->bcmgenet_irq_work);
-	}
-
 	if ((priv->hw_params->flags & GENET_HAS_MDIO_INTR) &&
 		status & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) {
 		wake_up(&priv->wq);
 	}
 
 	/* all other interested interrupts handled in bottom half */
-	status &= (UMAC_IRQ_LINK_EVENT |
-		   UMAC_IRQ_MPD_R);
+	status &= UMAC_IRQ_LINK_EVENT;
 	if (status) {
 		/* Save irq status for bottom-half processing. */
 		spin_lock_irqsave(&priv->lock, flags);
@@ -2849,16 +2757,16 @@ static void bcmgenet_netif_start(struct net_device *dev)
 
 	/* Start the network engine */
 	bcmgenet_enable_rx_napi(priv);
-	bcmgenet_enable_tx_napi(priv);
 
 	umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, true);
 
 	netif_tx_start_all_queues(dev);
+	bcmgenet_enable_tx_napi(priv);
 
 	/* Monitor link interrupts now */
 	bcmgenet_link_intr_enable(priv);
 
-	phy_start(priv->phydev);
+	phy_start(dev->phydev);
 }
 
 static int bcmgenet_open(struct net_device *dev)
@@ -2882,12 +2790,7 @@ static int bcmgenet_open(struct net_device *dev)
 	/* take MAC out of reset */
 	bcmgenet_umac_reset(priv);
 
-	ret = init_umac(priv);
-	if (ret)
-		goto err_clk_disable;
-
-	/* disable ethernet MAC while updating its registers */
-	umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, false);
+	init_umac(priv);
 
 	/* Make sure we reflect the value of CRC_CMD_FWD */
 	reg = bcmgenet_umac_readl(priv, UMAC_CMD);
@@ -2946,6 +2849,7 @@ err_irq1:
 err_irq0:
 	free_irq(priv->irq0, priv);
 err_fini_dma:
+	bcmgenet_dma_teardown(priv);
 	bcmgenet_fini_dma(priv);
 err_clk_disable:
 	if (priv->internal_phy)
@@ -2958,11 +2862,20 @@ static void bcmgenet_netif_stop(struct net_device *dev)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
 
+	bcmgenet_disable_tx_napi(priv);
 	netif_tx_stop_all_queues(dev);
-	phy_stop(priv->phydev);
-	bcmgenet_intr_disable(priv);
+
+	/* Disable MAC receive */
+	umac_enable_set(priv, CMD_RX_EN, false);
+
+	bcmgenet_dma_teardown(priv);
+
+	/* Disable MAC transmit. TX DMA disabled must be done before this */
+	umac_enable_set(priv, CMD_TX_EN, false);
+
+	phy_stop(dev->phydev);
 	bcmgenet_disable_rx_napi(priv);
-	bcmgenet_disable_tx_napi(priv);
+	bcmgenet_intr_disable(priv);
 
 	/* Wait for pending work items to complete. Since interrupts are
 	 * disabled no new work will be scheduled.
@@ -2973,33 +2886,23 @@ static void bcmgenet_netif_stop(struct net_device *dev)
 	priv->old_speed = -1;
 	priv->old_duplex = -1;
 	priv->old_pause = -1;
+
+	/* tx reclaim */
+	bcmgenet_tx_reclaim_all(dev);
+	bcmgenet_fini_dma(priv);
 }
 
 static int bcmgenet_close(struct net_device *dev)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
-	int ret;
+	int ret = 0;
 
 	netif_dbg(priv, ifdown, dev, "bcmgenet_close\n");
 
 	bcmgenet_netif_stop(dev);
 
 	/* Really kill the PHY state machine and disconnect from it */
-	phy_disconnect(priv->phydev);
-
-	/* Disable MAC receive */
-	umac_enable_set(priv, CMD_RX_EN, false);
-
-	ret = bcmgenet_dma_teardown(priv);
-	if (ret)
-		return ret;
-
-	/* Disable MAC transmit. TX DMA disabled must be done before this */
-	umac_enable_set(priv, CMD_TX_EN, false);
-
-	/* tx reclaim */
-	bcmgenet_tx_reclaim_all(dev);
-	bcmgenet_fini_dma(priv);
+	phy_disconnect(dev->phydev);
 
 	free_irq(priv->irq0, priv);
 	free_irq(priv->irq1, priv);
@@ -3018,7 +2921,6 @@ static void bcmgenet_dump_tx_queue(struct bcmgenet_tx_ring *ring)
 	u32 p_index, c_index, intsts, intmsk;
 	struct netdev_queue *txq;
 	unsigned int free_bds;
-	unsigned long flags;
 	bool txq_stopped;
 
 	if (!netif_msg_tx_err(priv))
@@ -3026,7 +2928,7 @@ static void bcmgenet_dump_tx_queue(struct bcmgenet_tx_ring *ring)
 
 	txq = netdev_get_tx_queue(priv->dev, ring->queue);
 
-	spin_lock_irqsave(&ring->lock, flags);
+	spin_lock(&ring->lock);
 	if (ring->index == DESC_INDEX) {
 		intsts = ~bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_MASK_STATUS);
 		intmsk = UMAC_IRQ_TXDMA_DONE | UMAC_IRQ_TXDMA_MBDONE;
@@ -3038,7 +2940,7 @@ static void bcmgenet_dump_tx_queue(struct bcmgenet_tx_ring *ring)
 	p_index = bcmgenet_tdma_ring_readl(priv, ring->index, TDMA_PROD_INDEX);
 	txq_stopped = netif_tx_queue_stopped(txq);
 	free_bds = ring->free_bds;
-	spin_unlock_irqrestore(&ring->lock, flags);
+	spin_unlock(&ring->lock);
 
 	netif_err(priv, tx_err, priv->dev, "Ring %d queue %d status summary\n"
 		  "TX queue status: %s, interrupts: %s\n"
@@ -3564,9 +3466,7 @@ static int bcmgenet_probe(struct platform_device *pdev)
 	    !strcasecmp(phy_mode_str, "internal"))
 		bcmgenet_power_up(priv, GENET_POWER_PASSIVE);
 
-	err = reset_umac(priv);
-	if (err)
-		goto err_clk_disable;
+	reset_umac(priv);
 
 	err = bcmgenet_mii_init(dev);
 	if (err)
@@ -3614,7 +3514,7 @@ static int bcmgenet_suspend(struct device *d)
 {
 	struct net_device *dev = dev_get_drvdata(d);
 	struct bcmgenet_priv *priv = netdev_priv(dev);
-	int ret;
+	int ret = 0;
 
 	if (!netif_running(dev))
 		return 0;
@@ -3622,24 +3522,10 @@ static int bcmgenet_suspend(struct device *d)
 	bcmgenet_netif_stop(dev);
 
 	if (!device_may_wakeup(d))
-		phy_suspend(priv->phydev);
+		phy_suspend(dev->phydev);
 
 	netif_device_detach(dev);
 
-	/* Disable MAC receive */
-	umac_enable_set(priv, CMD_RX_EN, false);
-
-	ret = bcmgenet_dma_teardown(priv);
-	if (ret)
-		return ret;
-
-	/* Disable MAC transmit. TX DMA disabled must be done before this */
-	umac_enable_set(priv, CMD_TX_EN, false);
-
-	/* tx reclaim */
-	bcmgenet_tx_reclaim_all(dev);
-	bcmgenet_fini_dma(priv);
-
 	/* Prepare the device for Wake-on-LAN and switch to the slow clock */
 	if (device_may_wakeup(d) && priv->wolopts) {
 		ret = bcmgenet_power_down(priv, GENET_POWER_WOL_MAGIC);
@@ -3678,21 +3564,17 @@ static int bcmgenet_resume(struct device *d)
 
 	bcmgenet_umac_reset(priv);
 
-	ret = init_umac(priv);
-	if (ret)
-		goto out_clk_disable;
+	init_umac(priv);
 
 	/* From WOL-enabled suspend, switch to regular clock */
 	if (priv->wolopts)
 		clk_disable_unprepare(priv->clk_wol);
 
-	phy_init_hw(priv->phydev);
+	phy_init_hw(dev->phydev);
+
 	/* Speed settings must be restored */
 	bcmgenet_mii_config(priv->dev, false);
 
-	/* disable ethernet MAC while updating its registers */
-	umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, false);
-
 	bcmgenet_set_hw_addr(priv, dev->dev_addr);
 
 	if (priv->internal_phy) {
@@ -3720,7 +3602,7 @@ static int bcmgenet_resume(struct device *d)
 	netif_device_attach(dev);
 
 	if (!device_may_wakeup(d))
-		phy_resume(priv->phydev);
+		phy_resume(dev->phydev);
 
 	if (priv->eee.eee_enabled)
 		bcmgenet_eee_enable_set(dev, true);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 4c49d0b97748..3c50431ccd2a 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -617,7 +617,6 @@ struct bcmgenet_priv {
 
 	/* MDIO bus variables */
 	wait_queue_head_t wq;
-	struct phy_device *phydev;
 	bool internal_phy;
 	struct device_node *phy_dn;
 	struct device_node *mdio_dn;
@@ -711,7 +710,6 @@ int bcmgenet_mii_init(struct net_device *dev);
 int bcmgenet_mii_config(struct net_device *dev, bool init);
 int bcmgenet_mii_probe(struct net_device *dev);
 void bcmgenet_mii_exit(struct net_device *dev);
-void bcmgenet_mii_reset(struct net_device *dev);
 void bcmgenet_phy_power_set(struct net_device *dev, bool enable);
 void bcmgenet_mii_setup(struct net_device *dev);
 
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index 18f5723be2c9..5333274a283c 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -34,7 +34,7 @@
 void bcmgenet_mii_setup(struct net_device *dev)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
-	struct phy_device *phydev = priv->phydev;
+	struct phy_device *phydev = dev->phydev;
 	u32 reg, cmd_bits = 0;
 	bool status_changed = false;
 
@@ -121,22 +121,6 @@ static int bcmgenet_fixed_phy_link_update(struct net_device *dev,
 	return 0;
 }
 
-/* Perform a voluntary PHY software reset, since the EPHY is very finicky about
- * not doing it and will start corrupting packets
- */
-void bcmgenet_mii_reset(struct net_device *dev)
-{
-	struct bcmgenet_priv *priv = netdev_priv(dev);
-
-	if (GENET_IS_V4(priv))
-		return;
-
-	if (priv->phydev) {
-		phy_init_hw(priv->phydev);
-		phy_start_aneg(priv->phydev);
-	}
-}
-
 void bcmgenet_phy_power_set(struct net_device *dev, bool enable)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
@@ -182,14 +166,14 @@ static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv)
 	}
 
 	if (priv->hw_params->flags & GENET_HAS_MOCA_LINK_DET)
-		fixed_phy_set_link_update(priv->phydev,
+		fixed_phy_set_link_update(priv->dev->phydev,
 					  bcmgenet_fixed_phy_link_update);
 }
 
 int bcmgenet_mii_config(struct net_device *dev, bool init)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
-	struct phy_device *phydev = priv->phydev;
+	struct phy_device *phydev = dev->phydev;
 	struct device *kdev = &priv->pdev->dev;
 	const char *phy_name = NULL;
 	u32 id_mode_dis = 0;
@@ -236,7 +220,7 @@ int bcmgenet_mii_config(struct net_device *dev, bool init)
 		 * capabilities, use that knowledge to also configure the
 		 * Reverse MII interface correctly.
 		 */
-		if ((priv->phydev->supported & PHY_BASIC_FEATURES) ==
+		if ((dev->phydev->supported & PHY_BASIC_FEATURES) ==
 				PHY_BASIC_FEATURES)
 			port_ctrl = PORT_MODE_EXT_RVMII_25;
 		else
@@ -306,7 +290,7 @@ int bcmgenet_mii_probe(struct net_device *dev)
 			return -ENODEV;
 		}
 	} else {
-		phydev = priv->phydev;
+		phydev = dev->phydev;
 		phydev->dev_flags = phy_flags;
 
 		ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup,
@@ -317,8 +301,6 @@ int bcmgenet_mii_probe(struct net_device *dev)
 		}
 	}
 
-	priv->phydev = phydev;
-
 	/* Configure port multiplexer based on what the probed PHY device since
 	 * reading the 'max-speed' property determines the maximum supported
 	 * PHY speed which is needed for bcmgenet_mii_config() to configure
@@ -326,7 +308,7 @@ int bcmgenet_mii_probe(struct net_device *dev)
 	 */
 	ret = bcmgenet_mii_config(dev, true);
 	if (ret) {
-		phy_disconnect(priv->phydev);
+		phy_disconnect(dev->phydev);
 		return ret;
 	}
 
@@ -336,7 +318,7 @@ int bcmgenet_mii_probe(struct net_device *dev)
 	 * Ethernet MAC ISRs
 	 */
 	if (priv->internal_phy)
-		priv->phydev->irq = PHY_IGNORE_INTERRUPT;
+		dev->phydev->irq = PHY_IGNORE_INTERRUPT;
 
 	return 0;
 }
@@ -545,7 +527,6 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv)
 
 	}
 
-	priv->phydev = phydev;
 	priv->phy_interface = pd->phy_interface;
 
 	return 0;
@@ -590,5 +571,4 @@ void bcmgenet_mii_exit(struct net_device *dev)
 		of_phy_deregister_fixed_link(dn);
 	of_node_put(priv->phy_dn);
 	platform_device_unregister(priv->mii_pdev);
-	platform_device_put(priv->mii_pdev);
 }
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 656e6af70f0a..d8d5f207c759 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -11087,9 +11087,7 @@ static void tg3_timer_init(struct tg3 *tp)
 	tp->asf_multiplier = (HZ / tp->timer_offset) *
 			     TG3_FW_UPDATE_FREQ_SEC;
 
-	init_timer(&tp->timer);
-	tp->timer.data = (unsigned long) tp;
-	tp->timer.function = tg3_timer;
+	setup_timer(&tp->timer, tg3_timer, (unsigned long)tp);
 }
 
 static void tg3_timer_start(struct tg3 *tp)
diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c
index 6e13c937d715..a843076597ec 100644
--- a/drivers/net/ethernet/brocade/bna/bnad.c
+++ b/drivers/net/ethernet/brocade/bna/bnad.c
@@ -1693,9 +1693,9 @@ err_return:
 /* Timer callbacks */
 /* a) IOC timer */
 static void
-bnad_ioc_timeout(unsigned long data)
+bnad_ioc_timeout(struct timer_list *t)
 {
-	struct bnad *bnad = (struct bnad *)data;
+	struct bnad *bnad = from_timer(bnad, t, bna.ioceth.ioc.ioc_timer);
 	unsigned long flags;
 
 	spin_lock_irqsave(&bnad->bna_lock, flags);
@@ -1704,9 +1704,9 @@ bnad_ioc_timeout(unsigned long data)
 }
 
 static void
-bnad_ioc_hb_check(unsigned long data)
+bnad_ioc_hb_check(struct timer_list *t)
 {
-	struct bnad *bnad = (struct bnad *)data;
+	struct bnad *bnad = from_timer(bnad, t, bna.ioceth.ioc.hb_timer);
 	unsigned long flags;
 
 	spin_lock_irqsave(&bnad->bna_lock, flags);
@@ -1715,9 +1715,9 @@ bnad_ioc_hb_check(unsigned long data)
 }
 
 static void
-bnad_iocpf_timeout(unsigned long data)
+bnad_iocpf_timeout(struct timer_list *t)
 {
-	struct bnad *bnad = (struct bnad *)data;
+	struct bnad *bnad = from_timer(bnad, t, bna.ioceth.ioc.iocpf_timer);
 	unsigned long flags;
 
 	spin_lock_irqsave(&bnad->bna_lock, flags);
@@ -1726,9 +1726,9 @@ bnad_iocpf_timeout(unsigned long data)
 }
 
 static void
-bnad_iocpf_sem_timeout(unsigned long data)
+bnad_iocpf_sem_timeout(struct timer_list *t)
 {
-	struct bnad *bnad = (struct bnad *)data;
+	struct bnad *bnad = from_timer(bnad, t, bna.ioceth.ioc.sem_timer);
 	unsigned long flags;
 
 	spin_lock_irqsave(&bnad->bna_lock, flags);
@@ -1748,9 +1748,9 @@ bnad_iocpf_sem_timeout(unsigned long data)
 
 /* b) Dynamic Interrupt Moderation Timer */
 static void
-bnad_dim_timeout(unsigned long data)
+bnad_dim_timeout(struct timer_list *t)
 {
-	struct bnad *bnad = (struct bnad *)data;
+	struct bnad *bnad = from_timer(bnad, t, dim_timer);
 	struct bnad_rx_info *rx_info;
 	struct bnad_rx_ctrl *rx_ctrl;
 	int i, j;
@@ -1781,9 +1781,9 @@ bnad_dim_timeout(unsigned long data)
 
 /* c)  Statistics Timer */
 static void
-bnad_stats_timeout(unsigned long data)
+bnad_stats_timeout(struct timer_list *t)
 {
-	struct bnad *bnad = (struct bnad *)data;
+	struct bnad *bnad = from_timer(bnad, t, stats_timer);
 	unsigned long flags;
 
 	if (!netif_running(bnad->netdev) ||
@@ -1804,8 +1804,7 @@ bnad_dim_timer_start(struct bnad *bnad)
 {
 	if (bnad->cfg_flags & BNAD_CF_DIM_ENABLED &&
 	    !test_bit(BNAD_RF_DIM_TIMER_RUNNING, &bnad->run_flags)) {
-		setup_timer(&bnad->dim_timer, bnad_dim_timeout,
-			    (unsigned long)bnad);
+		timer_setup(&bnad->dim_timer, bnad_dim_timeout, 0);
 		set_bit(BNAD_RF_DIM_TIMER_RUNNING, &bnad->run_flags);
 		mod_timer(&bnad->dim_timer,
 			  jiffies + msecs_to_jiffies(BNAD_DIM_TIMER_FREQ));
@@ -1823,8 +1822,7 @@ bnad_stats_timer_start(struct bnad *bnad)
 
 	spin_lock_irqsave(&bnad->bna_lock, flags);
 	if (!test_and_set_bit(BNAD_RF_STATS_TIMER_RUNNING, &bnad->run_flags)) {
-		setup_timer(&bnad->stats_timer, bnad_stats_timeout,
-			    (unsigned long)bnad);
+		timer_setup(&bnad->stats_timer, bnad_stats_timeout, 0);
 		mod_timer(&bnad->stats_timer,
 			  jiffies + msecs_to_jiffies(BNAD_STATS_TIMER_FREQ));
 	}
@@ -3692,14 +3690,11 @@ bnad_pci_probe(struct pci_dev *pdev,
 		goto res_free;
 
 	/* Set up timers */
-	setup_timer(&bnad->bna.ioceth.ioc.ioc_timer, bnad_ioc_timeout,
-		    (unsigned long)bnad);
-	setup_timer(&bnad->bna.ioceth.ioc.hb_timer, bnad_ioc_hb_check,
-		    (unsigned long)bnad);
-	setup_timer(&bnad->bna.ioceth.ioc.iocpf_timer, bnad_iocpf_timeout,
-		    (unsigned long)bnad);
-	setup_timer(&bnad->bna.ioceth.ioc.sem_timer, bnad_iocpf_sem_timeout,
-		    (unsigned long)bnad);
+	timer_setup(&bnad->bna.ioceth.ioc.ioc_timer, bnad_ioc_timeout, 0);
+	timer_setup(&bnad->bna.ioceth.ioc.hb_timer, bnad_ioc_hb_check, 0);
+	timer_setup(&bnad->bna.ioceth.ioc.iocpf_timer, bnad_iocpf_timeout, 0);
+	timer_setup(&bnad->bna.ioceth.ioc.sem_timer, bnad_iocpf_sem_timeout,
+		    0);
 
 	/*
 	 * Start the chip
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 6df2cad61647..72a67f74b97b 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -611,6 +611,9 @@ static int macb_mii_init(struct macb *bp)
 err_out_unregister_bus:
 	mdiobus_unregister(bp->mii_bus);
 err_out_free_mdiobus:
+	of_node_put(bp->phy_node);
+	if (np && of_phy_is_fixed_link(np))
+		of_phy_deregister_fixed_link(np);
 	mdiobus_free(bp->mii_bus);
 err_out:
 	return err;
@@ -1218,8 +1221,6 @@ static int macb_poll(struct napi_struct *napi, int budget)
 	status = macb_readl(bp, RSR);
 	macb_writel(bp, RSR, status);
 
-	work_done = 0;
-
 	netdev_vdbg(bp->dev, "poll: status = %08lx, budget = %d\n",
 		    (unsigned long)status, budget);
 
@@ -3552,6 +3553,9 @@ static int macb_probe(struct platform_device *pdev)
 err_out_unregister_mdio:
 	phy_disconnect(dev->phydev);
 	mdiobus_unregister(bp->mii_bus);
+	of_node_put(bp->phy_node);
+	if (np && of_phy_is_fixed_link(np))
+		of_phy_deregister_fixed_link(np);
 	mdiobus_free(bp->mii_bus);
 
 	/* Shutdown the PHY if there is a GPIO reset */
@@ -3574,6 +3578,7 @@ static int macb_remove(struct platform_device *pdev)
 {
 	struct net_device *dev;
 	struct macb *bp;
+	struct device_node *np = pdev->dev.of_node;
 
 	dev = platform_get_drvdata(pdev);
 
@@ -3582,6 +3587,8 @@ static int macb_remove(struct platform_device *pdev)
 		if (dev->phydev)
 			phy_disconnect(dev->phydev);
 		mdiobus_unregister(bp->mii_bus);
+		if (np && of_phy_is_fixed_link(np))
+			of_phy_deregister_fixed_link(np);
 		dev->phydev = NULL;
 		mdiobus_free(bp->mii_bus);
 
diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig
index dcbce6cac63e..63be75eb34d2 100644
--- a/drivers/net/ethernet/cavium/Kconfig
+++ b/drivers/net/ethernet/cavium/Kconfig
@@ -53,6 +53,7 @@ config	THUNDER_NIC_RGX
 config LIQUIDIO
 	tristate "Cavium LiquidIO support"
 	depends on 64BIT
+	depends on MAY_USE_DEVLINK
 	imply PTP_1588_CLOCK
 	select FW_LOADER
 	select LIBCRC32C
diff --git a/drivers/net/ethernet/cavium/liquidio/Makefile b/drivers/net/ethernet/cavium/liquidio/Makefile
index b802896bb2e0..e3fc4645cd8a 100644
--- a/drivers/net/ethernet/cavium/liquidio/Makefile
+++ b/drivers/net/ethernet/cavium/liquidio/Makefile
@@ -18,7 +18,7 @@ liquidio-$(CONFIG_LIQUIDIO) += lio_ethtool.o \
 			octeon_droq.o      \
 			octeon_nic.o
 
-liquidio-objs := lio_main.o octeon_console.o $(liquidio-y)
+liquidio-objs := lio_main.o octeon_console.o lio_vf_rep.o $(liquidio-y)
 
 obj-$(CONFIG_LIQUIDIO_VF) += liquidio_vf.o
 
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
index 23f6b60030c5..32ae63b6f20e 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_core.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
@@ -91,7 +91,7 @@ void octeon_update_tx_completion_counters(void *buf, int reqtype,
 	*bytes_compl += skb->len;
 }
 
-void octeon_report_sent_bytes_to_bql(void *buf, int reqtype)
+int octeon_report_sent_bytes_to_bql(void *buf, int reqtype)
 {
 	struct octnet_buf_free_info *finfo;
 	struct sk_buff *skb;
@@ -112,11 +112,13 @@ void octeon_report_sent_bytes_to_bql(void *buf, int reqtype)
 		break;
 
 	default:
-		return;
+		return 0;
 	}
 
 	txq = netdev_get_tx_queue(skb->dev, skb_get_queue_mapping(skb));
 	netdev_tx_sent_queue(txq, skb->len);
+
+	return netif_xmit_stopped(txq);
 }
 
 void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr)
@@ -141,6 +143,7 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr)
 	switch (nctrl->ncmd.s.cmd) {
 	case OCTNET_CMD_CHANGE_DEVFLAGS:
 	case OCTNET_CMD_SET_MULTI_LIST:
+	case OCTNET_CMD_SET_UC_LIST:
 		break;
 
 	case OCTNET_CMD_CHANGE_MACADDR:
@@ -464,7 +467,6 @@ liquidio_push_packet(u32 octeon_id __attribute__((unused)),
 	if (netdev) {
 		struct lio *lio = GET_LIO(netdev);
 		struct octeon_device *oct = lio->oct_dev;
-		int packet_was_received;
 
 		/* Do not proceed if the interface is not in RUNNING state. */
 		if (!ifstate_check(lio, LIO_IFSTATE_RUNNING)) {
@@ -567,18 +569,10 @@ liquidio_push_packet(u32 octeon_id __attribute__((unused)),
 			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vtag);
 		}
 
-		packet_was_received = (napi_gro_receive(napi, skb) != GRO_DROP);
-
-		if (packet_was_received) {
-			droq->stats.rx_bytes_received += len;
-			droq->stats.rx_pkts_received++;
-		} else {
-			droq->stats.rx_dropped++;
-			netif_info(lio, rx_err, lio->netdev,
-				   "droq:%d  error rx_dropped:%llu\n",
-				   droq->q_no, droq->stats.rx_dropped);
-		}
+		napi_gro_receive(napi, skb);
 
+		droq->stats.rx_bytes_received += len;
+		droq->stats.rx_pkts_received++;
 	} else {
 		recv_buffer_free(skb);
 	}
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 5b19826a7e16..6aa0eee88ea5 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -21,6 +21,7 @@
 #include <linux/firmware.h>
 #include <net/vxlan.h>
 #include <linux/kthread.h>
+#include <net/switchdev.h>
 #include "liquidio_common.h"
 #include "octeon_droq.h"
 #include "octeon_iq.h"
@@ -34,6 +35,7 @@
 #include "cn68xx_device.h"
 #include "cn23xx_pf_device.h"
 #include "liquidio_image.h"
+#include "lio_vf_rep.h"
 
 MODULE_AUTHOR("Cavium Networks, <support@cavium.com>");
 MODULE_DESCRIPTION("Cavium LiquidIO Intelligent Server Adapter Driver");
@@ -59,9 +61,9 @@ static int debug = -1;
 module_param(debug, int, 0644);
 MODULE_PARM_DESC(debug, "NETIF_MSG debug bits");
 
-static char fw_type[LIO_MAX_FW_TYPE_LEN] = LIO_FW_NAME_TYPE_NIC;
+static char fw_type[LIO_MAX_FW_TYPE_LEN] = LIO_FW_NAME_TYPE_AUTO;
 module_param_string(fw_type, fw_type, sizeof(fw_type), 0444);
-MODULE_PARM_DESC(fw_type, "Type of firmware to be loaded. Default \"nic\".  Use \"none\" to load firmware from flash.");
+MODULE_PARM_DESC(fw_type, "Type of firmware to be loaded (default is \"auto\"), which uses firmware in flash, if present, else loads \"nic\".");
 
 static u32 console_bitmask;
 module_param(console_bitmask, int, 0644);
@@ -83,6 +85,11 @@ static int octeon_console_debug_enabled(u32 console)
 
 /* runtime link query interval */
 #define LIQUIDIO_LINK_QUERY_INTERVAL_MS         1000
+/* update localtime to octeon firmware every 60 seconds.
+ * make firmware to use same time reference, so that it will be easy to
+ * correlate firmware logged events/errors with host events, for debugging.
+ */
+#define LIO_SYNC_OCTEON_TIME_INTERVAL_MS 60000
 
 struct liquidio_if_cfg_context {
 	int octeon_id;
@@ -901,6 +908,121 @@ static inline void update_link_status(struct net_device *netdev,
 	}
 }
 
+/**
+ * lio_sync_octeon_time_cb - callback that is invoked when soft command
+ * sent by lio_sync_octeon_time() has completed successfully or failed
+ *
+ * @oct - octeon device structure
+ * @status - indicates success or failure
+ * @buf - pointer to the command that was sent to firmware
+ **/
+static void lio_sync_octeon_time_cb(struct octeon_device *oct,
+				    u32 status, void *buf)
+{
+	struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
+
+	if (status)
+		dev_err(&oct->pci_dev->dev,
+			"Failed to sync time to octeon; error=%d\n", status);
+
+	octeon_free_soft_command(oct, sc);
+}
+
+/**
+ * lio_sync_octeon_time - send latest localtime to octeon firmware so that
+ * firmware will correct it's time, in case there is a time skew
+ *
+ * @work: work scheduled to send time update to octeon firmware
+ **/
+static void lio_sync_octeon_time(struct work_struct *work)
+{
+	struct cavium_wk *wk = (struct cavium_wk *)work;
+	struct lio *lio = (struct lio *)wk->ctxptr;
+	struct octeon_device *oct = lio->oct_dev;
+	struct octeon_soft_command *sc;
+	struct timespec64 ts;
+	struct lio_time *lt;
+	int ret;
+
+	sc = octeon_alloc_soft_command(oct, sizeof(struct lio_time), 0, 0);
+	if (!sc) {
+		dev_err(&oct->pci_dev->dev,
+			"Failed to sync time to octeon: soft command allocation failed\n");
+		return;
+	}
+
+	lt = (struct lio_time *)sc->virtdptr;
+
+	/* Get time of the day */
+	getnstimeofday64(&ts);
+	lt->sec = ts.tv_sec;
+	lt->nsec = ts.tv_nsec;
+	octeon_swap_8B_data((u64 *)lt, (sizeof(struct lio_time)) / 8);
+
+	sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+	octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
+				    OPCODE_NIC_SYNC_OCTEON_TIME, 0, 0, 0);
+
+	sc->callback = lio_sync_octeon_time_cb;
+	sc->callback_arg = sc;
+	sc->wait_time = 1000;
+
+	ret = octeon_send_soft_command(oct, sc);
+	if (ret == IQ_SEND_FAILED) {
+		dev_err(&oct->pci_dev->dev,
+			"Failed to sync time to octeon: failed to send soft command\n");
+		octeon_free_soft_command(oct, sc);
+	}
+
+	queue_delayed_work(lio->sync_octeon_time_wq.wq,
+			   &lio->sync_octeon_time_wq.wk.work,
+			   msecs_to_jiffies(LIO_SYNC_OCTEON_TIME_INTERVAL_MS));
+}
+
+/**
+ * setup_sync_octeon_time_wq - Sets up the work to periodically update
+ * local time to octeon firmware
+ *
+ * @netdev - network device which should send time update to firmware
+ **/
+static inline int setup_sync_octeon_time_wq(struct net_device *netdev)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+
+	lio->sync_octeon_time_wq.wq =
+		alloc_workqueue("update-octeon-time", WQ_MEM_RECLAIM, 0);
+	if (!lio->sync_octeon_time_wq.wq) {
+		dev_err(&oct->pci_dev->dev, "Unable to create wq to update octeon time\n");
+		return -1;
+	}
+	INIT_DELAYED_WORK(&lio->sync_octeon_time_wq.wk.work,
+			  lio_sync_octeon_time);
+	lio->sync_octeon_time_wq.wk.ctxptr = lio;
+	queue_delayed_work(lio->sync_octeon_time_wq.wq,
+			   &lio->sync_octeon_time_wq.wk.work,
+			   msecs_to_jiffies(LIO_SYNC_OCTEON_TIME_INTERVAL_MS));
+
+	return 0;
+}
+
+/**
+ * cleanup_sync_octeon_time_wq - stop scheduling and destroy the work created
+ * to periodically update local time to octeon firmware
+ *
+ * @netdev - network device which should send time update to firmware
+ **/
+static inline void cleanup_sync_octeon_time_wq(struct net_device *netdev)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct cavium_wq *time_wq = &lio->sync_octeon_time_wq;
+
+	if (time_wq->wq) {
+		cancel_delayed_work_sync(&time_wq->wk.work);
+		destroy_workqueue(time_wq->wq);
+	}
+}
+
 static struct octeon_device *get_other_octeon_device(struct octeon_device *oct)
 {
 	struct octeon_device *other_oct;
@@ -1076,19 +1198,13 @@ liquidio_probe(struct pci_dev *pdev,
 	}
 
 	if (OCTEON_CN23XX_PF(oct_dev)) {
-		u64 scratch1;
 		u8 bus, device, function;
 
-		scratch1 = octeon_read_csr64(oct_dev, CN23XX_SLI_SCRATCH1);
-		if (!(scratch1 & 4ULL)) {
-			/* Bit 2 of SLI_SCRATCH_1 is a flag that indicates that
-			 * the lio watchdog kernel thread is running for this
-			 * NIC.  Each NIC gets one watchdog kernel thread.
+		if (atomic_read(oct_dev->adapter_refcount) == 1) {
+			/* Each NIC gets one watchdog kernel thread.  The first
+			 * PF (of each NIC) that gets pci_driver->probe()'d
+			 * creates that thread.
 			 */
-			scratch1 |= 4ULL;
-			octeon_write_csr64(oct_dev, CN23XX_SLI_SCRATCH1,
-					   scratch1);
-
 			bus = pdev->bus->number;
 			device = PCI_SLOT(pdev->devfn);
 			function = PCI_FUNC(pdev->devfn);
@@ -1115,10 +1231,10 @@ liquidio_probe(struct pci_dev *pdev,
 	return 0;
 }
 
-static bool fw_type_is_none(void)
+static bool fw_type_is_auto(void)
 {
-	return strncmp(fw_type, LIO_FW_NAME_TYPE_NONE,
-		       sizeof(LIO_FW_NAME_TYPE_NONE)) == 0;
+	return strncmp(fw_type, LIO_FW_NAME_TYPE_AUTO,
+		       sizeof(LIO_FW_NAME_TYPE_AUTO)) == 0;
 }
 
 /**
@@ -1302,7 +1418,7 @@ static void octeon_destroy_resources(struct octeon_device *oct)
 		 * Implementation note: only soft-reset the device
 		 * if it is a CN6XXX OR the LAST CN23XX device.
 		 */
-		if (fw_type_is_none())
+		if (atomic_read(oct->adapter_fw_state) == FW_IS_PRELOADED)
 			octeon_pci_flr(oct);
 		else if (OCTEON_CN6XXX(oct) || !refcount)
 			oct->fn_list.soft_reset(oct);
@@ -1455,6 +1571,7 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
 	if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED)
 		unregister_netdev(netdev);
 
+	cleanup_sync_octeon_time_wq(netdev);
 	cleanup_link_status_change_wq(netdev);
 
 	cleanup_rx_oom_poll_fn(netdev);
@@ -1487,6 +1604,8 @@ static int liquidio_stop_nic_module(struct octeon_device *oct)
 	oct->cmd_resp_state = OCT_DRV_OFFLINE;
 	spin_unlock_bh(&oct->cmd_resp_wqlock);
 
+	lio_vf_rep_destroy(oct);
+
 	for (i = 0; i < oct->ifcount; i++) {
 		lio = GET_LIO(oct->props[i].netdev);
 		for (j = 0; j < oct->num_oqs; j++)
@@ -1497,6 +1616,12 @@ static int liquidio_stop_nic_module(struct octeon_device *oct)
 	for (i = 0; i < oct->ifcount; i++)
 		liquidio_destroy_nic_device(oct, i);
 
+	if (oct->devlink) {
+		devlink_unregister(oct->devlink);
+		devlink_free(oct->devlink);
+		oct->devlink = NULL;
+	}
+
 	dev_dbg(&oct->pci_dev->dev, "Network interfaces stopped\n");
 	return 0;
 }
@@ -1514,6 +1639,10 @@ static void liquidio_remove(struct pci_dev *pdev)
 	if (oct_dev->watchdog_task)
 		kthread_stop(oct_dev->watchdog_task);
 
+	if (!oct_dev->octeon_id &&
+	    oct_dev->fw_info.app_cap_flags & LIQUIDIO_SWITCHDEV_CAP)
+		lio_vf_rep_modexit();
+
 	if (oct_dev->app_mode && (oct_dev->app_mode == CVM_DRV_NIC_APP))
 		liquidio_stop_nic_module(oct_dev);
 
@@ -1934,10 +2063,12 @@ static int load_firmware(struct octeon_device *oct)
 	char fw_name[LIO_MAX_FW_FILENAME_LEN];
 	char *tmp_fw_type;
 
-	if (fw_type[0] == '\0')
+	if (fw_type_is_auto()) {
 		tmp_fw_type = LIO_FW_NAME_TYPE_NIC;
-	else
+		strncpy(fw_type, tmp_fw_type, sizeof(fw_type));
+	} else {
 		tmp_fw_type = fw_type;
+	}
 
 	sprintf(fw_name, "%s%s%s_%s%s", LIO_FW_DIR, LIO_FW_BASE_NAME,
 		octeon_get_conf(oct)->card_name, tmp_fw_type,
@@ -2477,7 +2608,8 @@ static void handle_timestamp(struct octeon_device *oct,
  */
 static inline int send_nic_timestamp_pkt(struct octeon_device *oct,
 					 struct octnic_data_pkt *ndata,
-					 struct octnet_buf_free_info *finfo)
+					 struct octnet_buf_free_info *finfo,
+					 int xmit_more)
 {
 	int retval;
 	struct octeon_soft_command *sc;
@@ -2512,7 +2644,7 @@ static inline int send_nic_timestamp_pkt(struct octeon_device *oct,
 		len = (u32)((struct octeon_instr_ih2 *)
 			    (&sc->cmd.cmd2.ih2))->dlengsz;
 
-	ring_doorbell = 1;
+	ring_doorbell = !xmit_more;
 
 	retval = octeon_send_command(oct, sc->iq_no, ring_doorbell, &sc->cmd,
 				     sc, len, ndata->reqtype);
@@ -2546,7 +2678,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 	union tx_info *tx_info;
 	int status = 0;
 	int q_idx = 0, iq_no = 0;
-	int j;
+	int j, xmit_more = 0;
 	u64 dptr = 0;
 	u32 tag = 0;
 
@@ -2751,17 +2883,19 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 		irh->vlan = skb_vlan_tag_get(skb) & 0xfff;
 	}
 
+	xmit_more = skb->xmit_more;
+
 	if (unlikely(cmdsetup.s.timestamp))
-		status = send_nic_timestamp_pkt(oct, &ndata, finfo);
+		status = send_nic_timestamp_pkt(oct, &ndata, finfo, xmit_more);
 	else
-		status = octnet_send_nic_data_pkt(oct, &ndata);
+		status = octnet_send_nic_data_pkt(oct, &ndata, xmit_more);
 	if (status == IQ_SEND_FAILED)
 		goto lio_xmit_failed;
 
 	netif_info(lio, tx_queued, lio->netdev, "Transmit queued successfully\n");
 
 	if (status == IQ_SEND_STOP)
-		stop_q(lio->netdev, q_idx);
+		stop_q(netdev, q_idx);
 
 	netif_trans_update(netdev);
 
@@ -2780,6 +2914,9 @@ lio_xmit_failed:
 	if (dptr)
 		dma_unmap_single(&oct->pci_dev->dev, dptr,
 				 ndata.datasize, DMA_TO_DEVICE);
+
+	octeon_ring_doorbell_locked(oct, iq_no);
+
 	tx_buffer_free(skb);
 	return NETDEV_TX_OK;
 }
@@ -3186,6 +3323,86 @@ static int liquidio_set_vf_link_state(struct net_device *netdev, int vfidx,
 	return 0;
 }
 
+static int
+liquidio_eswitch_mode_get(struct devlink *devlink, u16 *mode)
+{
+	struct lio_devlink_priv *priv;
+	struct octeon_device *oct;
+
+	priv = devlink_priv(devlink);
+	oct = priv->oct;
+
+	*mode = oct->eswitch_mode;
+
+	return 0;
+}
+
+static int
+liquidio_eswitch_mode_set(struct devlink *devlink, u16 mode)
+{
+	struct lio_devlink_priv *priv;
+	struct octeon_device *oct;
+	int ret = 0;
+
+	priv = devlink_priv(devlink);
+	oct = priv->oct;
+
+	if (!(oct->fw_info.app_cap_flags & LIQUIDIO_SWITCHDEV_CAP))
+		return -EINVAL;
+
+	if (oct->eswitch_mode == mode)
+		return 0;
+
+	switch (mode) {
+	case DEVLINK_ESWITCH_MODE_SWITCHDEV:
+		oct->eswitch_mode = mode;
+		ret = lio_vf_rep_create(oct);
+		break;
+
+	case DEVLINK_ESWITCH_MODE_LEGACY:
+		lio_vf_rep_destroy(oct);
+		oct->eswitch_mode = mode;
+		break;
+
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static const struct devlink_ops liquidio_devlink_ops = {
+	.eswitch_mode_get = liquidio_eswitch_mode_get,
+	.eswitch_mode_set = liquidio_eswitch_mode_set,
+};
+
+static int
+lio_pf_switchdev_attr_get(struct net_device *dev, struct switchdev_attr *attr)
+{
+	struct lio *lio = GET_LIO(dev);
+	struct octeon_device *oct = lio->oct_dev;
+
+	if (oct->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV)
+		return -EOPNOTSUPP;
+
+	switch (attr->id) {
+	case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
+		attr->u.ppid.id_len = ETH_ALEN;
+		ether_addr_copy(attr->u.ppid.id,
+				(void *)&lio->linfo.hw_addr + 2);
+		break;
+
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static const struct switchdev_ops lio_pf_switchdev_ops = {
+	.switchdev_port_attr_get = lio_pf_switchdev_attr_get,
+};
+
 static const struct net_device_ops lionetdevops = {
 	.ndo_open		= liquidio_open,
 	.ndo_stop		= liquidio_stop,
@@ -3303,7 +3520,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 {
 	struct lio *lio = NULL;
 	struct net_device *netdev;
-	u8 mac[6], i, j;
+	u8 mac[6], i, j, *fw_ver;
 	struct octeon_soft_command *sc;
 	struct liquidio_if_cfg_context *ctx;
 	struct liquidio_if_cfg_resp *resp;
@@ -3315,6 +3532,8 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 	u32 resp_size, ctx_size, data_size;
 	u32 ifidx_or_pfnum;
 	struct lio_version *vdata;
+	struct devlink *devlink;
+	struct lio_devlink_priv *lio_devlink;
 
 	/* This is to handle link status changes */
 	octeon_register_dispatch_fn(octeon_dev, OPCODE_NIC,
@@ -3414,6 +3633,22 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 			goto setup_nic_dev_fail;
 		}
 
+		/* Verify f/w version (in case of 'auto' loading from flash) */
+		fw_ver = octeon_dev->fw_info.liquidio_firmware_version;
+		if (memcmp(LIQUIDIO_BASE_VERSION,
+			   fw_ver,
+			   strlen(LIQUIDIO_BASE_VERSION))) {
+			dev_err(&octeon_dev->pci_dev->dev,
+				"Unmatched firmware version. Expected %s.x, got %s.\n",
+				LIQUIDIO_BASE_VERSION, fw_ver);
+			goto setup_nic_dev_fail;
+		} else if (atomic_read(octeon_dev->adapter_fw_state) ==
+			   FW_IS_PRELOADED) {
+			dev_info(&octeon_dev->pci_dev->dev,
+				 "Using auto-loaded firmware version %s.\n",
+				 fw_ver);
+		}
+
 		octeon_swap_8B_data((u64 *)(&resp->cfg_info),
 				    (sizeof(struct liquidio_if_cfg_info)) >> 3);
 
@@ -3444,6 +3679,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 		 * netdev tasks.
 		 */
 		netdev->netdev_ops = &lionetdevops;
+		SWITCHDEV_SET_OPS(netdev, &lio_pf_switchdev_ops);
 
 		lio = GET_LIO(netdev);
 
@@ -3593,6 +3829,11 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 		if (setup_link_status_change_wq(netdev))
 			goto setup_nic_dev_fail;
 
+		if ((octeon_dev->fw_info.app_cap_flags &
+		     LIQUIDIO_TIME_SYNC_CAP) &&
+		    setup_sync_octeon_time_wq(netdev))
+			goto setup_nic_dev_fail;
+
 		if (setup_rx_oom_poll_fn(netdev))
 			goto setup_nic_dev_fail;
 
@@ -3625,6 +3866,26 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 		octeon_free_soft_command(octeon_dev, sc);
 	}
 
+	devlink = devlink_alloc(&liquidio_devlink_ops,
+				sizeof(struct lio_devlink_priv));
+	if (!devlink) {
+		dev_err(&octeon_dev->pci_dev->dev, "devlink alloc failed\n");
+		goto setup_nic_wait_intr;
+	}
+
+	lio_devlink = devlink_priv(devlink);
+	lio_devlink->oct = octeon_dev;
+
+	if (devlink_register(devlink, &octeon_dev->pci_dev->dev)) {
+		devlink_free(devlink);
+		dev_err(&octeon_dev->pci_dev->dev,
+			"devlink registration failed\n");
+		goto setup_nic_wait_intr;
+	}
+
+	octeon_dev->devlink = devlink;
+	octeon_dev->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
+
 	return 0;
 
 setup_nic_dev_fail:
@@ -3719,6 +3980,7 @@ static int liquidio_enable_sriov(struct pci_dev *dev, int num_vfs)
 	}
 
 	if (!num_vfs) {
+		lio_vf_rep_destroy(oct);
 		ret = lio_pci_sriov_disable(oct);
 	} else if (num_vfs > oct->sriov_info.max_vfs) {
 		dev_err(&oct->pci_dev->dev,
@@ -3730,6 +3992,10 @@ static int liquidio_enable_sriov(struct pci_dev *dev, int num_vfs)
 		ret = octeon_enable_sriov(oct);
 		dev_info(&oct->pci_dev->dev, "oct->pf_num:%d num_vfs:%d\n",
 			 oct->pf_num, num_vfs);
+		ret = lio_vf_rep_create(oct);
+		if (ret)
+			dev_info(&oct->pci_dev->dev,
+				 "vf representor create failed");
 	}
 
 	return ret;
@@ -3767,6 +4033,18 @@ static int liquidio_init_nic_module(struct octeon_device *oct)
 		goto octnet_init_failure;
 	}
 
+	/* Call vf_rep_modinit if the firmware is switchdev capable
+	 * and do it from the first liquidio function probed.
+	 */
+	if (!oct->octeon_id &&
+	    oct->fw_info.app_cap_flags & LIQUIDIO_SWITCHDEV_CAP) {
+		retval = lio_vf_rep_modinit();
+		if (retval) {
+			liquidio_stop_nic_module(oct);
+			goto octnet_init_failure;
+		}
+	}
+
 	liquidio_ptp_init(oct);
 
 	dev_dbg(&oct->pci_dev->dev, "Network interfaces ready\n");
@@ -3882,9 +4160,9 @@ octeon_recv_vf_drv_notice(struct octeon_recv_info *recv_info, void *buf)
 static int octeon_device_init(struct octeon_device *octeon_dev)
 {
 	int j, ret;
-	int fw_loaded = 0;
 	char bootcmd[] = "\n";
 	char *dbg_enb = NULL;
+	enum lio_fw_state fw_state;
 	struct octeon_device_priv *oct_priv =
 		(struct octeon_device_priv *)octeon_dev->priv;
 	atomic_set(&octeon_dev->status, OCT_DEV_BEGIN_STATE);
@@ -3916,24 +4194,40 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
 
 	octeon_dev->app_mode = CVM_DRV_INVALID_APP;
 
-	if (OCTEON_CN23XX_PF(octeon_dev)) {
-		if (!cn23xx_fw_loaded(octeon_dev) && !fw_type_is_none()) {
-			fw_loaded = 0;
-			/* Do a soft reset of the Octeon device. */
-			if (octeon_dev->fn_list.soft_reset(octeon_dev))
-				return 1;
-			/* things might have changed */
-			if (!cn23xx_fw_loaded(octeon_dev))
-				fw_loaded = 0;
-			else
-				fw_loaded = 1;
-		} else {
-			fw_loaded = 1;
-		}
-	} else if (octeon_dev->fn_list.soft_reset(octeon_dev)) {
-		return 1;
+	/* CN23XX supports preloaded firmware if the following is true:
+	 *
+	 * The adapter indicates that firmware is currently running AND
+	 * 'fw_type' is 'auto'.
+	 *
+	 * (default state is NEEDS_TO_BE_LOADED, override it if appropriate).
+	 */
+	if (OCTEON_CN23XX_PF(octeon_dev) &&
+	    cn23xx_fw_loaded(octeon_dev) && fw_type_is_auto()) {
+		atomic_cmpxchg(octeon_dev->adapter_fw_state,
+			       FW_NEEDS_TO_BE_LOADED, FW_IS_PRELOADED);
 	}
 
+	/* If loading firmware, only first device of adapter needs to do so. */
+	fw_state = atomic_cmpxchg(octeon_dev->adapter_fw_state,
+				  FW_NEEDS_TO_BE_LOADED,
+				  FW_IS_BEING_LOADED);
+
+	/* Here, [local variable] 'fw_state' is set to one of:
+	 *
+	 *   FW_IS_PRELOADED:       No firmware is to be loaded (see above)
+	 *   FW_NEEDS_TO_BE_LOADED: The driver's first instance will load
+	 *                          firmware to the adapter.
+	 *   FW_IS_BEING_LOADED:    The driver's second instance will not load
+	 *                          firmware to the adapter.
+	 */
+
+	/* Prior to f/w load, perform a soft reset of the Octeon device;
+	 * if error resetting, return w/error.
+	 */
+	if (fw_state == FW_NEEDS_TO_BE_LOADED)
+		if (octeon_dev->fn_list.soft_reset(octeon_dev))
+			return 1;
+
 	/* Initialize the dispatch mechanism used to push packets arriving on
 	 * Octeon Output queues.
 	 */
@@ -4063,7 +4357,7 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
 
 	atomic_set(&octeon_dev->status, OCT_DEV_IO_QUEUES_DONE);
 
-	if ((!OCTEON_CN23XX_PF(octeon_dev)) || !fw_loaded) {
+	if (fw_state == FW_NEEDS_TO_BE_LOADED) {
 		dev_dbg(&octeon_dev->pci_dev->dev, "Waiting for DDR initialization...\n");
 		if (!ddr_timeout) {
 			dev_info(&octeon_dev->pci_dev->dev,
@@ -4125,6 +4419,8 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
 			dev_err(&octeon_dev->pci_dev->dev, "Could not load firmware to board\n");
 			return 1;
 		}
+
+		atomic_set(octeon_dev->adapter_fw_state, FW_HAS_BEEN_LOADED);
 	}
 
 	handshake[octeon_dev->octeon_id].init_ok = 1;
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 2e993ce43b66..fd70a4844e2d 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -435,8 +435,7 @@ static void delete_glists(struct lio *lio)
 		do {
 			g = (struct octnic_gather *)
 			    list_delete_head(&lio->glist[i]);
-			if (g)
-				kfree(g);
+			kfree(g);
 		} while (g);
 
 		if (lio->glists_virt_base && lio->glists_virt_base[i] &&
@@ -748,7 +747,7 @@ static void octeon_destroy_resources(struct octeon_device *oct)
 
 		if (lio_wait_for_oq_pkts(oct))
 			dev_err(&oct->pci_dev->dev, "OQ had pending packets\n");
-
+		/* fall through */
 	case OCT_DEV_INTR_SET_DONE:
 		/* Disable interrupts  */
 		oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR);
@@ -1289,6 +1288,9 @@ static int liquidio_stop(struct net_device *netdev)
 	struct octeon_device *oct = lio->oct_dev;
 	struct napi_struct *napi, *n;
 
+	/* tell Octeon to stop forwarding packets to host */
+	send_rx_ctrl_cmd(lio, 0);
+
 	if (oct->props[lio->ifidx].napi_enabled) {
 		list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
 			napi_disable(napi);
@@ -1306,9 +1308,6 @@ static int liquidio_stop(struct net_device *netdev)
 	netif_carrier_off(netdev);
 	lio->link_changes++;
 
-	/* tell Octeon to stop forwarding packets to host */
-	send_rx_ctrl_cmd(lio, 0);
-
 	ifstate_reset(lio, LIO_IFSTATE_RUNNING);
 
 	txqs_stop(netdev);
@@ -1691,7 +1690,8 @@ static void handle_timestamp(struct octeon_device *oct, u32 status, void *buf)
  */
 static int send_nic_timestamp_pkt(struct octeon_device *oct,
 				  struct octnic_data_pkt *ndata,
-				  struct octnet_buf_free_info *finfo)
+				  struct octnet_buf_free_info *finfo,
+				  int xmit_more)
 {
 	struct octeon_soft_command *sc;
 	int ring_doorbell;
@@ -1721,7 +1721,7 @@ static int send_nic_timestamp_pkt(struct octeon_device *oct,
 
 	len = (u32)((struct octeon_instr_ih3 *)(&sc->cmd.cmd3.ih3))->dlengsz;
 
-	ring_doorbell = 1;
+	ring_doorbell = !xmit_more;
 
 	retval = octeon_send_command(oct, sc->iq_no, ring_doorbell, &sc->cmd,
 				     sc, len, ndata->reqtype);
@@ -1753,6 +1753,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 	struct octeon_device *oct;
 	int q_idx = 0, iq_no = 0;
 	union tx_info *tx_info;
+	int xmit_more = 0;
 	struct lio *lio;
 	int status = 0;
 	u64 dptr = 0;
@@ -1941,10 +1942,12 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 		irh->vlan = skb_vlan_tag_get(skb) & VLAN_VID_MASK;
 	}
 
+	xmit_more = skb->xmit_more;
+
 	if (unlikely(cmdsetup.s.timestamp))
-		status = send_nic_timestamp_pkt(oct, &ndata, finfo);
+		status = send_nic_timestamp_pkt(oct, &ndata, finfo, xmit_more);
 	else
-		status = octnet_send_nic_data_pkt(oct, &ndata);
+		status = octnet_send_nic_data_pkt(oct, &ndata, xmit_more);
 	if (status == IQ_SEND_FAILED)
 		goto lio_xmit_failed;
 
@@ -1953,7 +1956,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 	if (status == IQ_SEND_STOP) {
 		dev_err(&oct->pci_dev->dev, "Rcvd IQ_SEND_STOP signal; stopping IQ-%d\n",
 			iq_no);
-		stop_q(lio->netdev, q_idx);
+		stop_q(netdev, q_idx);
 	}
 
 	netif_trans_update(netdev);
@@ -1973,6 +1976,9 @@ lio_xmit_failed:
 	if (dptr)
 		dma_unmap_single(&oct->pci_dev->dev, dptr,
 				 ndata.datasize, DMA_TO_DEVICE);
+
+	octeon_ring_doorbell_locked(oct, iq_no);
+
 	tx_buffer_free(skb);
 	return NETDEV_TX_OK;
 }
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c
new file mode 100644
index 000000000000..2adafa366d3f
--- /dev/null
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c
@@ -0,0 +1,695 @@
+/**********************************************************************
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2017 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
+#include <linux/pci.h>
+#include <linux/if_vlan.h>
+#include "liquidio_common.h"
+#include "octeon_droq.h"
+#include "octeon_iq.h"
+#include "response_manager.h"
+#include "octeon_device.h"
+#include "octeon_nic.h"
+#include "octeon_main.h"
+#include "octeon_network.h"
+#include <net/switchdev.h>
+#include "lio_vf_rep.h"
+#include "octeon_network.h"
+
+static int lio_vf_rep_open(struct net_device *ndev);
+static int lio_vf_rep_stop(struct net_device *ndev);
+static int lio_vf_rep_pkt_xmit(struct sk_buff *skb, struct net_device *ndev);
+static void lio_vf_rep_tx_timeout(struct net_device *netdev);
+static int lio_vf_rep_phys_port_name(struct net_device *dev,
+				     char *buf, size_t len);
+static void lio_vf_rep_get_stats64(struct net_device *dev,
+				   struct rtnl_link_stats64 *stats64);
+static int lio_vf_rep_change_mtu(struct net_device *ndev, int new_mtu);
+
+static const struct net_device_ops lio_vf_rep_ndev_ops = {
+	.ndo_open = lio_vf_rep_open,
+	.ndo_stop = lio_vf_rep_stop,
+	.ndo_start_xmit = lio_vf_rep_pkt_xmit,
+	.ndo_tx_timeout = lio_vf_rep_tx_timeout,
+	.ndo_get_phys_port_name = lio_vf_rep_phys_port_name,
+	.ndo_get_stats64 = lio_vf_rep_get_stats64,
+	.ndo_change_mtu = lio_vf_rep_change_mtu,
+};
+
+static void
+lio_vf_rep_send_sc_complete(struct octeon_device *oct,
+			    u32 status, void *ptr)
+{
+	struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr;
+	struct lio_vf_rep_sc_ctx *ctx =
+		(struct lio_vf_rep_sc_ctx *)sc->ctxptr;
+	struct lio_vf_rep_resp *resp =
+		(struct lio_vf_rep_resp *)sc->virtrptr;
+
+	if (status != OCTEON_REQUEST_TIMEOUT && READ_ONCE(resp->status))
+		WRITE_ONCE(resp->status, 0);
+
+	complete(&ctx->complete);
+}
+
+static int
+lio_vf_rep_send_soft_command(struct octeon_device *oct,
+			     void *req, int req_size,
+			     void *resp, int resp_size)
+{
+	int tot_resp_size = sizeof(struct lio_vf_rep_resp) + resp_size;
+	int ctx_size = sizeof(struct lio_vf_rep_sc_ctx);
+	struct octeon_soft_command *sc = NULL;
+	struct lio_vf_rep_resp *rep_resp;
+	struct lio_vf_rep_sc_ctx *ctx;
+	void *sc_req;
+	int err;
+
+	sc = (struct octeon_soft_command *)
+		octeon_alloc_soft_command(oct, req_size,
+					  tot_resp_size, ctx_size);
+	if (!sc)
+		return -ENOMEM;
+
+	ctx = (struct lio_vf_rep_sc_ctx *)sc->ctxptr;
+	memset(ctx, 0, ctx_size);
+	init_completion(&ctx->complete);
+
+	sc_req = (struct lio_vf_rep_req *)sc->virtdptr;
+	memcpy(sc_req, req, req_size);
+
+	rep_resp = (struct lio_vf_rep_resp *)sc->virtrptr;
+	memset(rep_resp, 0, tot_resp_size);
+	WRITE_ONCE(rep_resp->status, 1);
+
+	sc->iq_no = 0;
+	octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
+				    OPCODE_NIC_VF_REP_CMD, 0, 0, 0);
+	sc->callback = lio_vf_rep_send_sc_complete;
+	sc->callback_arg = sc;
+	sc->wait_time = LIO_VF_REP_REQ_TMO_MS;
+
+	err = octeon_send_soft_command(oct, sc);
+	if (err == IQ_SEND_FAILED)
+		goto free_buff;
+
+	wait_for_completion_timeout(&ctx->complete,
+				    msecs_to_jiffies
+				    (2 * LIO_VF_REP_REQ_TMO_MS));
+	err = READ_ONCE(rep_resp->status) ? -EBUSY : 0;
+	if (err)
+		dev_err(&oct->pci_dev->dev, "VF rep send config failed\n");
+
+	if (resp)
+		memcpy(resp, (rep_resp + 1), resp_size);
+free_buff:
+	octeon_free_soft_command(oct, sc);
+
+	return err;
+}
+
+static int
+lio_vf_rep_open(struct net_device *ndev)
+{
+	struct lio_vf_rep_desc *vf_rep = netdev_priv(ndev);
+	struct lio_vf_rep_req rep_cfg;
+	struct octeon_device *oct;
+	int ret;
+
+	oct = vf_rep->oct;
+
+	memset(&rep_cfg, 0, sizeof(rep_cfg));
+	rep_cfg.req_type = LIO_VF_REP_REQ_STATE;
+	rep_cfg.ifidx = vf_rep->ifidx;
+	rep_cfg.rep_state.state = LIO_VF_REP_STATE_UP;
+
+	ret = lio_vf_rep_send_soft_command(oct, &rep_cfg,
+					   sizeof(rep_cfg), NULL, 0);
+
+	if (ret) {
+		dev_err(&oct->pci_dev->dev,
+			"VF_REP open failed with err %d\n", ret);
+		return -EIO;
+	}
+
+	atomic_set(&vf_rep->ifstate, (atomic_read(&vf_rep->ifstate) |
+				      LIO_IFSTATE_RUNNING));
+
+	netif_carrier_on(ndev);
+	netif_start_queue(ndev);
+
+	return 0;
+}
+
+static int
+lio_vf_rep_stop(struct net_device *ndev)
+{
+	struct lio_vf_rep_desc *vf_rep = netdev_priv(ndev);
+	struct lio_vf_rep_req rep_cfg;
+	struct octeon_device *oct;
+	int ret;
+
+	oct = vf_rep->oct;
+
+	memset(&rep_cfg, 0, sizeof(rep_cfg));
+	rep_cfg.req_type = LIO_VF_REP_REQ_STATE;
+	rep_cfg.ifidx = vf_rep->ifidx;
+	rep_cfg.rep_state.state = LIO_VF_REP_STATE_DOWN;
+
+	ret = lio_vf_rep_send_soft_command(oct, &rep_cfg,
+					   sizeof(rep_cfg), NULL, 0);
+
+	if (ret) {
+		dev_err(&oct->pci_dev->dev,
+			"VF_REP dev stop failed with err %d\n", ret);
+		return -EIO;
+	}
+
+	atomic_set(&vf_rep->ifstate, (atomic_read(&vf_rep->ifstate) &
+				      ~LIO_IFSTATE_RUNNING));
+
+	netif_tx_disable(ndev);
+	netif_carrier_off(ndev);
+
+	return 0;
+}
+
+static void
+lio_vf_rep_tx_timeout(struct net_device *ndev)
+{
+	netif_trans_update(ndev);
+
+	netif_wake_queue(ndev);
+}
+
+static void
+lio_vf_rep_get_stats64(struct net_device *dev,
+		       struct rtnl_link_stats64 *stats64)
+{
+	struct lio_vf_rep_desc *vf_rep = netdev_priv(dev);
+
+	stats64->tx_packets = vf_rep->stats.tx_packets;
+	stats64->tx_bytes   = vf_rep->stats.tx_bytes;
+	stats64->tx_dropped = vf_rep->stats.tx_dropped;
+
+	stats64->rx_packets = vf_rep->stats.rx_packets;
+	stats64->rx_bytes   = vf_rep->stats.rx_bytes;
+	stats64->rx_dropped = vf_rep->stats.rx_dropped;
+}
+
+static int
+lio_vf_rep_change_mtu(struct net_device *ndev, int new_mtu)
+{
+	struct lio_vf_rep_desc *vf_rep = netdev_priv(ndev);
+	struct lio_vf_rep_req rep_cfg;
+	struct octeon_device *oct;
+	int ret;
+
+	oct = vf_rep->oct;
+
+	memset(&rep_cfg, 0, sizeof(rep_cfg));
+	rep_cfg.req_type = LIO_VF_REP_REQ_MTU;
+	rep_cfg.ifidx = vf_rep->ifidx;
+	rep_cfg.rep_mtu.mtu = cpu_to_be32(new_mtu);
+
+	ret = lio_vf_rep_send_soft_command(oct, &rep_cfg,
+					   sizeof(rep_cfg), NULL, 0);
+	if (ret) {
+		dev_err(&oct->pci_dev->dev,
+			"Change MTU failed with err %d\n", ret);
+		return -EIO;
+	}
+
+	ndev->mtu = new_mtu;
+
+	return 0;
+}
+
+static int
+lio_vf_rep_phys_port_name(struct net_device *dev,
+			  char *buf, size_t len)
+{
+	struct lio_vf_rep_desc *vf_rep = netdev_priv(dev);
+	struct octeon_device *oct = vf_rep->oct;
+	int ret;
+
+	ret = snprintf(buf, len, "pf%dvf%d", oct->pf_num,
+		       vf_rep->ifidx - oct->pf_num * 64 - 1);
+	if (ret >= len)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+static struct net_device *
+lio_vf_rep_get_ndev(struct octeon_device *oct, int ifidx)
+{
+	int vf_id, max_vfs = CN23XX_MAX_VFS_PER_PF + 1;
+	int vfid_mask = max_vfs - 1;
+
+	if (ifidx <= oct->pf_num * max_vfs ||
+	    ifidx >= oct->pf_num * max_vfs + max_vfs)
+		return NULL;
+
+	/* ifidx 1-63 for PF0 VFs
+	 * ifidx 65-127 for PF1 VFs
+	 */
+	vf_id = (ifidx & vfid_mask) - 1;
+
+	return oct->vf_rep_list.ndev[vf_id];
+}
+
+static void
+lio_vf_rep_copy_packet(struct octeon_device *oct,
+		       struct sk_buff *skb,
+		       int len)
+{
+	if (likely(len > MIN_SKB_SIZE)) {
+		struct octeon_skb_page_info *pg_info;
+		unsigned char *va;
+
+		pg_info = ((struct octeon_skb_page_info *)(skb->cb));
+		if (pg_info->page) {
+			va = page_address(pg_info->page) +
+				pg_info->page_offset;
+			memcpy(skb->data, va, MIN_SKB_SIZE);
+			skb_put(skb, MIN_SKB_SIZE);
+		}
+
+		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+				pg_info->page,
+				pg_info->page_offset + MIN_SKB_SIZE,
+				len - MIN_SKB_SIZE,
+				LIO_RXBUFFER_SZ);
+	} else {
+		struct octeon_skb_page_info *pg_info =
+			((struct octeon_skb_page_info *)(skb->cb));
+
+		skb_copy_to_linear_data(skb, page_address(pg_info->page) +
+					pg_info->page_offset, len);
+		skb_put(skb, len);
+		put_page(pg_info->page);
+	}
+}
+
+static int
+lio_vf_rep_pkt_recv(struct octeon_recv_info *recv_info, void *buf)
+{
+	struct octeon_recv_pkt *recv_pkt = recv_info->recv_pkt;
+	struct lio_vf_rep_desc *vf_rep;
+	struct net_device *vf_ndev;
+	struct octeon_device *oct;
+	union octeon_rh *rh;
+	struct sk_buff *skb;
+	int i, ifidx;
+
+	oct = lio_get_device(recv_pkt->octeon_id);
+	if (!oct)
+		goto free_buffers;
+
+	skb = recv_pkt->buffer_ptr[0];
+	rh = &recv_pkt->rh;
+	ifidx = rh->r.ossp;
+
+	vf_ndev = lio_vf_rep_get_ndev(oct, ifidx);
+	if (!vf_ndev)
+		goto free_buffers;
+
+	vf_rep = netdev_priv(vf_ndev);
+	if (!(atomic_read(&vf_rep->ifstate) & LIO_IFSTATE_RUNNING) ||
+	    recv_pkt->buffer_count > 1)
+		goto free_buffers;
+
+	skb->dev = vf_ndev;
+
+	/* Multiple buffers are not used for vf_rep packets.
+	 * So just buffer_size[0] is valid.
+	 */
+	lio_vf_rep_copy_packet(oct, skb, recv_pkt->buffer_size[0]);
+
+	skb_pull(skb, rh->r_dh.len * BYTES_PER_DHLEN_UNIT);
+	skb->protocol = eth_type_trans(skb, skb->dev);
+	skb->ip_summed = CHECKSUM_NONE;
+
+	netif_rx(skb);
+
+	octeon_free_recv_info(recv_info);
+
+	return 0;
+
+free_buffers:
+	for (i = 0; i < recv_pkt->buffer_count; i++)
+		recv_buffer_free(recv_pkt->buffer_ptr[i]);
+
+	octeon_free_recv_info(recv_info);
+
+	return 0;
+}
+
+static void
+lio_vf_rep_packet_sent_callback(struct octeon_device *oct,
+				u32 status, void *buf)
+{
+	struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
+	struct sk_buff *skb = sc->ctxptr;
+	struct net_device *ndev = skb->dev;
+
+	dma_unmap_single(&oct->pci_dev->dev, sc->dmadptr,
+			 sc->datasize, DMA_TO_DEVICE);
+	dev_kfree_skb_any(skb);
+	octeon_free_soft_command(oct, sc);
+
+	if (octnet_iq_is_full(oct, sc->iq_no))
+		return;
+
+	if (netif_queue_stopped(ndev))
+		netif_wake_queue(ndev);
+}
+
+static int
+lio_vf_rep_pkt_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct lio_vf_rep_desc *vf_rep = netdev_priv(ndev);
+	struct net_device *parent_ndev = vf_rep->parent_ndev;
+	struct octeon_device *oct = vf_rep->oct;
+	struct octeon_instr_pki_ih3 *pki_ih3;
+	struct octeon_soft_command *sc;
+	struct lio *parent_lio;
+	int status;
+
+	parent_lio = GET_LIO(parent_ndev);
+
+	if (!(atomic_read(&vf_rep->ifstate) & LIO_IFSTATE_RUNNING) ||
+	    skb->len <= 0)
+		goto xmit_failed;
+
+	if (octnet_iq_is_full(vf_rep->oct, parent_lio->txq)) {
+		dev_err(&oct->pci_dev->dev, "VF rep: Device IQ full\n");
+		netif_stop_queue(ndev);
+		return NETDEV_TX_BUSY;
+	}
+
+	sc = (struct octeon_soft_command *)
+		octeon_alloc_soft_command(oct, 0, 0, 0);
+	if (!sc) {
+		dev_err(&oct->pci_dev->dev, "VF rep: Soft command alloc failed\n");
+		goto xmit_failed;
+	}
+
+	/* Multiple buffers are not used for vf_rep packets. */
+	if (skb_shinfo(skb)->nr_frags != 0) {
+		dev_err(&oct->pci_dev->dev, "VF rep: nr_frags != 0. Dropping packet\n");
+		goto xmit_failed;
+	}
+
+	sc->dmadptr = dma_map_single(&oct->pci_dev->dev,
+				     skb->data, skb->len, DMA_TO_DEVICE);
+	if (dma_mapping_error(&oct->pci_dev->dev, sc->dmadptr)) {
+		dev_err(&oct->pci_dev->dev, "VF rep: DMA mapping failed\n");
+		goto xmit_failed;
+	}
+
+	sc->virtdptr = skb->data;
+	sc->datasize = skb->len;
+	sc->ctxptr = skb;
+	sc->iq_no = parent_lio->txq;
+
+	octeon_prepare_soft_command(oct, sc, OPCODE_NIC, OPCODE_NIC_VF_REP_PKT,
+				    vf_rep->ifidx, 0, 0);
+	pki_ih3 = (struct octeon_instr_pki_ih3 *)&sc->cmd.cmd3.pki_ih3;
+	pki_ih3->tagtype = ORDERED_TAG;
+
+	sc->callback = lio_vf_rep_packet_sent_callback;
+	sc->callback_arg = sc;
+
+	status = octeon_send_soft_command(oct, sc);
+	if (status == IQ_SEND_FAILED) {
+		dma_unmap_single(&oct->pci_dev->dev, sc->dmadptr,
+				 sc->datasize, DMA_TO_DEVICE);
+		goto xmit_failed;
+	}
+
+	if (status == IQ_SEND_STOP)
+		netif_stop_queue(ndev);
+
+	netif_trans_update(ndev);
+
+	return NETDEV_TX_OK;
+
+xmit_failed:
+	dev_kfree_skb_any(skb);
+
+	return NETDEV_TX_OK;
+}
+
+static int
+lio_vf_rep_attr_get(struct net_device *dev, struct switchdev_attr *attr)
+{
+	struct lio_vf_rep_desc *vf_rep = netdev_priv(dev);
+	struct net_device *parent_ndev = vf_rep->parent_ndev;
+	struct lio *lio = GET_LIO(parent_ndev);
+
+	switch (attr->id) {
+	case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
+		attr->u.ppid.id_len = ETH_ALEN;
+		ether_addr_copy(attr->u.ppid.id,
+				(void *)&lio->linfo.hw_addr + 2);
+		break;
+
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static const struct switchdev_ops lio_vf_rep_switchdev_ops = {
+	.switchdev_port_attr_get        = lio_vf_rep_attr_get,
+};
+
+static void
+lio_vf_rep_fetch_stats(struct work_struct *work)
+{
+	struct cavium_wk *wk = (struct cavium_wk *)work;
+	struct lio_vf_rep_desc *vf_rep = wk->ctxptr;
+	struct lio_vf_rep_stats stats;
+	struct lio_vf_rep_req rep_cfg;
+	struct octeon_device *oct;
+	int ret;
+
+	oct = vf_rep->oct;
+
+	memset(&rep_cfg, 0, sizeof(rep_cfg));
+	rep_cfg.req_type = LIO_VF_REP_REQ_STATS;
+	rep_cfg.ifidx = vf_rep->ifidx;
+
+	ret = lio_vf_rep_send_soft_command(oct, &rep_cfg, sizeof(rep_cfg),
+					   &stats, sizeof(stats));
+
+	if (!ret) {
+		octeon_swap_8B_data((u64 *)&stats, (sizeof(stats) >> 3));
+		memcpy(&vf_rep->stats, &stats, sizeof(stats));
+	}
+
+	schedule_delayed_work(&vf_rep->stats_wk.work,
+			      msecs_to_jiffies(LIO_VF_REP_STATS_POLL_TIME_MS));
+}
+
+int
+lio_vf_rep_create(struct octeon_device *oct)
+{
+	struct lio_vf_rep_desc *vf_rep;
+	struct net_device *ndev;
+	int i, num_vfs;
+
+	if (oct->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV)
+		return 0;
+
+	if (!oct->sriov_info.sriov_enabled)
+		return 0;
+
+	num_vfs = oct->sriov_info.num_vfs_alloced;
+
+	oct->vf_rep_list.num_vfs = 0;
+	for (i = 0; i < num_vfs; i++) {
+		ndev = alloc_etherdev(sizeof(struct lio_vf_rep_desc));
+
+		if (!ndev) {
+			dev_err(&oct->pci_dev->dev,
+				"VF rep device %d creation failed\n", i);
+			goto cleanup;
+		}
+
+		ndev->min_mtu = LIO_MIN_MTU_SIZE;
+		ndev->max_mtu = LIO_MAX_MTU_SIZE;
+		ndev->netdev_ops = &lio_vf_rep_ndev_ops;
+		SWITCHDEV_SET_OPS(ndev, &lio_vf_rep_switchdev_ops);
+
+		vf_rep = netdev_priv(ndev);
+		memset(vf_rep, 0, sizeof(*vf_rep));
+
+		vf_rep->ndev = ndev;
+		vf_rep->oct = oct;
+		vf_rep->parent_ndev = oct->props[0].netdev;
+		vf_rep->ifidx = (oct->pf_num * 64) + i + 1;
+
+		eth_hw_addr_random(ndev);
+
+		if (register_netdev(ndev)) {
+			dev_err(&oct->pci_dev->dev, "VF rep nerdev registration failed\n");
+
+			free_netdev(ndev);
+			goto cleanup;
+		}
+
+		netif_carrier_off(ndev);
+
+		INIT_DELAYED_WORK(&vf_rep->stats_wk.work,
+				  lio_vf_rep_fetch_stats);
+		vf_rep->stats_wk.ctxptr = (void *)vf_rep;
+		schedule_delayed_work(&vf_rep->stats_wk.work,
+				      msecs_to_jiffies
+				      (LIO_VF_REP_STATS_POLL_TIME_MS));
+		oct->vf_rep_list.num_vfs++;
+		oct->vf_rep_list.ndev[i] = ndev;
+	}
+
+	if (octeon_register_dispatch_fn(oct, OPCODE_NIC,
+					OPCODE_NIC_VF_REP_PKT,
+					lio_vf_rep_pkt_recv, oct)) {
+		dev_err(&oct->pci_dev->dev, "VF rep Dispatch func registration failed\n");
+
+		goto cleanup;
+	}
+
+	return 0;
+
+cleanup:
+	for (i = 0; i < oct->vf_rep_list.num_vfs; i++) {
+		ndev = oct->vf_rep_list.ndev[i];
+		oct->vf_rep_list.ndev[i] = NULL;
+		if (ndev) {
+			vf_rep = netdev_priv(ndev);
+			cancel_delayed_work_sync
+				(&vf_rep->stats_wk.work);
+			unregister_netdev(ndev);
+			free_netdev(ndev);
+		}
+	}
+
+	oct->vf_rep_list.num_vfs = 0;
+
+	return -1;
+}
+
+void
+lio_vf_rep_destroy(struct octeon_device *oct)
+{
+	struct lio_vf_rep_desc *vf_rep;
+	struct net_device *ndev;
+	int i;
+
+	if (oct->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV)
+		return;
+
+	if (!oct->sriov_info.sriov_enabled)
+		return;
+
+	for (i = 0; i < oct->vf_rep_list.num_vfs; i++) {
+		ndev = oct->vf_rep_list.ndev[i];
+		oct->vf_rep_list.ndev[i] = NULL;
+		if (ndev) {
+			vf_rep = netdev_priv(ndev);
+			cancel_delayed_work_sync
+				(&vf_rep->stats_wk.work);
+			netif_tx_disable(ndev);
+			netif_carrier_off(ndev);
+
+			unregister_netdev(ndev);
+			free_netdev(ndev);
+		}
+	}
+
+	oct->vf_rep_list.num_vfs = 0;
+}
+
+static int
+lio_vf_rep_netdev_event(struct notifier_block *nb,
+			unsigned long event, void *ptr)
+{
+	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+	struct lio_vf_rep_desc *vf_rep;
+	struct lio_vf_rep_req rep_cfg;
+	struct octeon_device *oct;
+	int ret;
+
+	switch (event) {
+	case NETDEV_REGISTER:
+	case NETDEV_CHANGENAME:
+		break;
+
+	default:
+		return NOTIFY_DONE;
+	}
+
+	if (ndev->netdev_ops != &lio_vf_rep_ndev_ops)
+		return NOTIFY_DONE;
+
+	vf_rep = netdev_priv(ndev);
+	oct = vf_rep->oct;
+
+	if (strlen(ndev->name) > LIO_IF_NAME_SIZE) {
+		dev_err(&oct->pci_dev->dev,
+			"Device name change sync failed as the size is > %d\n",
+			LIO_IF_NAME_SIZE);
+		return NOTIFY_DONE;
+	}
+
+	memset(&rep_cfg, 0, sizeof(rep_cfg));
+	rep_cfg.req_type = LIO_VF_REP_REQ_DEVNAME;
+	rep_cfg.ifidx = vf_rep->ifidx;
+	strncpy(rep_cfg.rep_name.name, ndev->name, LIO_IF_NAME_SIZE);
+
+	ret = lio_vf_rep_send_soft_command(oct, &rep_cfg,
+					   sizeof(rep_cfg), NULL, 0);
+	if (ret)
+		dev_err(&oct->pci_dev->dev,
+			"vf_rep netdev name change failed with err %d\n", ret);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block lio_vf_rep_netdev_notifier = {
+	.notifier_call = lio_vf_rep_netdev_event,
+};
+
+int
+lio_vf_rep_modinit(void)
+{
+	if (register_netdevice_notifier(&lio_vf_rep_netdev_notifier)) {
+		pr_err("netdev notifier registration failed\n");
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+void
+lio_vf_rep_modexit(void)
+{
+	if (unregister_netdevice_notifier(&lio_vf_rep_netdev_notifier))
+		pr_err("netdev notifier unregister failed\n");
+}
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.h b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.h
new file mode 100644
index 000000000000..bb3cedc63c63
--- /dev/null
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.h
@@ -0,0 +1,49 @@
+/**********************************************************************
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2017 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * This file may also be available under a different license from Cavium.
+ * Contact Cavium, Inc. for more information
+ **********************************************************************/
+
+/*! \file octeon_vf_main.h
+ *  \brief Host Driver: This file defines vf_rep related macros and structures
+ */
+#ifndef __LIO_VF_REP_H__
+#define __LIO_VF_REP_H__
+#define LIO_VF_REP_REQ_TMO_MS 5000
+#define LIO_VF_REP_STATS_POLL_TIME_MS 200
+
+struct lio_vf_rep_desc {
+	struct net_device *parent_ndev;
+	struct net_device *ndev;
+	struct octeon_device *oct;
+	struct lio_vf_rep_stats stats;
+	struct cavium_wk stats_wk;
+	atomic_t ifstate;
+	int ifidx;
+};
+
+struct lio_vf_rep_sc_ctx {
+	struct completion complete;
+};
+
+int lio_vf_rep_create(struct octeon_device *oct);
+void lio_vf_rep_destroy(struct octeon_device *oct);
+int lio_vf_rep_modinit(void);
+void lio_vf_rep_modexit(void);
+#endif
diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
index 3788c8cd082a..522dcc4dcff7 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
@@ -27,8 +27,8 @@
 
 #define LIQUIDIO_PACKAGE ""
 #define LIQUIDIO_BASE_MAJOR_VERSION 1
-#define LIQUIDIO_BASE_MINOR_VERSION 6
-#define LIQUIDIO_BASE_MICRO_VERSION 1
+#define LIQUIDIO_BASE_MINOR_VERSION 7
+#define LIQUIDIO_BASE_MICRO_VERSION 0
 #define LIQUIDIO_BASE_VERSION   __stringify(LIQUIDIO_BASE_MAJOR_VERSION) "." \
 				__stringify(LIQUIDIO_BASE_MINOR_VERSION)
 #define LIQUIDIO_MICRO_VERSION  "." __stringify(LIQUIDIO_BASE_MICRO_VERSION)
@@ -84,10 +84,14 @@ enum octeon_tag_type {
 #define OPCODE_NIC_IF_CFG              0x09
 #define OPCODE_NIC_VF_DRV_NOTICE       0x0A
 #define OPCODE_NIC_INTRMOD_PARAMS      0x0B
+#define OPCODE_NIC_SYNC_OCTEON_TIME	0x14
 #define VF_DRV_LOADED                  1
 #define VF_DRV_REMOVED                -1
 #define VF_DRV_MACADDR_CHANGED         2
 
+#define OPCODE_NIC_VF_REP_PKT          0x15
+#define OPCODE_NIC_VF_REP_CMD          0x16
+
 #define CORE_DRV_TEST_SCATTER_OP    0xFFF5
 
 /* Application codes advertised by the core driver initialization packet. */
@@ -108,6 +112,10 @@ enum octeon_tag_type {
 
 #define SCR2_BIT_FW_LOADED	    63
 
+/* App specific capabilities from firmware to pf driver */
+#define LIQUIDIO_TIME_SYNC_CAP 0x1
+#define LIQUIDIO_SWITCHDEV_CAP 0x2
+
 static inline u32 incr_index(u32 index, u32 count, u32 max)
 {
 	if ((index + count) >= max)
@@ -901,4 +909,60 @@ union oct_nic_if_cfg {
 	} s;
 };
 
+struct lio_time {
+	s64 sec;   /* seconds */
+	s64 nsec;  /* nanoseconds */
+};
+
+struct lio_vf_rep_stats {
+	u64 tx_packets;
+	u64 tx_bytes;
+	u64 tx_dropped;
+
+	u64 rx_packets;
+	u64 rx_bytes;
+	u64 rx_dropped;
+};
+
+enum lio_vf_rep_req_type {
+	LIO_VF_REP_REQ_NONE,
+	LIO_VF_REP_REQ_STATE,
+	LIO_VF_REP_REQ_MTU,
+	LIO_VF_REP_REQ_STATS,
+	LIO_VF_REP_REQ_DEVNAME
+};
+
+enum {
+	LIO_VF_REP_STATE_DOWN,
+	LIO_VF_REP_STATE_UP
+};
+
+#define LIO_IF_NAME_SIZE 16
+struct lio_vf_rep_req {
+	u8 req_type;
+	u8 ifidx;
+	u8 rsvd[6];
+
+	union {
+		struct lio_vf_rep_name {
+			char name[LIO_IF_NAME_SIZE];
+		} rep_name;
+
+		struct lio_vf_rep_mtu {
+			u32 mtu;
+			u32 rsvd;
+		} rep_mtu;
+
+		struct lio_vf_rep_state {
+			u8 state;
+			u8 rsvd[7];
+		} rep_state;
+	};
+};
+
+struct lio_vf_rep_resp {
+	u64 rh;
+	u8  status;
+	u8  rsvd[7];
+};
 #endif
diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_image.h b/drivers/net/ethernet/cavium/liquidio/liquidio_image.h
index 78a3685f6fe0..5bf5e8791dfb 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_image.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_image.h
@@ -24,6 +24,7 @@
 #define LIO_FW_BASE_NAME        "lio_"
 #define LIO_FW_NAME_SUFFIX      ".bin"
 #define LIO_FW_NAME_TYPE_NIC    "nic"
+#define LIO_FW_NAME_TYPE_AUTO   "auto"
 #define LIO_FW_NAME_TYPE_NONE   "none"
 #define LIO_MAX_FIRMWARE_VERSION_LEN 16
 
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_config.h b/drivers/net/ethernet/cavium/liquidio/octeon_config.h
index 63bd9c94e547..ceac74388e09 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_config.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_config.h
@@ -37,6 +37,8 @@
 #define   MAX_OCTEON_LINKS	       MAX_OCTEON_NICIF
 #define   MAX_OCTEON_MULTICAST_ADDR    32
 
+#define   MAX_OCTEON_FILL_COUNT        8
+
 /* CN6xxx IQ configuration macros */
 #define   CN6XXX_MAX_INPUT_QUEUES      32
 #define   CN6XXX_MAX_IQ_DESCRIPTORS    2048
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_console.c b/drivers/net/ethernet/cavium/liquidio/octeon_console.c
index ec3dd69cd6b2..7f97ae48efed 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_console.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_console.c
@@ -803,15 +803,18 @@ static int octeon_console_read(struct octeon_device *oct, u32 console_num,
 }
 
 #define FBUF_SIZE	(4 * 1024 * 1024)
+#define MAX_BOOTTIME_SIZE    80
 
 int octeon_download_firmware(struct octeon_device *oct, const u8 *data,
 			     size_t size)
 {
-	int ret = 0;
+	struct octeon_firmware_file_header *h;
+	char boottime[MAX_BOOTTIME_SIZE];
+	struct timespec64 ts;
 	u32 crc32_result;
 	u64 load_addr;
 	u32 image_len;
-	struct octeon_firmware_file_header *h;
+	int ret = 0;
 	u32 i, rem;
 
 	if (size < sizeof(struct octeon_firmware_file_header)) {
@@ -890,11 +893,34 @@ int octeon_download_firmware(struct octeon_device *oct, const u8 *data,
 			load_addr += size;
 		}
 	}
+
+	/* Pass date and time information to NIC at the time of loading
+	 * firmware and periodically update the host time to NIC firmware.
+	 * This is to make NIC firmware use the same time reference as Host,
+	 * so that it is easy to correlate logs from firmware and host for
+	 * debugging.
+	 *
+	 * Octeon always uses UTC time. so timezone information is not sent.
+	 */
+	getnstimeofday64(&ts);
+	ret = snprintf(boottime, MAX_BOOTTIME_SIZE,
+		       " time_sec=%lld time_nsec=%ld",
+		       (s64)ts.tv_sec, ts.tv_nsec);
+	if ((sizeof(h->bootcmd) - strnlen(h->bootcmd, sizeof(h->bootcmd))) <
+		ret) {
+		dev_err(&oct->pci_dev->dev, "Boot command buffer too small\n");
+		return -EINVAL;
+	}
+	strncat(h->bootcmd, boottime,
+		sizeof(h->bootcmd) - strnlen(h->bootcmd, sizeof(h->bootcmd)));
+
 	dev_info(&oct->pci_dev->dev, "Writing boot command: %s\n",
 		 h->bootcmd);
 
 	/* Invoke the bootcmd */
 	ret = octeon_console_send_cmd(oct, h->bootcmd, 50);
+	if (ret)
+		dev_info(&oct->pci_dev->dev, "Boot command send failed\n");
 
-	return 0;
+	return ret;
 }
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
index 29d53b1763a7..2c615ab09e64 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
@@ -541,6 +541,7 @@ static char oct_dev_app_str[CVM_DRV_APP_COUNT + 1][32] = {
 
 static struct octeon_device *octeon_device[MAX_OCTEON_DEVICES];
 static atomic_t adapter_refcounts[MAX_OCTEON_DEVICES];
+static atomic_t adapter_fw_states[MAX_OCTEON_DEVICES];
 
 static u32 octeon_device_count;
 /* locks device array (i.e. octeon_device[]) */
@@ -770,6 +771,10 @@ int octeon_register_device(struct octeon_device *oct,
 	oct->adapter_refcount = &adapter_refcounts[oct->octeon_id];
 	atomic_set(oct->adapter_refcount, 0);
 
+	/* Like the reference count, the f/w state is shared 'per-adapter' */
+	oct->adapter_fw_state = &adapter_fw_states[oct->octeon_id];
+	atomic_set(oct->adapter_fw_state, FW_NEEDS_TO_BE_LOADED);
+
 	spin_lock(&octeon_devices_lock);
 	for (idx = (int)oct->octeon_id - 1; idx >= 0; idx--) {
 		if (!octeon_device[idx]) {
@@ -780,11 +785,15 @@ int octeon_register_device(struct octeon_device *oct,
 			atomic_inc(oct->adapter_refcount);
 			return 1; /* here, refcount is guaranteed to be 1 */
 		}
-		/* if another device is at same bus/dev, use its refcounter */
+		/* If another device is at same bus/dev, use its refcounter
+		 * (and f/w state variable).
+		 */
 		if ((octeon_device[idx]->loc.bus == bus) &&
 		    (octeon_device[idx]->loc.dev == dev)) {
 			oct->adapter_refcount =
 				octeon_device[idx]->adapter_refcount;
+			oct->adapter_fw_state =
+				octeon_device[idx]->adapter_fw_state;
 			break;
 		}
 	}
@@ -1171,6 +1180,10 @@ octeon_register_dispatch_fn(struct octeon_device *oct,
 		spin_unlock_bh(&oct->dispatch.lock);
 
 	} else {
+		if (pfn == fn &&
+		    octeon_get_dispatch_arg(oct, opcode, subcode) == fn_arg)
+			return 0;
+
 		dev_err(&oct->pci_dev->dev,
 			"Found previously registered dispatch fn for opcode/subcode: %x/%x\n",
 			opcode, subcode);
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
index 894af199ddef..63b0c758a0a6 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
@@ -23,6 +23,7 @@
 #define  _OCTEON_DEVICE_H_
 
 #include <linux/interrupt.h>
+#include <net/devlink.h>
 
 /** PCI VendorId Device Id */
 #define  OCTEON_CN68XX_PCIID          0x91177d
@@ -50,6 +51,13 @@ enum octeon_pci_swap_mode {
 	OCTEON_PCI_32BIT_LW_SWAP = 3
 };
 
+enum lio_fw_state {
+	FW_IS_PRELOADED = 0,
+	FW_NEEDS_TO_BE_LOADED = 1,
+	FW_IS_BEING_LOADED = 2,
+	FW_HAS_BEEN_LOADED = 3,
+};
+
 enum {
 	OCTEON_CONFIG_TYPE_DEFAULT = 0,
 	NUM_OCTEON_CONFS,
@@ -384,6 +392,15 @@ struct octeon_ioq_vector {
 	u32			ioq_num;
 };
 
+struct lio_vf_rep_list {
+	int num_vfs;
+	struct net_device *ndev[CN23XX_MAX_VFS_PER_PF];
+};
+
+struct lio_devlink_priv {
+	struct octeon_device *oct;
+};
+
 /** The Octeon device.
  *  Each Octeon device has this structure to represent all its
  *  components.
@@ -557,7 +574,14 @@ struct octeon_device {
 	} loc;
 
 	atomic_t *adapter_refcount; /* reference count of adapter */
+
+	atomic_t *adapter_fw_state; /* per-adapter, lio_fw_state */
+
 	bool ptp_enable;
+
+	struct lio_vf_rep_list vf_rep_list;
+	struct devlink *devlink;
+	enum devlink_eswitch_mode eswitch_mode;
 };
 
 #define  OCT_DRV_ONLINE 1
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
index 9372d4ce9954..3461d65ff4eb 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
@@ -52,8 +52,8 @@ struct __dispatch {
  *  @return  Failure: NULL
  *
  */
-static inline void *octeon_get_dispatch_arg(struct octeon_device *octeon_dev,
-					    u16 opcode, u16 subcode)
+void *octeon_get_dispatch_arg(struct octeon_device *octeon_dev,
+			      u16 opcode, u16 subcode)
 {
 	int idx;
 	struct list_head *dispatch;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
index f91bc84d1719..815a9f56fd59 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
@@ -400,6 +400,9 @@ int octeon_register_dispatch_fn(struct octeon_device *oct,
 				u16 subcode,
 				octeon_dispatch_fn_t fn, void *fn_arg);
 
+void *octeon_get_dispatch_arg(struct octeon_device *oct,
+			      u16 opcode, u16 subcode);
+
 void octeon_droq_print_stats(void);
 
 u32 octeon_droq_check_hw_for_pkts(struct octeon_droq *droq);
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
index 5c3c8da976f7..81c987682941 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
@@ -343,6 +343,9 @@ int octeon_delete_instr_queue(struct octeon_device *octeon_dev, u32 iq_no);
 
 int lio_wait_for_instr_fetch(struct octeon_device *oct);
 
+void
+octeon_ring_doorbell_locked(struct octeon_device *oct, u32 iq_no);
+
 int
 octeon_register_reqtype_free_fn(struct octeon_device *oct, int reqtype,
 				void (*fn)(void *));
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
index 32ef3a7d88d8..c846eec11a45 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
@@ -63,7 +63,7 @@ struct octnet_buf_free_info {
 };
 
 /* BQL-related functions */
-void octeon_report_sent_bytes_to_bql(void *buf, int reqtype);
+int octeon_report_sent_bytes_to_bql(void *buf, int reqtype);
 void octeon_update_tx_completion_counters(void *buf, int reqtype,
 					  unsigned int *pkts_compl,
 					  unsigned int *bytes_compl);
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index 9e36319cead6..f2d1a076a038 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -136,6 +136,9 @@ struct lio {
 	/* work queue for  link status */
 	struct cavium_wq	link_status_wq;
 
+	/* work queue to regularly send local time to octeon firmware */
+	struct cavium_wq	sync_octeon_time_wq;
+
 	int netdev_uc_count;
 };
 
@@ -195,7 +198,7 @@ static inline void
 	struct sk_buff *skb;
 	struct octeon_skb_page_info *skb_pg_info;
 
-	page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+	page = alloc_page(GFP_ATOMIC);
 	if (unlikely(!page))
 		return NULL;
 
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c
index b457cf23fce6..150609bd8849 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c
@@ -82,9 +82,10 @@ octeon_alloc_soft_command_resp(struct octeon_device    *oct,
 }
 
 int octnet_send_nic_data_pkt(struct octeon_device *oct,
-			     struct octnic_data_pkt *ndata)
+			     struct octnic_data_pkt *ndata,
+			     int xmit_more)
 {
-	int ring_doorbell = 1;
+	int ring_doorbell = !xmit_more;
 
 	return octeon_send_command(oct, ndata->q_no, ring_doorbell, &ndata->cmd,
 				   ndata->buf, ndata->datasize,
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h
index 6480ef863441..de4130d26a98 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h
@@ -279,7 +279,8 @@ octeon_alloc_soft_command_resp(struct octeon_device    *oct,
  * queue should be stopped, and IQ_SEND_OK if it sent okay.
  */
 int octnet_send_nic_data_pkt(struct octeon_device *oct,
-			     struct octnic_data_pkt *ndata);
+			     struct octnic_data_pkt *ndata,
+			     int xmit_more);
 
 /** Send a NIC control packet to the device
  * @param oct - octeon device pointer
diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c
index 1e0fbce86d60..e07d2093b971 100644
--- a/drivers/net/ethernet/cavium/liquidio/request_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c
@@ -278,6 +278,18 @@ ring_doorbell(struct octeon_device *oct, struct octeon_instr_queue *iq)
 	}
 }
 
+void
+octeon_ring_doorbell_locked(struct octeon_device *oct, u32 iq_no)
+{
+	struct octeon_instr_queue *iq;
+
+	iq = oct->instr_queue[iq_no];
+	spin_lock(&iq->post_lock);
+	if (iq->fill_cnt)
+		ring_doorbell(oct, iq);
+	spin_unlock(&iq->post_lock);
+}
+
 static inline void __copy_cmd_into_iq(struct octeon_instr_queue *iq,
 				      u8 *cmd)
 {
@@ -477,8 +489,6 @@ octeon_flush_iq(struct octeon_device *oct, struct octeon_instr_queue *iq,
 		}
 
 		tot_inst_processed += inst_processed;
-		inst_processed = 0;
-
 	} while (tot_inst_processed < napi_budget);
 
 	if (napi_budget && (tot_inst_processed >= napi_budget))
@@ -543,6 +553,7 @@ octeon_send_command(struct octeon_device *oct, u32 iq_no,
 		    u32 force_db, void *cmd, void *buf,
 		    u32 datasize, u32 reqtype)
 {
+	int xmit_stopped;
 	struct iq_post_status st;
 	struct octeon_instr_queue *iq = oct->instr_queue[iq_no];
 
@@ -554,12 +565,13 @@ octeon_send_command(struct octeon_device *oct, u32 iq_no,
 	st = __post_command2(iq, cmd);
 
 	if (st.status != IQ_SEND_FAILED) {
-		octeon_report_sent_bytes_to_bql(buf, reqtype);
+		xmit_stopped = octeon_report_sent_bytes_to_bql(buf, reqtype);
 		__add_to_request_list(iq, st.index, buf, reqtype);
 		INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, bytes_sent, datasize);
 		INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_posted, 1);
 
-		if (force_db)
+		if (iq->fill_cnt >= MAX_OCTEON_FILL_COUNT || force_db ||
+		    xmit_stopped || st.status == IQ_SEND_STOP)
 			ring_doorbell(oct, iq);
 	} else {
 		INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_dropped, 1);
diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
index 2887bcaf6af5..3f6afb54a5eb 100644
--- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
+++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
@@ -705,14 +705,15 @@ static int octeon_mgmt_ioctl_hwtstamp(struct net_device *netdev,
 			u64 clock_comp = (NSEC_PER_SEC << 32) /	octeon_get_io_clock_rate();
 			if (!ptp.s.ptp_en)
 				cvmx_write_csr(CVMX_MIO_PTP_CLOCK_COMP, clock_comp);
-			pr_info("PTP Clock: Using sclk reference at %lld Hz\n",
-				(NSEC_PER_SEC << 32) / clock_comp);
+			netdev_info(netdev,
+				    "PTP Clock using sclk reference @ %lldHz\n",
+				    (NSEC_PER_SEC << 32) / clock_comp);
 		} else {
 			/* The clock is already programmed to use a GPIO */
 			u64 clock_comp = cvmx_read_csr(CVMX_MIO_PTP_CLOCK_COMP);
-			pr_info("PTP Clock: Using GPIO %d at %lld Hz\n",
-				ptp.s.ext_clk_in,
-				(NSEC_PER_SEC << 32) / clock_comp);
+			netdev_info(netdev,
+				    "PTP Clock using GPIO%d @ %lld Hz\n",
+				    ptp.s.ext_clk_in, (NSEC_PER_SEC << 32) / clock_comp);
 		}
 
 		/* Enable the clock if it wasn't done already */
@@ -926,14 +927,11 @@ static void octeon_mgmt_adjust_link(struct net_device *netdev)
 	spin_unlock_irqrestore(&p->lock, flags);
 
 	if (link_changed != 0) {
-		if (link_changed > 0) {
-			pr_info("%s: Link is up - %d/%s\n", netdev->name,
-				phydev->speed,
-				phydev->duplex == DUPLEX_FULL ?
-				"Full" : "Half");
-		} else {
-			pr_info("%s: Link is down\n", netdev->name);
-		}
+		if (link_changed > 0)
+			netdev_info(netdev, "Link is up - %d/%s\n",
+				    phydev->speed, phydev->duplex == DUPLEX_FULL ? "Full" : "Half");
+		else
+			netdev_info(netdev, "Link is down\n");
 	}
 }
 
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index fb770b0182d3..8f1dd55b3e08 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -361,17 +361,8 @@ static void nic_set_lmac_vf_mapping(struct nicpf *nic)
 	}
 }
 
-static void nic_free_lmacmem(struct nicpf *nic)
+static void nic_get_hw_info(struct nicpf *nic)
 {
-	kfree(nic->vf_lmac_map);
-	kfree(nic->link);
-	kfree(nic->duplex);
-	kfree(nic->speed);
-}
-
-static int nic_get_hw_info(struct nicpf *nic)
-{
-	u8 max_lmac;
 	u16 sdevid;
 	struct hw_info *hw = nic->hw;
 
@@ -419,41 +410,16 @@ static int nic_get_hw_info(struct nicpf *nic)
 		break;
 	}
 	hw->tl4_cnt = MAX_QUEUES_PER_QSET * pci_sriov_get_totalvfs(nic->pdev);
-
-	/* Allocate memory for LMAC tracking elements */
-	max_lmac = hw->bgx_cnt * MAX_LMAC_PER_BGX;
-	nic->vf_lmac_map = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL);
-	if (!nic->vf_lmac_map)
-		goto error;
-	nic->link = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL);
-	if (!nic->link)
-		goto error;
-	nic->duplex = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL);
-	if (!nic->duplex)
-		goto error;
-	nic->speed = kmalloc_array(max_lmac, sizeof(u32), GFP_KERNEL);
-	if (!nic->speed)
-		goto error;
-	return 0;
-
-error:
-	nic_free_lmacmem(nic);
-	return -ENOMEM;
 }
 
 #define BGX0_BLOCK 8
 #define BGX1_BLOCK 9
 
-static int nic_init_hw(struct nicpf *nic)
+static void nic_init_hw(struct nicpf *nic)
 {
-	int i, err;
+	int i;
 	u64 cqm_cfg;
 
-	/* Get HW capability info */
-	err = nic_get_hw_info(nic);
-	if (err)
-		return err;
-
 	/* Enable NIC HW block */
 	nic_reg_write(nic, NIC_PF_CFG, 0x3);
 
@@ -498,8 +464,6 @@ static int nic_init_hw(struct nicpf *nic)
 	cqm_cfg = nic_reg_read(nic, NIC_PF_CQM_CFG);
 	if (cqm_cfg < NICPF_CQM_MIN_DROP_LEVEL)
 		nic_reg_write(nic, NIC_PF_CQM_CFG, NICPF_CQM_MIN_DROP_LEVEL);
-
-	return 0;
 }
 
 /* Channel parse index configuration */
@@ -584,9 +548,6 @@ static void nic_config_cpi(struct nicpf *nic, struct cpi_cfg_msg *cfg)
 static void nic_send_rss_size(struct nicpf *nic, int vf)
 {
 	union nic_mbx mbx = {};
-	u64  *msg;
-
-	msg = (u64 *)&mbx;
 
 	mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE;
 	mbx.rss_size.ind_tbl_size = nic->hw->rss_ind_tbl_size;
@@ -608,7 +569,6 @@ static void nic_config_rss(struct nicpf *nic, struct rss_cfg_msg *cfg)
 	rssi_base = nic->rssi_base[cfg->vf_id] + cfg->tbl_offset;
 
 	rssi = rssi_base;
-	qset = cfg->vf_id;
 
 	for (; rssi < (rssi_base + cfg->tbl_len); rssi++) {
 		u8 svf = cfg->ind_tbl[idx] >> 3;
@@ -1273,6 +1233,7 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	struct device *dev = &pdev->dev;
 	struct nicpf *nic;
+	u8     max_lmac;
 	int    err;
 
 	BUILD_BUG_ON(sizeof(union nic_mbx) > 16);
@@ -1282,10 +1243,8 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		return -ENOMEM;
 
 	nic->hw = devm_kzalloc(dev, sizeof(struct hw_info), GFP_KERNEL);
-	if (!nic->hw) {
-		devm_kfree(dev, nic);
+	if (!nic->hw)
 		return -ENOMEM;
-	}
 
 	pci_set_drvdata(pdev, nic);
 
@@ -1326,11 +1285,33 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	nic->node = nic_get_node_id(pdev);
 
-	/* Initialize hardware */
-	err = nic_init_hw(nic);
-	if (err)
+	/* Get HW capability info */
+	nic_get_hw_info(nic);
+
+	/* Allocate memory for LMAC tracking elements */
+	err = -ENOMEM;
+	max_lmac = nic->hw->bgx_cnt * MAX_LMAC_PER_BGX;
+
+	nic->vf_lmac_map = devm_kmalloc_array(dev, max_lmac, sizeof(u8),
+					      GFP_KERNEL);
+	if (!nic->vf_lmac_map)
+		goto err_release_regions;
+
+	nic->link = devm_kmalloc_array(dev, max_lmac, sizeof(u8), GFP_KERNEL);
+	if (!nic->link)
+		goto err_release_regions;
+
+	nic->duplex = devm_kmalloc_array(dev, max_lmac, sizeof(u8), GFP_KERNEL);
+	if (!nic->duplex)
+		goto err_release_regions;
+
+	nic->speed = devm_kmalloc_array(dev, max_lmac, sizeof(u32), GFP_KERNEL);
+	if (!nic->speed)
 		goto err_release_regions;
 
+	/* Initialize hardware */
+	nic_init_hw(nic);
+
 	nic_set_lmac_vf_mapping(nic);
 
 	/* Register interrupts */
@@ -1364,9 +1345,6 @@ err_unregister_interrupts:
 err_release_regions:
 	pci_release_regions(pdev);
 err_disable_device:
-	nic_free_lmacmem(nic);
-	devm_kfree(dev, nic->hw);
-	devm_kfree(dev, nic);
 	pci_disable_device(pdev);
 	pci_set_drvdata(pdev, NULL);
 	return err;
@@ -1388,10 +1366,6 @@ static void nic_remove(struct pci_dev *pdev)
 	nic_unregister_interrupts(nic);
 	pci_release_regions(pdev);
 
-	nic_free_lmacmem(nic);
-	devm_kfree(&pdev->dev, nic->hw);
-	devm_kfree(&pdev->dev, nic);
-
 	pci_disable_device(pdev);
 	pci_set_drvdata(pdev, NULL);
 }
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 805ab45e9b5a..a063c36c4c58 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -523,6 +523,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
 
 	xdp.data_hard_start = page_address(page);
 	xdp.data = (void *)cpu_addr;
+	xdp_set_data_meta_invalid(&xdp);
 	xdp.data_end = xdp.data + len;
 	orig_data = xdp.data;
 
@@ -1740,7 +1741,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
 	return 0;
 }
 
-static int nicvf_xdp(struct net_device *netdev, struct netdev_xdp *xdp)
+static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
 {
 	struct nicvf *nic = netdev_priv(netdev);
 
@@ -1773,7 +1774,7 @@ static const struct net_device_ops nicvf_netdev_ops = {
 	.ndo_tx_timeout         = nicvf_tx_timeout,
 	.ndo_fix_features       = nicvf_fix_features,
 	.ndo_set_features       = nicvf_set_features,
-	.ndo_xdp		= nicvf_xdp,
+	.ndo_bpf		= nicvf_xdp,
 };
 
 static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c
index 0f13a7f7c1d3..30de26ef3da4 100644
--- a/drivers/net/ethernet/chelsio/cxgb/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
@@ -1882,10 +1882,10 @@ send:
 /*
  * Callback for the Tx buffer reclaim timer.  Runs with softirqs disabled.
  */
-static void sge_tx_reclaim_cb(unsigned long data)
+static void sge_tx_reclaim_cb(struct timer_list *t)
 {
 	int i;
-	struct sge *sge = (struct sge *)data;
+	struct sge *sge = from_timer(sge, t, tx_reclaim_timer);
 
 	for (i = 0; i < SGE_CMDQ_N; ++i) {
 		struct cmdQ *q = &sge->cmdQ[i];
@@ -1978,10 +1978,10 @@ void t1_sge_start(struct sge *sge)
 /*
  * Callback for the T2 ESPI 'stuck packet feature' workaorund
  */
-static void espibug_workaround_t204(unsigned long data)
+static void espibug_workaround_t204(struct timer_list *t)
 {
-	struct adapter *adapter = (struct adapter *)data;
-	struct sge *sge = adapter->sge;
+	struct sge *sge = from_timer(sge, t, espibug_timer);
+	struct adapter *adapter = sge->adapter;
 	unsigned int nports = adapter->params.nports;
 	u32 seop[MAX_NPORTS];
 
@@ -2021,10 +2021,10 @@ static void espibug_workaround_t204(unsigned long data)
 	mod_timer(&sge->espibug_timer, jiffies + sge->espibug_timeout);
 }
 
-static void espibug_workaround(unsigned long data)
+static void espibug_workaround(struct timer_list *t)
 {
-	struct adapter *adapter = (struct adapter *)data;
-	struct sge *sge = adapter->sge;
+	struct sge *sge = from_timer(sge, t, espibug_timer);
+	struct adapter *adapter = sge->adapter;
 
 	if (netif_running(adapter->port[0].dev)) {
 	        struct sk_buff *skb = sge->espibug_skb[0];
@@ -2075,19 +2075,15 @@ struct sge *t1_sge_create(struct adapter *adapter, struct sge_params *p)
 			goto nomem_port;
 	}
 
-	init_timer(&sge->tx_reclaim_timer);
-	sge->tx_reclaim_timer.data = (unsigned long)sge;
-	sge->tx_reclaim_timer.function = sge_tx_reclaim_cb;
+	timer_setup(&sge->tx_reclaim_timer, sge_tx_reclaim_cb, 0);
 
 	if (is_T2(sge->adapter)) {
-		init_timer(&sge->espibug_timer);
+		timer_setup(&sge->espibug_timer,
+			    adapter->params.nports > 1 ? espibug_workaround_t204 : espibug_workaround,
+			    0);
 
-		if (adapter->params.nports > 1) {
+		if (adapter->params.nports > 1)
 			tx_sched_init(sge);
-			sge->espibug_timer.function = espibug_workaround_t204;
-		} else
-			sge->espibug_timer.function = espibug_workaround;
-		sge->espibug_timer.data = (unsigned long)sge->adapter;
 
 		sge->espibug_timeout = 1;
 		/* for T204, every 10ms */
diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c
index e2d342647b19..e988caa797cb 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c
@@ -455,6 +455,11 @@ static int alloc_pg_chunk(struct adapter *adapter, struct sge_fl *q,
 		q->pg_chunk.offset = 0;
 		mapping = pci_map_page(adapter->pdev, q->pg_chunk.page,
 				       0, q->alloc_size, PCI_DMA_FROMDEVICE);
+		if (unlikely(pci_dma_mapping_error(adapter->pdev, mapping))) {
+			__free_pages(q->pg_chunk.page, order);
+			q->pg_chunk.page = NULL;
+			return -EIO;
+		}
 		q->pg_chunk.mapping = mapping;
 	}
 	sd->pg_chunk = q->pg_chunk;
@@ -949,40 +954,78 @@ static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
 	return flits_to_desc(flits);
 }
 
+/*	map_skb - map a packet main body and its page fragments
+ *	@pdev: the PCI device
+ *	@skb: the packet
+ *	@addr: placeholder to save the mapped addresses
+ *
+ *	map the main body of an sk_buff and its page fragments, if any.
+ */
+static int map_skb(struct pci_dev *pdev, const struct sk_buff *skb,
+		   dma_addr_t *addr)
+{
+	const skb_frag_t *fp, *end;
+	const struct skb_shared_info *si;
+
+	if (skb_headlen(skb)) {
+		*addr = pci_map_single(pdev, skb->data, skb_headlen(skb),
+				       PCI_DMA_TODEVICE);
+		if (pci_dma_mapping_error(pdev, *addr))
+			goto out_err;
+		addr++;
+	}
+
+	si = skb_shinfo(skb);
+	end = &si->frags[si->nr_frags];
+
+	for (fp = si->frags; fp < end; fp++) {
+		*addr = skb_frag_dma_map(&pdev->dev, fp, 0, skb_frag_size(fp),
+					 DMA_TO_DEVICE);
+		if (pci_dma_mapping_error(pdev, *addr))
+			goto unwind;
+		addr++;
+	}
+	return 0;
+
+unwind:
+	while (fp-- > si->frags)
+		dma_unmap_page(&pdev->dev, *--addr, skb_frag_size(fp),
+			       DMA_TO_DEVICE);
+
+	pci_unmap_single(pdev, addr[-1], skb_headlen(skb), PCI_DMA_TODEVICE);
+out_err:
+	return -ENOMEM;
+}
+
 /**
- *	make_sgl - populate a scatter/gather list for a packet
+ *	write_sgl - populate a scatter/gather list for a packet
  *	@skb: the packet
  *	@sgp: the SGL to populate
  *	@start: start address of skb main body data to include in the SGL
  *	@len: length of skb main body data to include in the SGL
- *	@pdev: the PCI device
+ *	@addr: the list of the mapped addresses
  *
- *	Generates a scatter/gather list for the buffers that make up a packet
+ *	Copies the scatter/gather list for the buffers that make up a packet
  *	and returns the SGL size in 8-byte words.  The caller must size the SGL
  *	appropriately.
  */
-static inline unsigned int make_sgl(const struct sk_buff *skb,
-				    struct sg_ent *sgp, unsigned char *start,
-				    unsigned int len, struct pci_dev *pdev)
+static inline unsigned int write_sgl(const struct sk_buff *skb,
+				     struct sg_ent *sgp, unsigned char *start,
+				     unsigned int len, const dma_addr_t *addr)
 {
-	dma_addr_t mapping;
-	unsigned int i, j = 0, nfrags;
+	unsigned int i, j = 0, k = 0, nfrags;
 
 	if (len) {
-		mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE);
 		sgp->len[0] = cpu_to_be32(len);
-		sgp->addr[0] = cpu_to_be64(mapping);
-		j = 1;
+		sgp->addr[j++] = cpu_to_be64(addr[k++]);
 	}
 
 	nfrags = skb_shinfo(skb)->nr_frags;
 	for (i = 0; i < nfrags; i++) {
 		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
-		mapping = skb_frag_dma_map(&pdev->dev, frag, 0, skb_frag_size(frag),
-					   DMA_TO_DEVICE);
 		sgp->len[j] = cpu_to_be32(skb_frag_size(frag));
-		sgp->addr[j] = cpu_to_be64(mapping);
+		sgp->addr[j] = cpu_to_be64(addr[k++]);
 		j ^= 1;
 		if (j == 0)
 			++sgp;
@@ -1138,7 +1181,7 @@ static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
 			    const struct port_info *pi,
 			    unsigned int pidx, unsigned int gen,
 			    struct sge_txq *q, unsigned int ndesc,
-			    unsigned int compl)
+			    unsigned int compl, const dma_addr_t *addr)
 {
 	unsigned int flits, sgl_flits, cntrl, tso_info;
 	struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
@@ -1196,7 +1239,7 @@ static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
 	}
 
 	sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
-	sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev);
+	sgl_flits = write_sgl(skb, sgp, skb->data, skb_headlen(skb), addr);
 
 	write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
 			 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
@@ -1227,6 +1270,7 @@ netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct netdev_queue *txq;
 	struct sge_qset *qs;
 	struct sge_txq *q;
+	dma_addr_t addr[MAX_SKB_FRAGS + 1];
 
 	/*
 	 * The chip min packet length is 9 octets but play safe and reject
@@ -1255,6 +1299,14 @@ netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 		return NETDEV_TX_BUSY;
 	}
 
+	/* Check if ethernet packet can't be sent as immediate data */
+	if (skb->len > (WR_LEN - sizeof(struct cpl_tx_pkt))) {
+		if (unlikely(map_skb(adap->pdev, skb, addr) < 0)) {
+			dev_kfree_skb(skb);
+			return NETDEV_TX_OK;
+		}
+	}
+
 	q->in_use += ndesc;
 	if (unlikely(credits - ndesc < q->stop_thres)) {
 		t3_stop_tx_queue(txq, qs, q);
@@ -1312,7 +1364,7 @@ netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (likely(!skb_shared(skb)))
 		skb_orphan(skb);
 
-	write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl);
+	write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl, addr);
 	check_ring_tx_db(adap, q);
 	return NETDEV_TX_OK;
 }
@@ -1577,7 +1629,8 @@ static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev,
  */
 static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
 			  struct sge_txq *q, unsigned int pidx,
-			  unsigned int gen, unsigned int ndesc)
+			  unsigned int gen, unsigned int ndesc,
+			  const dma_addr_t *addr)
 {
 	unsigned int sgl_flits, flits;
 	struct work_request_hdr *from;
@@ -1598,10 +1651,9 @@ static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
 
 	flits = skb_transport_offset(skb) / 8;
 	sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
-	sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
-			     skb_tail_pointer(skb) -
-			     skb_transport_header(skb),
-			     adap->pdev);
+	sgl_flits = write_sgl(skb, sgp, skb_transport_header(skb),
+			      skb_tail_pointer(skb) - skb_transport_header(skb),
+			      addr);
 	if (need_skb_unmap()) {
 		setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
 		skb->destructor = deferred_unmap_destructor;
@@ -1659,6 +1711,12 @@ again:	reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
 		goto again;
 	}
 
+	if (!immediate(skb) &&
+	    map_skb(adap->pdev, skb, (dma_addr_t *)skb->head)) {
+		spin_unlock(&q->lock);
+		return NET_XMIT_SUCCESS;
+	}
+
 	gen = q->gen;
 	q->in_use += ndesc;
 	pidx = q->pidx;
@@ -1669,7 +1727,7 @@ again:	reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
 	}
 	spin_unlock(&q->lock);
 
-	write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
+	write_ofld_wr(adap, skb, q, pidx, gen, ndesc, (dma_addr_t *)skb->head);
 	check_ring_tx_db(adap, q);
 	return NET_XMIT_SUCCESS;
 }
@@ -1687,6 +1745,7 @@ static void restart_offloadq(unsigned long data)
 	struct sge_txq *q = &qs->txq[TXQ_OFLD];
 	const struct port_info *pi = netdev_priv(qs->netdev);
 	struct adapter *adap = pi->adapter;
+	unsigned int written = 0;
 
 	spin_lock(&q->lock);
 again:	reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
@@ -1706,10 +1765,15 @@ again:	reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
 			break;
 		}
 
+		if (!immediate(skb) &&
+		    map_skb(adap->pdev, skb, (dma_addr_t *)skb->head))
+			break;
+
 		gen = q->gen;
 		q->in_use += ndesc;
 		pidx = q->pidx;
 		q->pidx += ndesc;
+		written += ndesc;
 		if (q->pidx >= q->size) {
 			q->pidx -= q->size;
 			q->gen ^= 1;
@@ -1717,7 +1781,8 @@ again:	reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
 		__skb_unlink(skb, &q->sendq);
 		spin_unlock(&q->lock);
 
-		write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
+		write_ofld_wr(adap, skb, q, pidx, gen, ndesc,
+			      (dma_addr_t *)skb->head);
 		spin_lock(&q->lock);
 	}
 	spin_unlock(&q->lock);
@@ -1727,8 +1792,9 @@ again:	reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
 	set_bit(TXQ_LAST_PKT_DB, &q->flags);
 #endif
 	wmb();
-	t3_write_reg(adap, A_SG_KDOORBELL,
-		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
+	if (likely(written))
+		t3_write_reg(adap, A_SG_KDOORBELL,
+			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 }
 
 /**
@@ -2853,9 +2919,9 @@ void t3_sge_err_intr_handler(struct adapter *adapter)
  *	bother cleaning them up here.
  *
  */
-static void sge_timer_tx(unsigned long data)
+static void sge_timer_tx(struct timer_list *t)
 {
-	struct sge_qset *qs = (struct sge_qset *)data;
+	struct sge_qset *qs = from_timer(qs, t, tx_reclaim_timer);
 	struct port_info *pi = netdev_priv(qs->netdev);
 	struct adapter *adap = pi->adapter;
 	unsigned int tbd[SGE_TXQ_PER_SET] = {0, 0};
@@ -2893,10 +2959,10 @@ static void sge_timer_tx(unsigned long data)
  *	starved.
  *
  */
-static void sge_timer_rx(unsigned long data)
+static void sge_timer_rx(struct timer_list *t)
 {
 	spinlock_t *lock;
-	struct sge_qset *qs = (struct sge_qset *)data;
+	struct sge_qset *qs = from_timer(qs, t, rx_reclaim_timer);
 	struct port_info *pi = netdev_priv(qs->netdev);
 	struct adapter *adap = pi->adapter;
 	u32 status;
@@ -2976,8 +3042,8 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
 	struct sge_qset *q = &adapter->sge.qs[id];
 
 	init_qset_cntxt(q, id);
-	setup_timer(&q->tx_reclaim_timer, sge_timer_tx, (unsigned long)q);
-	setup_timer(&q->rx_reclaim_timer, sge_timer_rx, (unsigned long)q);
+	timer_setup(&q->tx_reclaim_timer, sge_timer_tx, 0);
+	timer_setup(&q->rx_reclaim_timer, sge_timer_rx, 0);
 
 	q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
 				   sizeof(struct rx_desc),
diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3cdev.h b/drivers/net/ethernet/chelsio/cxgb3/t3cdev.h
index 705713b56636..3c3e6cf6aca6 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/t3cdev.h
+++ b/drivers/net/ethernet/chelsio/cxgb3/t3cdev.h
@@ -60,7 +60,7 @@ struct t3cdev {
 	int (*ctl)(struct t3cdev *dev, unsigned int req, void *data);
 	void (*neigh_update)(struct t3cdev *dev, struct neighbour *neigh);
 	void *priv;		/* driver private data */
-	void *l2opt;		/* optional layer 2 data */
+	void __rcu *l2opt;	/* optional layer 2 data */
 	void *l3opt;		/* optional layer 3 data */
 	void *l4opt;		/* optional layer 4 data */
 	void *ulp;		/* ulp stuff */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile
index 24143c8abfdb..8c9c6b0d2e5d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/Makefile
+++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile
@@ -5,7 +5,10 @@
 
 obj-$(CONFIG_CHELSIO_T4) += cxgb4.o
 
-cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o cxgb4_ptp.o
+cxgb4-objs := cxgb4_main.o l2t.o smt.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o \
+	      cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \
+	      cxgb4_ptp.o cxgb4_tc_flower.o cxgb4_cudbg.o \
+	      cudbg_common.o cudbg_lib.o
 cxgb4-$(CONFIG_CHELSIO_T4_DCB) +=  cxgb4_dcb.o
 cxgb4-$(CONFIG_CHELSIO_T4_FCOE) +=  cxgb4_fcoe.o
 cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o
diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
index 3103ef9b561d..290039026ece 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
@@ -96,7 +96,8 @@ int cxgb4_clip_get(const struct net_device *dev, const u32 *lip, u8 v6)
 		if (!ret) {
 			ce = cte;
 			read_unlock_bh(&ctbl->lock);
-			goto found;
+			refcount_inc(&ce->refcnt);
+			return 0;
 		}
 	}
 	read_unlock_bh(&ctbl->lock);
@@ -108,7 +109,7 @@ int cxgb4_clip_get(const struct net_device *dev, const u32 *lip, u8 v6)
 		list_del(&ce->list);
 		INIT_LIST_HEAD(&ce->list);
 		spin_lock_init(&ce->lock);
-		atomic_set(&ce->refcnt, 0);
+		refcount_set(&ce->refcnt, 0);
 		atomic_dec(&ctbl->nfree);
 		list_add_tail(&ce->list, &ctbl->hash_list[hash]);
 		if (v6) {
@@ -138,9 +139,7 @@ int cxgb4_clip_get(const struct net_device *dev, const u32 *lip, u8 v6)
 		return -ENOMEM;
 	}
 	write_unlock_bh(&ctbl->lock);
-found:
-	atomic_inc(&ce->refcnt);
-
+	refcount_set(&ce->refcnt, 1);
 	return 0;
 }
 EXPORT_SYMBOL(cxgb4_clip_get);
@@ -179,7 +178,7 @@ void cxgb4_clip_release(const struct net_device *dev, const u32 *lip, u8 v6)
 found:
 	write_lock_bh(&ctbl->lock);
 	spin_lock_bh(&ce->lock);
-	if (atomic_dec_and_test(&ce->refcnt)) {
+	if (refcount_dec_and_test(&ce->refcnt)) {
 		list_del(&ce->list);
 		INIT_LIST_HEAD(&ce->list);
 		list_add_tail(&ce->list, &ctbl->ce_free_head);
@@ -266,7 +265,7 @@ int clip_tbl_show(struct seq_file *seq, void *v)
 			ip[0] = '\0';
 			sprintf(ip, "%pISc", &ce->addr);
 			seq_printf(seq, "%-25s   %u\n", ip,
-				   atomic_read(&ce->refcnt));
+				   refcount_read(&ce->refcnt));
 		}
 	}
 	seq_printf(seq, "Free clip entries : %d\n", atomic_read(&ctbl->nfree));
diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h
index 35eb43c6bcbb..a0e0ae19649f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h
@@ -10,9 +10,11 @@
  *  release for licensing terms and conditions.
  */
 
+#include <linux/refcount.h>
+
 struct clip_entry {
 	spinlock_t lock;	/* Hold while modifying clip reference */
-	atomic_t refcnt;
+	refcount_t refcnt;
 	struct list_head list;
 	union {
 		struct sockaddr_in addr;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_common.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_common.c
new file mode 100644
index 000000000000..f78ba1743b5a
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_common.c
@@ -0,0 +1,54 @@
+/*
+ *  Copyright (C) 2017 Chelsio Communications.  All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms and conditions of the GNU General Public License,
+ *  version 2, as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ *  more details.
+ *
+ *  The full GNU General Public License is included in this distribution in
+ *  the file called "COPYING".
+ *
+ */
+
+#include "cxgb4.h"
+#include "cudbg_if.h"
+#include "cudbg_lib_common.h"
+
+int cudbg_get_buff(struct cudbg_buffer *pdbg_buff, u32 size,
+		   struct cudbg_buffer *pin_buff)
+{
+	u32 offset;
+
+	offset = pdbg_buff->offset;
+	if (offset + size > pdbg_buff->size)
+		return CUDBG_STATUS_NO_MEM;
+
+	pin_buff->data = (char *)pdbg_buff->data + offset;
+	pin_buff->offset = offset;
+	pin_buff->size = size;
+	pdbg_buff->size -= size;
+	return 0;
+}
+
+void cudbg_put_buff(struct cudbg_buffer *pin_buff,
+		    struct cudbg_buffer *pdbg_buff)
+{
+	pdbg_buff->size += pin_buff->size;
+	pin_buff->data = NULL;
+	pin_buff->offset = 0;
+	pin_buff->size = 0;
+}
+
+void cudbg_update_buff(struct cudbg_buffer *pin_buff,
+		       struct cudbg_buffer *pout_buff)
+{
+	/* We already write to buffer provided by ethool, so just
+	 * increment offset to next free space.
+	 */
+	pout_buff->offset += pin_buff->size;
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
new file mode 100644
index 000000000000..605689957496
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
@@ -0,0 +1,384 @@
+/*
+ *  Copyright (C) 2017 Chelsio Communications.  All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms and conditions of the GNU General Public License,
+ *  version 2, as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ *  more details.
+ *
+ *  The full GNU General Public License is included in this distribution in
+ *  the file called "COPYING".
+ *
+ */
+
+#ifndef __CUDBG_ENTITY_H__
+#define __CUDBG_ENTITY_H__
+
+#define EDC0_FLAG 3
+#define EDC1_FLAG 4
+
+#define CUDBG_ENTITY_SIGNATURE 0xCCEDB001
+
+struct card_mem {
+	u16 size_edc0;
+	u16 size_edc1;
+	u16 mem_flag;
+};
+
+struct cudbg_mbox_log {
+	struct mbox_cmd entry;
+	u32 hi[MBOX_LEN / 8];
+	u32 lo[MBOX_LEN / 8];
+};
+
+struct cudbg_cim_qcfg {
+	u8 chip;
+	u16 base[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
+	u16 size[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
+	u16 thres[CIM_NUM_IBQ];
+	u32 obq_wr[2 * CIM_NUM_OBQ_T5];
+	u32 stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)];
+};
+
+struct cudbg_rss_vf_conf {
+	u32 rss_vf_vfl;
+	u32 rss_vf_vfh;
+};
+
+struct cudbg_pm_stats {
+	u32 tx_cnt[T6_PM_NSTATS];
+	u32 rx_cnt[T6_PM_NSTATS];
+	u64 tx_cyc[T6_PM_NSTATS];
+	u64 rx_cyc[T6_PM_NSTATS];
+};
+
+struct cudbg_hw_sched {
+	u32 kbps[NTX_SCHED];
+	u32 ipg[NTX_SCHED];
+	u32 pace_tab[NTX_SCHED];
+	u32 mode;
+	u32 map;
+};
+
+struct ireg_field {
+	u32 ireg_addr;
+	u32 ireg_data;
+	u32 ireg_local_offset;
+	u32 ireg_offset_range;
+};
+
+struct ireg_buf {
+	struct ireg_field tp_pio;
+	u32 outbuf[32];
+};
+
+struct cudbg_ulprx_la {
+	u32 data[ULPRX_LA_SIZE * 8];
+	u32 size;
+};
+
+struct cudbg_tp_la {
+	u32 size;
+	u32 mode;
+	u8 data[0];
+};
+
+struct cudbg_cim_pif_la {
+	int size;
+	u8 data[0];
+};
+
+struct cudbg_clk_info {
+	u64 retransmit_min;
+	u64 retransmit_max;
+	u64 persist_timer_min;
+	u64 persist_timer_max;
+	u64 keepalive_idle_timer;
+	u64 keepalive_interval;
+	u64 initial_srtt;
+	u64 finwait2_timer;
+	u32 dack_timer;
+	u32 res;
+	u32 cclk_ps;
+	u32 tre;
+	u32 dack_re;
+};
+
+struct cudbg_tid_info_region {
+	u32 ntids;
+	u32 nstids;
+	u32 stid_base;
+	u32 hash_base;
+
+	u32 natids;
+	u32 nftids;
+	u32 ftid_base;
+	u32 aftid_base;
+	u32 aftid_end;
+
+	u32 sftid_base;
+	u32 nsftids;
+
+	u32 uotid_base;
+	u32 nuotids;
+
+	u32 sb;
+	u32 flags;
+	u32 le_db_conf;
+	u32 ip_users;
+	u32 ipv6_users;
+
+	u32 hpftid_base;
+	u32 nhpftids;
+};
+
+#define CUDBG_TID_INFO_REV 1
+
+struct cudbg_tid_info_region_rev1 {
+	struct cudbg_ver_hdr ver_hdr;
+	struct cudbg_tid_info_region tid;
+	u32 tid_start;
+	u32 reserved[16];
+};
+
+#define CUDBG_MAX_FL_QIDS 1024
+
+struct cudbg_ch_cntxt {
+	u32 cntxt_type;
+	u32 cntxt_id;
+	u32 data[SGE_CTXT_SIZE / 4];
+};
+
+#define CUDBG_MAX_RPLC_SIZE 128
+
+struct cudbg_mps_tcam {
+	u64 mask;
+	u32 rplc[8];
+	u32 idx;
+	u32 cls_lo;
+	u32 cls_hi;
+	u32 rplc_size;
+	u32 vniy;
+	u32 vnix;
+	u32 dip_hit;
+	u32 vlan_vld;
+	u32 repli;
+	u16 ivlan;
+	u8 addr[ETH_ALEN];
+	u8 lookup_type;
+	u8 port_num;
+	u8 reserved[2];
+};
+
+#define CUDBG_VPD_PF_SIZE 0x800
+#define CUDBG_SCFG_VER_ADDR 0x06
+#define CUDBG_SCFG_VER_LEN 4
+#define CUDBG_VPD_VER_ADDR 0x18c7
+#define CUDBG_VPD_VER_LEN 2
+
+struct cudbg_vpd_data {
+	u8 sn[SERNUM_LEN + 1];
+	u8 bn[PN_LEN + 1];
+	u8 na[MACADDR_LEN + 1];
+	u8 mn[ID_LEN + 1];
+	u16 fw_major;
+	u16 fw_minor;
+	u16 fw_micro;
+	u16 fw_build;
+	u32 scfg_vers;
+	u32 vpd_vers;
+};
+
+#define CUDBG_MAX_TCAM_TID 0x800
+
+enum cudbg_le_entry_types {
+	LE_ET_UNKNOWN = 0,
+	LE_ET_TCAM_CON = 1,
+	LE_ET_TCAM_SERVER = 2,
+	LE_ET_TCAM_FILTER = 3,
+	LE_ET_TCAM_CLIP = 4,
+	LE_ET_TCAM_ROUTING = 5,
+	LE_ET_HASH_CON = 6,
+	LE_ET_INVALID_TID = 8,
+};
+
+struct cudbg_tcam {
+	u32 filter_start;
+	u32 server_start;
+	u32 clip_start;
+	u32 routing_start;
+	u32 tid_hash_base;
+	u32 max_tid;
+};
+
+struct cudbg_tid_data {
+	u32 tid;
+	u32 dbig_cmd;
+	u32 dbig_conf;
+	u32 dbig_rsp_stat;
+	u32 data[NUM_LE_DB_DBGI_RSP_DATA_INSTANCES];
+};
+
+#define CUDBG_NUM_ULPTX 11
+#define CUDBG_NUM_ULPTX_READ 512
+
+struct cudbg_ulptx_la {
+	u32 rdptr[CUDBG_NUM_ULPTX];
+	u32 wrptr[CUDBG_NUM_ULPTX];
+	u32 rddata[CUDBG_NUM_ULPTX];
+	u32 rd_data[CUDBG_NUM_ULPTX][CUDBG_NUM_ULPTX_READ];
+};
+
+#define CUDBG_CHAC_PBT_ADDR 0x2800
+#define CUDBG_CHAC_PBT_LRF  0x3000
+#define CUDBG_CHAC_PBT_DATA 0x3800
+#define CUDBG_PBT_DYNAMIC_ENTRIES 8
+#define CUDBG_PBT_STATIC_ENTRIES 16
+#define CUDBG_LRF_ENTRIES 8
+#define CUDBG_PBT_DATA_ENTRIES 512
+
+struct cudbg_pbt_tables {
+	u32 pbt_dynamic[CUDBG_PBT_DYNAMIC_ENTRIES];
+	u32 pbt_static[CUDBG_PBT_STATIC_ENTRIES];
+	u32 lrf_table[CUDBG_LRF_ENTRIES];
+	u32 pbt_data[CUDBG_PBT_DATA_ENTRIES];
+};
+
+#define IREG_NUM_ELEM 4
+
+static const u32 t6_tp_pio_array[][IREG_NUM_ELEM] = {
+	{0x7e40, 0x7e44, 0x020, 28}, /* t6_tp_pio_regs_20_to_3b */
+	{0x7e40, 0x7e44, 0x040, 10}, /* t6_tp_pio_regs_40_to_49 */
+	{0x7e40, 0x7e44, 0x050, 10}, /* t6_tp_pio_regs_50_to_59 */
+	{0x7e40, 0x7e44, 0x060, 14}, /* t6_tp_pio_regs_60_to_6d */
+	{0x7e40, 0x7e44, 0x06F, 1}, /* t6_tp_pio_regs_6f */
+	{0x7e40, 0x7e44, 0x070, 6}, /* t6_tp_pio_regs_70_to_75 */
+	{0x7e40, 0x7e44, 0x130, 18}, /* t6_tp_pio_regs_130_to_141 */
+	{0x7e40, 0x7e44, 0x145, 19}, /* t6_tp_pio_regs_145_to_157 */
+	{0x7e40, 0x7e44, 0x160, 1}, /* t6_tp_pio_regs_160 */
+	{0x7e40, 0x7e44, 0x230, 25}, /* t6_tp_pio_regs_230_to_248 */
+	{0x7e40, 0x7e44, 0x24a, 3}, /* t6_tp_pio_regs_24c */
+	{0x7e40, 0x7e44, 0x8C0, 1} /* t6_tp_pio_regs_8c0 */
+};
+
+static const u32 t5_tp_pio_array[][IREG_NUM_ELEM] = {
+	{0x7e40, 0x7e44, 0x020, 28}, /* t5_tp_pio_regs_20_to_3b */
+	{0x7e40, 0x7e44, 0x040, 19}, /* t5_tp_pio_regs_40_to_52 */
+	{0x7e40, 0x7e44, 0x054, 2}, /* t5_tp_pio_regs_54_to_55 */
+	{0x7e40, 0x7e44, 0x060, 13}, /* t5_tp_pio_regs_60_to_6c */
+	{0x7e40, 0x7e44, 0x06F, 1}, /* t5_tp_pio_regs_6f */
+	{0x7e40, 0x7e44, 0x120, 4}, /* t5_tp_pio_regs_120_to_123 */
+	{0x7e40, 0x7e44, 0x12b, 2}, /* t5_tp_pio_regs_12b_to_12c */
+	{0x7e40, 0x7e44, 0x12f, 21}, /* t5_tp_pio_regs_12f_to_143 */
+	{0x7e40, 0x7e44, 0x145, 19}, /* t5_tp_pio_regs_145_to_157 */
+	{0x7e40, 0x7e44, 0x230, 25}, /* t5_tp_pio_regs_230_to_248 */
+	{0x7e40, 0x7e44, 0x8C0, 1} /* t5_tp_pio_regs_8c0 */
+};
+
+static const u32 t6_tp_tm_pio_array[][IREG_NUM_ELEM] = {
+	{0x7e18, 0x7e1c, 0x0, 12}
+};
+
+static const u32 t5_tp_tm_pio_array[][IREG_NUM_ELEM] = {
+	{0x7e18, 0x7e1c, 0x0, 12}
+};
+
+static const u32 t6_tp_mib_index_array[6][IREG_NUM_ELEM] = {
+	{0x7e50, 0x7e54, 0x0, 13},
+	{0x7e50, 0x7e54, 0x10, 6},
+	{0x7e50, 0x7e54, 0x18, 21},
+	{0x7e50, 0x7e54, 0x30, 32},
+	{0x7e50, 0x7e54, 0x50, 22},
+	{0x7e50, 0x7e54, 0x68, 12}
+};
+
+static const u32 t5_tp_mib_index_array[9][IREG_NUM_ELEM] = {
+	{0x7e50, 0x7e54, 0x0, 13},
+	{0x7e50, 0x7e54, 0x10, 6},
+	{0x7e50, 0x7e54, 0x18, 8},
+	{0x7e50, 0x7e54, 0x20, 13},
+	{0x7e50, 0x7e54, 0x30, 16},
+	{0x7e50, 0x7e54, 0x40, 16},
+	{0x7e50, 0x7e54, 0x50, 16},
+	{0x7e50, 0x7e54, 0x60, 6},
+	{0x7e50, 0x7e54, 0x68, 4}
+};
+
+static const u32 t5_sge_dbg_index_array[2][IREG_NUM_ELEM] = {
+	{0x10cc, 0x10d0, 0x0, 16},
+	{0x10cc, 0x10d4, 0x0, 16},
+};
+
+static const u32 t5_pcie_pdbg_array[][IREG_NUM_ELEM] = {
+	{0x5a04, 0x5a0c, 0x00, 0x20}, /* t5_pcie_pdbg_regs_00_to_20 */
+	{0x5a04, 0x5a0c, 0x21, 0x20}, /* t5_pcie_pdbg_regs_21_to_40 */
+	{0x5a04, 0x5a0c, 0x41, 0x10}, /* t5_pcie_pdbg_regs_41_to_50 */
+};
+
+static const u32 t5_pcie_cdbg_array[][IREG_NUM_ELEM] = {
+	{0x5a10, 0x5a18, 0x00, 0x20}, /* t5_pcie_cdbg_regs_00_to_20 */
+	{0x5a10, 0x5a18, 0x21, 0x18}, /* t5_pcie_cdbg_regs_21_to_37 */
+};
+
+static const u32 t5_pm_rx_array[][IREG_NUM_ELEM] = {
+	{0x8FD0, 0x8FD4, 0x10000, 0x20}, /* t5_pm_rx_regs_10000_to_10020 */
+	{0x8FD0, 0x8FD4, 0x10021, 0x0D}, /* t5_pm_rx_regs_10021_to_1002c */
+};
+
+static const u32 t5_pm_tx_array[][IREG_NUM_ELEM] = {
+	{0x8FF0, 0x8FF4, 0x10000, 0x20}, /* t5_pm_tx_regs_10000_to_10020 */
+	{0x8FF0, 0x8FF4, 0x10021, 0x1D}, /* t5_pm_tx_regs_10021_to_1003c */
+};
+
+static const u32 t6_ma_ireg_array[][IREG_NUM_ELEM] = {
+	{0x78f8, 0x78fc, 0xa000, 23}, /* t6_ma_regs_a000_to_a016 */
+	{0x78f8, 0x78fc, 0xa400, 30}, /* t6_ma_regs_a400_to_a41e */
+	{0x78f8, 0x78fc, 0xa800, 20} /* t6_ma_regs_a800_to_a813 */
+};
+
+static const u32 t6_ma_ireg_array2[][IREG_NUM_ELEM] = {
+	{0x78f8, 0x78fc, 0xe400, 17}, /* t6_ma_regs_e400_to_e600 */
+	{0x78f8, 0x78fc, 0xe640, 13} /* t6_ma_regs_e640_to_e7c0 */
+};
+
+static const u32 t6_up_cim_reg_array[][IREG_NUM_ELEM] = {
+	{0x7b50, 0x7b54, 0x2000, 0x20}, /* up_cim_2000_to_207c */
+	{0x7b50, 0x7b54, 0x2080, 0x1d}, /* up_cim_2080_to_20fc */
+	{0x7b50, 0x7b54, 0x00, 0x20}, /* up_cim_00_to_7c */
+	{0x7b50, 0x7b54, 0x80, 0x20}, /* up_cim_80_to_fc */
+	{0x7b50, 0x7b54, 0x100, 0x11}, /* up_cim_100_to_14c */
+	{0x7b50, 0x7b54, 0x200, 0x10}, /* up_cim_200_to_23c */
+	{0x7b50, 0x7b54, 0x240, 0x2}, /* up_cim_240_to_244 */
+	{0x7b50, 0x7b54, 0x250, 0x2}, /* up_cim_250_to_254 */
+	{0x7b50, 0x7b54, 0x260, 0x2}, /* up_cim_260_to_264 */
+	{0x7b50, 0x7b54, 0x270, 0x2}, /* up_cim_270_to_274 */
+	{0x7b50, 0x7b54, 0x280, 0x20}, /* up_cim_280_to_2fc */
+	{0x7b50, 0x7b54, 0x300, 0x20}, /* up_cim_300_to_37c */
+	{0x7b50, 0x7b54, 0x380, 0x14}, /* up_cim_380_to_3cc */
+
+};
+
+static const u32 t5_up_cim_reg_array[][IREG_NUM_ELEM] = {
+	{0x7b50, 0x7b54, 0x2000, 0x20}, /* up_cim_2000_to_207c */
+	{0x7b50, 0x7b54, 0x2080, 0x19}, /* up_cim_2080_to_20ec */
+	{0x7b50, 0x7b54, 0x00, 0x20}, /* up_cim_00_to_7c */
+	{0x7b50, 0x7b54, 0x80, 0x20}, /* up_cim_80_to_fc */
+	{0x7b50, 0x7b54, 0x100, 0x11}, /* up_cim_100_to_14c */
+	{0x7b50, 0x7b54, 0x200, 0x10}, /* up_cim_200_to_23c */
+	{0x7b50, 0x7b54, 0x240, 0x2}, /* up_cim_240_to_244 */
+	{0x7b50, 0x7b54, 0x250, 0x2}, /* up_cim_250_to_254 */
+	{0x7b50, 0x7b54, 0x260, 0x2}, /* up_cim_260_to_264 */
+	{0x7b50, 0x7b54, 0x270, 0x2}, /* up_cim_270_to_274 */
+	{0x7b50, 0x7b54, 0x280, 0x20}, /* up_cim_280_to_2fc */
+	{0x7b50, 0x7b54, 0x300, 0x20}, /* up_cim_300_to_37c */
+	{0x7b50, 0x7b54, 0x380, 0x14}, /* up_cim_380_to_3cc */
+};
+
+static const u32 t6_hma_ireg_array[][IREG_NUM_ELEM] = {
+	{0x51320, 0x51324, 0xa000, 32} /* t6_hma_regs_a000_to_a01f */
+};
+#endif /* __CUDBG_ENTITY_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h
new file mode 100644
index 000000000000..e10ff1ee62c5
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h
@@ -0,0 +1,90 @@
+/*
+ *  Copyright (C) 2017 Chelsio Communications.  All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms and conditions of the GNU General Public License,
+ *  version 2, as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ *  more details.
+ *
+ *  The full GNU General Public License is included in this distribution in
+ *  the file called "COPYING".
+ *
+ */
+
+#ifndef __CUDBG_IF_H__
+#define __CUDBG_IF_H__
+
+/* Error codes */
+#define CUDBG_STATUS_NO_MEM -19
+#define CUDBG_STATUS_ENTITY_NOT_FOUND -24
+#define CUDBG_SYSTEM_ERROR -29
+#define CUDBG_STATUS_CCLK_NOT_DEFINED -32
+
+#define CUDBG_MAJOR_VERSION 1
+#define CUDBG_MINOR_VERSION 14
+
+enum cudbg_dbg_entity_type {
+	CUDBG_REG_DUMP = 1,
+	CUDBG_DEV_LOG = 2,
+	CUDBG_CIM_LA = 3,
+	CUDBG_CIM_MA_LA = 4,
+	CUDBG_CIM_QCFG = 5,
+	CUDBG_CIM_IBQ_TP0 = 6,
+	CUDBG_CIM_IBQ_TP1 = 7,
+	CUDBG_CIM_IBQ_ULP = 8,
+	CUDBG_CIM_IBQ_SGE0 = 9,
+	CUDBG_CIM_IBQ_SGE1 = 10,
+	CUDBG_CIM_IBQ_NCSI = 11,
+	CUDBG_CIM_OBQ_ULP0 = 12,
+	CUDBG_CIM_OBQ_ULP1 = 13,
+	CUDBG_CIM_OBQ_ULP2 = 14,
+	CUDBG_CIM_OBQ_ULP3 = 15,
+	CUDBG_CIM_OBQ_SGE = 16,
+	CUDBG_CIM_OBQ_NCSI = 17,
+	CUDBG_EDC0 = 18,
+	CUDBG_EDC1 = 19,
+	CUDBG_RSS = 22,
+	CUDBG_RSS_VF_CONF = 25,
+	CUDBG_PATH_MTU = 27,
+	CUDBG_PM_STATS = 30,
+	CUDBG_HW_SCHED = 31,
+	CUDBG_TP_INDIRECT = 36,
+	CUDBG_SGE_INDIRECT = 37,
+	CUDBG_ULPRX_LA = 41,
+	CUDBG_TP_LA = 43,
+	CUDBG_CIM_PIF_LA = 45,
+	CUDBG_CLK = 46,
+	CUDBG_CIM_OBQ_RXQ0 = 47,
+	CUDBG_CIM_OBQ_RXQ1 = 48,
+	CUDBG_PCIE_INDIRECT = 50,
+	CUDBG_PM_INDIRECT = 51,
+	CUDBG_TID_INFO = 54,
+	CUDBG_DUMP_CONTEXT = 56,
+	CUDBG_MPS_TCAM = 57,
+	CUDBG_VPD_DATA = 58,
+	CUDBG_LE_TCAM = 59,
+	CUDBG_CCTRL = 60,
+	CUDBG_MA_INDIRECT = 61,
+	CUDBG_ULPTX_LA = 62,
+	CUDBG_UP_CIM_INDIRECT = 64,
+	CUDBG_PBT_TABLE = 65,
+	CUDBG_MBOX_LOG = 66,
+	CUDBG_HMA_INDIRECT = 67,
+	CUDBG_MAX_ENTITY = 70,
+};
+
+struct cudbg_init {
+	struct adapter *adap; /* Pointer to adapter structure */
+	void *outbuf; /* Output buffer */
+	u32 outbuf_size;  /* Output buffer size */
+};
+
+static inline unsigned int cudbg_mbytes_to_bytes(unsigned int size)
+{
+	return size * 1024 * 1024;
+}
+#endif /* __CUDBG_IF_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
new file mode 100644
index 000000000000..d699bf88d18f
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
@@ -0,0 +1,1929 @@
+/*
+ *  Copyright (C) 2017 Chelsio Communications.  All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms and conditions of the GNU General Public License,
+ *  version 2, as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ *  more details.
+ *
+ *  The full GNU General Public License is included in this distribution in
+ *  the file called "COPYING".
+ *
+ */
+
+#include "t4_regs.h"
+#include "cxgb4.h"
+#include "cudbg_if.h"
+#include "cudbg_lib_common.h"
+#include "cudbg_lib.h"
+#include "cudbg_entity.h"
+
+static void cudbg_write_and_release_buff(struct cudbg_buffer *pin_buff,
+					 struct cudbg_buffer *dbg_buff)
+{
+	cudbg_update_buff(pin_buff, dbg_buff);
+	cudbg_put_buff(pin_buff, dbg_buff);
+}
+
+static int is_fw_attached(struct cudbg_init *pdbg_init)
+{
+	struct adapter *padap = pdbg_init->adap;
+
+	if (!(padap->flags & FW_OK) || padap->use_bd)
+		return 0;
+
+	return 1;
+}
+
+/* This function will add additional padding bytes into debug_buffer to make it
+ * 4 byte aligned.
+ */
+void cudbg_align_debug_buffer(struct cudbg_buffer *dbg_buff,
+			      struct cudbg_entity_hdr *entity_hdr)
+{
+	u8 zero_buf[4] = {0};
+	u8 padding, remain;
+
+	remain = (dbg_buff->offset - entity_hdr->start_offset) % 4;
+	padding = 4 - remain;
+	if (remain) {
+		memcpy(((u8 *)dbg_buff->data) + dbg_buff->offset, &zero_buf,
+		       padding);
+		dbg_buff->offset += padding;
+		entity_hdr->num_pad = padding;
+	}
+	entity_hdr->size = dbg_buff->offset - entity_hdr->start_offset;
+}
+
+struct cudbg_entity_hdr *cudbg_get_entity_hdr(void *outbuf, int i)
+{
+	struct cudbg_hdr *cudbg_hdr = (struct cudbg_hdr *)outbuf;
+
+	return (struct cudbg_entity_hdr *)
+	       ((char *)outbuf + cudbg_hdr->hdr_len +
+		(sizeof(struct cudbg_entity_hdr) * (i - 1)));
+}
+
+static int cudbg_read_vpd_reg(struct adapter *padap, u32 addr, u32 len,
+			      void *dest)
+{
+	int vaddr, rc;
+
+	vaddr = t4_eeprom_ptov(addr, padap->pf, EEPROMPFSIZE);
+	if (vaddr < 0)
+		return vaddr;
+
+	rc = pci_read_vpd(padap->pdev, vaddr, len, dest);
+	if (rc < 0)
+		return rc;
+
+	return 0;
+}
+
+int cudbg_collect_reg_dump(struct cudbg_init *pdbg_init,
+			   struct cudbg_buffer *dbg_buff,
+			   struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	u32 buf_size = 0;
+	int rc = 0;
+
+	if (is_t4(padap->params.chip))
+		buf_size = T4_REGMAP_SIZE;
+	else if (is_t5(padap->params.chip) || is_t6(padap->params.chip))
+		buf_size = T5_REGMAP_SIZE;
+
+	rc = cudbg_get_buff(dbg_buff, buf_size, &temp_buff);
+	if (rc)
+		return rc;
+	t4_get_regs(padap, (void *)temp_buff.data, temp_buff.size);
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_fw_devlog(struct cudbg_init *pdbg_init,
+			    struct cudbg_buffer *dbg_buff,
+			    struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	struct devlog_params *dparams;
+	int rc = 0;
+
+	rc = t4_init_devlog_params(padap);
+	if (rc < 0) {
+		cudbg_err->sys_err = rc;
+		return rc;
+	}
+
+	dparams = &padap->params.devlog;
+	rc = cudbg_get_buff(dbg_buff, dparams->size, &temp_buff);
+	if (rc)
+		return rc;
+
+	/* Collect FW devlog */
+	if (dparams->start != 0) {
+		spin_lock(&padap->win0_lock);
+		rc = t4_memory_rw(padap, padap->params.drv_memwin,
+				  dparams->memtype, dparams->start,
+				  dparams->size,
+				  (__be32 *)(char *)temp_buff.data,
+				  1);
+		spin_unlock(&padap->win0_lock);
+		if (rc) {
+			cudbg_err->sys_err = rc;
+			cudbg_put_buff(&temp_buff, dbg_buff);
+			return rc;
+		}
+	}
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_cim_la(struct cudbg_init *pdbg_init,
+			 struct cudbg_buffer *dbg_buff,
+			 struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	int size, rc;
+	u32 cfg = 0;
+
+	if (is_t6(padap->params.chip)) {
+		size = padap->params.cim_la_size / 10 + 1;
+		size *= 11 * sizeof(u32);
+	} else {
+		size = padap->params.cim_la_size / 8;
+		size *= 8 * sizeof(u32);
+	}
+
+	size += sizeof(cfg);
+	rc = cudbg_get_buff(dbg_buff, size, &temp_buff);
+	if (rc)
+		return rc;
+
+	rc = t4_cim_read(padap, UP_UP_DBG_LA_CFG_A, 1, &cfg);
+	if (rc) {
+		cudbg_err->sys_err = rc;
+		cudbg_put_buff(&temp_buff, dbg_buff);
+		return rc;
+	}
+
+	memcpy((char *)temp_buff.data, &cfg, sizeof(cfg));
+	rc = t4_cim_read_la(padap,
+			    (u32 *)((char *)temp_buff.data + sizeof(cfg)),
+			    NULL);
+	if (rc < 0) {
+		cudbg_err->sys_err = rc;
+		cudbg_put_buff(&temp_buff, dbg_buff);
+		return rc;
+	}
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_cim_ma_la(struct cudbg_init *pdbg_init,
+			    struct cudbg_buffer *dbg_buff,
+			    struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	int size, rc;
+
+	size = 2 * CIM_MALA_SIZE * 5 * sizeof(u32);
+	rc = cudbg_get_buff(dbg_buff, size, &temp_buff);
+	if (rc)
+		return rc;
+
+	t4_cim_read_ma_la(padap,
+			  (u32 *)temp_buff.data,
+			  (u32 *)((char *)temp_buff.data +
+				  5 * CIM_MALA_SIZE));
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_cim_qcfg(struct cudbg_init *pdbg_init,
+			   struct cudbg_buffer *dbg_buff,
+			   struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	struct cudbg_cim_qcfg *cim_qcfg_data;
+	int rc;
+
+	rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_cim_qcfg),
+			    &temp_buff);
+	if (rc)
+		return rc;
+
+	cim_qcfg_data = (struct cudbg_cim_qcfg *)temp_buff.data;
+	cim_qcfg_data->chip = padap->params.chip;
+	rc = t4_cim_read(padap, UP_IBQ_0_RDADDR_A,
+			 ARRAY_SIZE(cim_qcfg_data->stat), cim_qcfg_data->stat);
+	if (rc) {
+		cudbg_err->sys_err = rc;
+		cudbg_put_buff(&temp_buff, dbg_buff);
+		return rc;
+	}
+
+	rc = t4_cim_read(padap, UP_OBQ_0_REALADDR_A,
+			 ARRAY_SIZE(cim_qcfg_data->obq_wr),
+			 cim_qcfg_data->obq_wr);
+	if (rc) {
+		cudbg_err->sys_err = rc;
+		cudbg_put_buff(&temp_buff, dbg_buff);
+		return rc;
+	}
+
+	t4_read_cimq_cfg(padap, cim_qcfg_data->base, cim_qcfg_data->size,
+			 cim_qcfg_data->thres);
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+static int cudbg_read_cim_ibq(struct cudbg_init *pdbg_init,
+			      struct cudbg_buffer *dbg_buff,
+			      struct cudbg_error *cudbg_err, int qid)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	int no_of_read_words, rc = 0;
+	u32 qsize;
+
+	/* collect CIM IBQ */
+	qsize = CIM_IBQ_SIZE * 4 * sizeof(u32);
+	rc = cudbg_get_buff(dbg_buff, qsize, &temp_buff);
+	if (rc)
+		return rc;
+
+	/* t4_read_cim_ibq will return no. of read words or error */
+	no_of_read_words = t4_read_cim_ibq(padap, qid,
+					   (u32 *)temp_buff.data, qsize);
+	/* no_of_read_words is less than or equal to 0 means error */
+	if (no_of_read_words <= 0) {
+		if (!no_of_read_words)
+			rc = CUDBG_SYSTEM_ERROR;
+		else
+			rc = no_of_read_words;
+		cudbg_err->sys_err = rc;
+		cudbg_put_buff(&temp_buff, dbg_buff);
+		return rc;
+	}
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_cim_ibq_tp0(struct cudbg_init *pdbg_init,
+			      struct cudbg_buffer *dbg_buff,
+			      struct cudbg_error *cudbg_err)
+{
+	return cudbg_read_cim_ibq(pdbg_init, dbg_buff, cudbg_err, 0);
+}
+
+int cudbg_collect_cim_ibq_tp1(struct cudbg_init *pdbg_init,
+			      struct cudbg_buffer *dbg_buff,
+			      struct cudbg_error *cudbg_err)
+{
+	return cudbg_read_cim_ibq(pdbg_init, dbg_buff, cudbg_err, 1);
+}
+
+int cudbg_collect_cim_ibq_ulp(struct cudbg_init *pdbg_init,
+			      struct cudbg_buffer *dbg_buff,
+			      struct cudbg_error *cudbg_err)
+{
+	return cudbg_read_cim_ibq(pdbg_init, dbg_buff, cudbg_err, 2);
+}
+
+int cudbg_collect_cim_ibq_sge0(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err)
+{
+	return cudbg_read_cim_ibq(pdbg_init, dbg_buff, cudbg_err, 3);
+}
+
+int cudbg_collect_cim_ibq_sge1(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err)
+{
+	return cudbg_read_cim_ibq(pdbg_init, dbg_buff, cudbg_err, 4);
+}
+
+int cudbg_collect_cim_ibq_ncsi(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err)
+{
+	return cudbg_read_cim_ibq(pdbg_init, dbg_buff, cudbg_err, 5);
+}
+
+u32 cudbg_cim_obq_size(struct adapter *padap, int qid)
+{
+	u32 value;
+
+	t4_write_reg(padap, CIM_QUEUE_CONFIG_REF_A, OBQSELECT_F |
+		     QUENUMSELECT_V(qid));
+	value = t4_read_reg(padap, CIM_QUEUE_CONFIG_CTRL_A);
+	value = CIMQSIZE_G(value) * 64; /* size in number of words */
+	return value * sizeof(u32);
+}
+
+static int cudbg_read_cim_obq(struct cudbg_init *pdbg_init,
+			      struct cudbg_buffer *dbg_buff,
+			      struct cudbg_error *cudbg_err, int qid)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	int no_of_read_words, rc = 0;
+	u32 qsize;
+
+	/* collect CIM OBQ */
+	qsize =  cudbg_cim_obq_size(padap, qid);
+	rc = cudbg_get_buff(dbg_buff, qsize, &temp_buff);
+	if (rc)
+		return rc;
+
+	/* t4_read_cim_obq will return no. of read words or error */
+	no_of_read_words = t4_read_cim_obq(padap, qid,
+					   (u32 *)temp_buff.data, qsize);
+	/* no_of_read_words is less than or equal to 0 means error */
+	if (no_of_read_words <= 0) {
+		if (!no_of_read_words)
+			rc = CUDBG_SYSTEM_ERROR;
+		else
+			rc = no_of_read_words;
+		cudbg_err->sys_err = rc;
+		cudbg_put_buff(&temp_buff, dbg_buff);
+		return rc;
+	}
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_cim_obq_ulp0(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err)
+{
+	return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 0);
+}
+
+int cudbg_collect_cim_obq_ulp1(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err)
+{
+	return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 1);
+}
+
+int cudbg_collect_cim_obq_ulp2(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err)
+{
+	return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 2);
+}
+
+int cudbg_collect_cim_obq_ulp3(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err)
+{
+	return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 3);
+}
+
+int cudbg_collect_cim_obq_sge(struct cudbg_init *pdbg_init,
+			      struct cudbg_buffer *dbg_buff,
+			      struct cudbg_error *cudbg_err)
+{
+	return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 4);
+}
+
+int cudbg_collect_cim_obq_ncsi(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err)
+{
+	return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 5);
+}
+
+int cudbg_collect_obq_sge_rx_q0(struct cudbg_init *pdbg_init,
+				struct cudbg_buffer *dbg_buff,
+				struct cudbg_error *cudbg_err)
+{
+	return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 6);
+}
+
+int cudbg_collect_obq_sge_rx_q1(struct cudbg_init *pdbg_init,
+				struct cudbg_buffer *dbg_buff,
+				struct cudbg_error *cudbg_err)
+{
+	return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 7);
+}
+
+static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
+			     struct cudbg_buffer *dbg_buff, u8 mem_type,
+			     unsigned long tot_len,
+			     struct cudbg_error *cudbg_err)
+{
+	unsigned long bytes, bytes_left, bytes_read = 0;
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	int rc = 0;
+
+	bytes_left = tot_len;
+	while (bytes_left > 0) {
+		bytes = min_t(unsigned long, bytes_left,
+			      (unsigned long)CUDBG_CHUNK_SIZE);
+		rc = cudbg_get_buff(dbg_buff, bytes, &temp_buff);
+		if (rc)
+			return rc;
+		spin_lock(&padap->win0_lock);
+		rc = t4_memory_rw(padap, MEMWIN_NIC, mem_type,
+				  bytes_read, bytes,
+				  (__be32 *)temp_buff.data,
+				  1);
+		spin_unlock(&padap->win0_lock);
+		if (rc) {
+			cudbg_err->sys_err = rc;
+			cudbg_put_buff(&temp_buff, dbg_buff);
+			return rc;
+		}
+		bytes_left -= bytes;
+		bytes_read += bytes;
+		cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	}
+	return rc;
+}
+
+static void cudbg_collect_mem_info(struct cudbg_init *pdbg_init,
+				   struct card_mem *mem_info)
+{
+	struct adapter *padap = pdbg_init->adap;
+	u32 value;
+
+	value = t4_read_reg(padap, MA_EDRAM0_BAR_A);
+	value = EDRAM0_SIZE_G(value);
+	mem_info->size_edc0 = (u16)value;
+
+	value = t4_read_reg(padap, MA_EDRAM1_BAR_A);
+	value = EDRAM1_SIZE_G(value);
+	mem_info->size_edc1 = (u16)value;
+
+	value = t4_read_reg(padap, MA_TARGET_MEM_ENABLE_A);
+	if (value & EDRAM0_ENABLE_F)
+		mem_info->mem_flag |= (1 << EDC0_FLAG);
+	if (value & EDRAM1_ENABLE_F)
+		mem_info->mem_flag |= (1 << EDC1_FLAG);
+}
+
+static void cudbg_t4_fwcache(struct cudbg_init *pdbg_init,
+			     struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	int rc;
+
+	if (is_fw_attached(pdbg_init)) {
+		/* Flush uP dcache before reading edcX/mcX  */
+		rc = t4_fwcache(padap, FW_PARAM_DEV_FWCACHE_FLUSH);
+		if (rc)
+			cudbg_err->sys_warn = rc;
+	}
+}
+
+static int cudbg_collect_mem_region(struct cudbg_init *pdbg_init,
+				    struct cudbg_buffer *dbg_buff,
+				    struct cudbg_error *cudbg_err,
+				    u8 mem_type)
+{
+	struct card_mem mem_info = {0};
+	unsigned long flag, size;
+	int rc;
+
+	cudbg_t4_fwcache(pdbg_init, cudbg_err);
+	cudbg_collect_mem_info(pdbg_init, &mem_info);
+	switch (mem_type) {
+	case MEM_EDC0:
+		flag = (1 << EDC0_FLAG);
+		size = cudbg_mbytes_to_bytes(mem_info.size_edc0);
+		break;
+	case MEM_EDC1:
+		flag = (1 << EDC1_FLAG);
+		size = cudbg_mbytes_to_bytes(mem_info.size_edc1);
+		break;
+	default:
+		rc = CUDBG_STATUS_ENTITY_NOT_FOUND;
+		goto err;
+	}
+
+	if (mem_info.mem_flag & flag) {
+		rc = cudbg_read_fw_mem(pdbg_init, dbg_buff, mem_type,
+				       size, cudbg_err);
+		if (rc)
+			goto err;
+	} else {
+		rc = CUDBG_STATUS_ENTITY_NOT_FOUND;
+		goto err;
+	}
+err:
+	return rc;
+}
+
+int cudbg_collect_edc0_meminfo(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err)
+{
+	return cudbg_collect_mem_region(pdbg_init, dbg_buff, cudbg_err,
+					MEM_EDC0);
+}
+
+int cudbg_collect_edc1_meminfo(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err)
+{
+	return cudbg_collect_mem_region(pdbg_init, dbg_buff, cudbg_err,
+					MEM_EDC1);
+}
+
+int cudbg_collect_rss(struct cudbg_init *pdbg_init,
+		      struct cudbg_buffer *dbg_buff,
+		      struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	int rc;
+
+	rc = cudbg_get_buff(dbg_buff, RSS_NENTRIES * sizeof(u16), &temp_buff);
+	if (rc)
+		return rc;
+
+	rc = t4_read_rss(padap, (u16 *)temp_buff.data);
+	if (rc) {
+		cudbg_err->sys_err = rc;
+		cudbg_put_buff(&temp_buff, dbg_buff);
+		return rc;
+	}
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_rss_vf_config(struct cudbg_init *pdbg_init,
+				struct cudbg_buffer *dbg_buff,
+				struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	struct cudbg_rss_vf_conf *vfconf;
+	int vf, rc, vf_count;
+
+	vf_count = padap->params.arch.vfcount;
+	rc = cudbg_get_buff(dbg_buff,
+			    vf_count * sizeof(struct cudbg_rss_vf_conf),
+			    &temp_buff);
+	if (rc)
+		return rc;
+
+	vfconf = (struct cudbg_rss_vf_conf *)temp_buff.data;
+	for (vf = 0; vf < vf_count; vf++)
+		t4_read_rss_vf_config(padap, vf, &vfconf[vf].rss_vf_vfl,
+				      &vfconf[vf].rss_vf_vfh, true);
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_path_mtu(struct cudbg_init *pdbg_init,
+			   struct cudbg_buffer *dbg_buff,
+			   struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	int rc;
+
+	rc = cudbg_get_buff(dbg_buff, NMTUS * sizeof(u16), &temp_buff);
+	if (rc)
+		return rc;
+
+	t4_read_mtu_tbl(padap, (u16 *)temp_buff.data, NULL);
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_pm_stats(struct cudbg_init *pdbg_init,
+			   struct cudbg_buffer *dbg_buff,
+			   struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	struct cudbg_pm_stats *pm_stats_buff;
+	int rc;
+
+	rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_pm_stats),
+			    &temp_buff);
+	if (rc)
+		return rc;
+
+	pm_stats_buff = (struct cudbg_pm_stats *)temp_buff.data;
+	t4_pmtx_get_stats(padap, pm_stats_buff->tx_cnt, pm_stats_buff->tx_cyc);
+	t4_pmrx_get_stats(padap, pm_stats_buff->rx_cnt, pm_stats_buff->rx_cyc);
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_hw_sched(struct cudbg_init *pdbg_init,
+			   struct cudbg_buffer *dbg_buff,
+			   struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	struct cudbg_hw_sched *hw_sched_buff;
+	int i, rc = 0;
+
+	if (!padap->params.vpd.cclk)
+		return CUDBG_STATUS_CCLK_NOT_DEFINED;
+
+	rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_hw_sched),
+			    &temp_buff);
+	hw_sched_buff = (struct cudbg_hw_sched *)temp_buff.data;
+	hw_sched_buff->map = t4_read_reg(padap, TP_TX_MOD_QUEUE_REQ_MAP_A);
+	hw_sched_buff->mode = TIMERMODE_G(t4_read_reg(padap, TP_MOD_CONFIG_A));
+	t4_read_pace_tbl(padap, hw_sched_buff->pace_tab);
+	for (i = 0; i < NTX_SCHED; ++i)
+		t4_get_tx_sched(padap, i, &hw_sched_buff->kbps[i],
+				&hw_sched_buff->ipg[i], true);
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_tp_indirect(struct cudbg_init *pdbg_init,
+			      struct cudbg_buffer *dbg_buff,
+			      struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	struct ireg_buf *ch_tp_pio;
+	int i, rc, n = 0;
+	u32 size;
+
+	if (is_t5(padap->params.chip))
+		n = sizeof(t5_tp_pio_array) +
+		    sizeof(t5_tp_tm_pio_array) +
+		    sizeof(t5_tp_mib_index_array);
+	else
+		n = sizeof(t6_tp_pio_array) +
+		    sizeof(t6_tp_tm_pio_array) +
+		    sizeof(t6_tp_mib_index_array);
+
+	n = n / (IREG_NUM_ELEM * sizeof(u32));
+	size = sizeof(struct ireg_buf) * n;
+	rc = cudbg_get_buff(dbg_buff, size, &temp_buff);
+	if (rc)
+		return rc;
+
+	ch_tp_pio = (struct ireg_buf *)temp_buff.data;
+
+	/* TP_PIO */
+	if (is_t5(padap->params.chip))
+		n = sizeof(t5_tp_pio_array) / (IREG_NUM_ELEM * sizeof(u32));
+	else if (is_t6(padap->params.chip))
+		n = sizeof(t6_tp_pio_array) / (IREG_NUM_ELEM * sizeof(u32));
+
+	for (i = 0; i < n; i++) {
+		struct ireg_field *tp_pio = &ch_tp_pio->tp_pio;
+		u32 *buff = ch_tp_pio->outbuf;
+
+		if (is_t5(padap->params.chip)) {
+			tp_pio->ireg_addr = t5_tp_pio_array[i][0];
+			tp_pio->ireg_data = t5_tp_pio_array[i][1];
+			tp_pio->ireg_local_offset = t5_tp_pio_array[i][2];
+			tp_pio->ireg_offset_range = t5_tp_pio_array[i][3];
+		} else if (is_t6(padap->params.chip)) {
+			tp_pio->ireg_addr = t6_tp_pio_array[i][0];
+			tp_pio->ireg_data = t6_tp_pio_array[i][1];
+			tp_pio->ireg_local_offset = t6_tp_pio_array[i][2];
+			tp_pio->ireg_offset_range = t6_tp_pio_array[i][3];
+		}
+		t4_tp_pio_read(padap, buff, tp_pio->ireg_offset_range,
+			       tp_pio->ireg_local_offset, true);
+		ch_tp_pio++;
+	}
+
+	/* TP_TM_PIO */
+	if (is_t5(padap->params.chip))
+		n = sizeof(t5_tp_tm_pio_array) / (IREG_NUM_ELEM * sizeof(u32));
+	else if (is_t6(padap->params.chip))
+		n = sizeof(t6_tp_tm_pio_array) / (IREG_NUM_ELEM * sizeof(u32));
+
+	for (i = 0; i < n; i++) {
+		struct ireg_field *tp_pio = &ch_tp_pio->tp_pio;
+		u32 *buff = ch_tp_pio->outbuf;
+
+		if (is_t5(padap->params.chip)) {
+			tp_pio->ireg_addr = t5_tp_tm_pio_array[i][0];
+			tp_pio->ireg_data = t5_tp_tm_pio_array[i][1];
+			tp_pio->ireg_local_offset = t5_tp_tm_pio_array[i][2];
+			tp_pio->ireg_offset_range = t5_tp_tm_pio_array[i][3];
+		} else if (is_t6(padap->params.chip)) {
+			tp_pio->ireg_addr = t6_tp_tm_pio_array[i][0];
+			tp_pio->ireg_data = t6_tp_tm_pio_array[i][1];
+			tp_pio->ireg_local_offset = t6_tp_tm_pio_array[i][2];
+			tp_pio->ireg_offset_range = t6_tp_tm_pio_array[i][3];
+		}
+		t4_tp_tm_pio_read(padap, buff, tp_pio->ireg_offset_range,
+				  tp_pio->ireg_local_offset, true);
+		ch_tp_pio++;
+	}
+
+	/* TP_MIB_INDEX */
+	if (is_t5(padap->params.chip))
+		n = sizeof(t5_tp_mib_index_array) /
+		    (IREG_NUM_ELEM * sizeof(u32));
+	else if (is_t6(padap->params.chip))
+		n = sizeof(t6_tp_mib_index_array) /
+		    (IREG_NUM_ELEM * sizeof(u32));
+
+	for (i = 0; i < n ; i++) {
+		struct ireg_field *tp_pio = &ch_tp_pio->tp_pio;
+		u32 *buff = ch_tp_pio->outbuf;
+
+		if (is_t5(padap->params.chip)) {
+			tp_pio->ireg_addr = t5_tp_mib_index_array[i][0];
+			tp_pio->ireg_data = t5_tp_mib_index_array[i][1];
+			tp_pio->ireg_local_offset =
+				t5_tp_mib_index_array[i][2];
+			tp_pio->ireg_offset_range =
+				t5_tp_mib_index_array[i][3];
+		} else if (is_t6(padap->params.chip)) {
+			tp_pio->ireg_addr = t6_tp_mib_index_array[i][0];
+			tp_pio->ireg_data = t6_tp_mib_index_array[i][1];
+			tp_pio->ireg_local_offset =
+				t6_tp_mib_index_array[i][2];
+			tp_pio->ireg_offset_range =
+				t6_tp_mib_index_array[i][3];
+		}
+		t4_tp_mib_read(padap, buff, tp_pio->ireg_offset_range,
+			       tp_pio->ireg_local_offset, true);
+		ch_tp_pio++;
+	}
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_sge_indirect(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	struct ireg_buf *ch_sge_dbg;
+	int i, rc;
+
+	rc = cudbg_get_buff(dbg_buff, sizeof(*ch_sge_dbg) * 2, &temp_buff);
+	if (rc)
+		return rc;
+
+	ch_sge_dbg = (struct ireg_buf *)temp_buff.data;
+	for (i = 0; i < 2; i++) {
+		struct ireg_field *sge_pio = &ch_sge_dbg->tp_pio;
+		u32 *buff = ch_sge_dbg->outbuf;
+
+		sge_pio->ireg_addr = t5_sge_dbg_index_array[i][0];
+		sge_pio->ireg_data = t5_sge_dbg_index_array[i][1];
+		sge_pio->ireg_local_offset = t5_sge_dbg_index_array[i][2];
+		sge_pio->ireg_offset_range = t5_sge_dbg_index_array[i][3];
+		t4_read_indirect(padap,
+				 sge_pio->ireg_addr,
+				 sge_pio->ireg_data,
+				 buff,
+				 sge_pio->ireg_offset_range,
+				 sge_pio->ireg_local_offset);
+		ch_sge_dbg++;
+	}
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_ulprx_la(struct cudbg_init *pdbg_init,
+			   struct cudbg_buffer *dbg_buff,
+			   struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	struct cudbg_ulprx_la *ulprx_la_buff;
+	int rc;
+
+	rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_ulprx_la),
+			    &temp_buff);
+	if (rc)
+		return rc;
+
+	ulprx_la_buff = (struct cudbg_ulprx_la *)temp_buff.data;
+	t4_ulprx_read_la(padap, (u32 *)ulprx_la_buff->data);
+	ulprx_la_buff->size = ULPRX_LA_SIZE;
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_tp_la(struct cudbg_init *pdbg_init,
+			struct cudbg_buffer *dbg_buff,
+			struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	struct cudbg_tp_la *tp_la_buff;
+	int size, rc;
+
+	size = sizeof(struct cudbg_tp_la) + TPLA_SIZE *  sizeof(u64);
+	rc = cudbg_get_buff(dbg_buff, size, &temp_buff);
+	if (rc)
+		return rc;
+
+	tp_la_buff = (struct cudbg_tp_la *)temp_buff.data;
+	tp_la_buff->mode = DBGLAMODE_G(t4_read_reg(padap, TP_DBG_LA_CONFIG_A));
+	t4_tp_read_la(padap, (u64 *)tp_la_buff->data, NULL);
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_cim_pif_la(struct cudbg_init *pdbg_init,
+			     struct cudbg_buffer *dbg_buff,
+			     struct cudbg_error *cudbg_err)
+{
+	struct cudbg_cim_pif_la *cim_pif_la_buff;
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	int size, rc;
+
+	size = sizeof(struct cudbg_cim_pif_la) +
+	       2 * CIM_PIFLA_SIZE * 6 * sizeof(u32);
+	rc = cudbg_get_buff(dbg_buff, size, &temp_buff);
+	if (rc)
+		return rc;
+
+	cim_pif_la_buff = (struct cudbg_cim_pif_la *)temp_buff.data;
+	cim_pif_la_buff->size = CIM_PIFLA_SIZE;
+	t4_cim_read_pif_la(padap, (u32 *)cim_pif_la_buff->data,
+			   (u32 *)cim_pif_la_buff->data + 6 * CIM_PIFLA_SIZE,
+			   NULL, NULL);
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_clk_info(struct cudbg_init *pdbg_init,
+			   struct cudbg_buffer *dbg_buff,
+			   struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	struct cudbg_clk_info *clk_info_buff;
+	u64 tp_tick_us;
+	int rc;
+
+	if (!padap->params.vpd.cclk)
+		return CUDBG_STATUS_CCLK_NOT_DEFINED;
+
+	rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_clk_info),
+			    &temp_buff);
+	if (rc)
+		return rc;
+
+	clk_info_buff = (struct cudbg_clk_info *)temp_buff.data;
+	clk_info_buff->cclk_ps = 1000000000 / padap->params.vpd.cclk; /* psec */
+	clk_info_buff->res = t4_read_reg(padap, TP_TIMER_RESOLUTION_A);
+	clk_info_buff->tre = TIMERRESOLUTION_G(clk_info_buff->res);
+	clk_info_buff->dack_re = DELAYEDACKRESOLUTION_G(clk_info_buff->res);
+	tp_tick_us = (clk_info_buff->cclk_ps << clk_info_buff->tre) / 1000000;
+
+	clk_info_buff->dack_timer =
+		(clk_info_buff->cclk_ps << clk_info_buff->dack_re) / 1000000 *
+		t4_read_reg(padap, TP_DACK_TIMER_A);
+	clk_info_buff->retransmit_min =
+		tp_tick_us * t4_read_reg(padap, TP_RXT_MIN_A);
+	clk_info_buff->retransmit_max =
+		tp_tick_us * t4_read_reg(padap, TP_RXT_MAX_A);
+	clk_info_buff->persist_timer_min =
+		tp_tick_us * t4_read_reg(padap, TP_PERS_MIN_A);
+	clk_info_buff->persist_timer_max =
+		tp_tick_us * t4_read_reg(padap, TP_PERS_MAX_A);
+	clk_info_buff->keepalive_idle_timer =
+		tp_tick_us * t4_read_reg(padap, TP_KEEP_IDLE_A);
+	clk_info_buff->keepalive_interval =
+		tp_tick_us * t4_read_reg(padap, TP_KEEP_INTVL_A);
+	clk_info_buff->initial_srtt =
+		tp_tick_us * INITSRTT_G(t4_read_reg(padap, TP_INIT_SRTT_A));
+	clk_info_buff->finwait2_timer =
+		tp_tick_us * t4_read_reg(padap, TP_FINWAIT2_TIMER_A);
+
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_pcie_indirect(struct cudbg_init *pdbg_init,
+				struct cudbg_buffer *dbg_buff,
+				struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	struct ireg_buf *ch_pcie;
+	int i, rc, n;
+	u32 size;
+
+	n = sizeof(t5_pcie_pdbg_array) / (IREG_NUM_ELEM * sizeof(u32));
+	size = sizeof(struct ireg_buf) * n * 2;
+	rc = cudbg_get_buff(dbg_buff, size, &temp_buff);
+	if (rc)
+		return rc;
+
+	ch_pcie = (struct ireg_buf *)temp_buff.data;
+	/* PCIE_PDBG */
+	for (i = 0; i < n; i++) {
+		struct ireg_field *pcie_pio = &ch_pcie->tp_pio;
+		u32 *buff = ch_pcie->outbuf;
+
+		pcie_pio->ireg_addr = t5_pcie_pdbg_array[i][0];
+		pcie_pio->ireg_data = t5_pcie_pdbg_array[i][1];
+		pcie_pio->ireg_local_offset = t5_pcie_pdbg_array[i][2];
+		pcie_pio->ireg_offset_range = t5_pcie_pdbg_array[i][3];
+		t4_read_indirect(padap,
+				 pcie_pio->ireg_addr,
+				 pcie_pio->ireg_data,
+				 buff,
+				 pcie_pio->ireg_offset_range,
+				 pcie_pio->ireg_local_offset);
+		ch_pcie++;
+	}
+
+	/* PCIE_CDBG */
+	n = sizeof(t5_pcie_cdbg_array) / (IREG_NUM_ELEM * sizeof(u32));
+	for (i = 0; i < n; i++) {
+		struct ireg_field *pcie_pio = &ch_pcie->tp_pio;
+		u32 *buff = ch_pcie->outbuf;
+
+		pcie_pio->ireg_addr = t5_pcie_cdbg_array[i][0];
+		pcie_pio->ireg_data = t5_pcie_cdbg_array[i][1];
+		pcie_pio->ireg_local_offset = t5_pcie_cdbg_array[i][2];
+		pcie_pio->ireg_offset_range = t5_pcie_cdbg_array[i][3];
+		t4_read_indirect(padap,
+				 pcie_pio->ireg_addr,
+				 pcie_pio->ireg_data,
+				 buff,
+				 pcie_pio->ireg_offset_range,
+				 pcie_pio->ireg_local_offset);
+		ch_pcie++;
+	}
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_pm_indirect(struct cudbg_init *pdbg_init,
+			      struct cudbg_buffer *dbg_buff,
+			      struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	struct ireg_buf *ch_pm;
+	int i, rc, n;
+	u32 size;
+
+	n = sizeof(t5_pm_rx_array) / (IREG_NUM_ELEM * sizeof(u32));
+	size = sizeof(struct ireg_buf) * n * 2;
+	rc = cudbg_get_buff(dbg_buff, size, &temp_buff);
+	if (rc)
+		return rc;
+
+	ch_pm = (struct ireg_buf *)temp_buff.data;
+	/* PM_RX */
+	for (i = 0; i < n; i++) {
+		struct ireg_field *pm_pio = &ch_pm->tp_pio;
+		u32 *buff = ch_pm->outbuf;
+
+		pm_pio->ireg_addr = t5_pm_rx_array[i][0];
+		pm_pio->ireg_data = t5_pm_rx_array[i][1];
+		pm_pio->ireg_local_offset = t5_pm_rx_array[i][2];
+		pm_pio->ireg_offset_range = t5_pm_rx_array[i][3];
+		t4_read_indirect(padap,
+				 pm_pio->ireg_addr,
+				 pm_pio->ireg_data,
+				 buff,
+				 pm_pio->ireg_offset_range,
+				 pm_pio->ireg_local_offset);
+		ch_pm++;
+	}
+
+	/* PM_TX */
+	n = sizeof(t5_pm_tx_array) / (IREG_NUM_ELEM * sizeof(u32));
+	for (i = 0; i < n; i++) {
+		struct ireg_field *pm_pio = &ch_pm->tp_pio;
+		u32 *buff = ch_pm->outbuf;
+
+		pm_pio->ireg_addr = t5_pm_tx_array[i][0];
+		pm_pio->ireg_data = t5_pm_tx_array[i][1];
+		pm_pio->ireg_local_offset = t5_pm_tx_array[i][2];
+		pm_pio->ireg_offset_range = t5_pm_tx_array[i][3];
+		t4_read_indirect(padap,
+				 pm_pio->ireg_addr,
+				 pm_pio->ireg_data,
+				 buff,
+				 pm_pio->ireg_offset_range,
+				 pm_pio->ireg_local_offset);
+		ch_pm++;
+	}
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_tid(struct cudbg_init *pdbg_init,
+		      struct cudbg_buffer *dbg_buff,
+		      struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_tid_info_region_rev1 *tid1;
+	struct cudbg_buffer temp_buff = { 0 };
+	struct cudbg_tid_info_region *tid;
+	u32 para[2], val[2];
+	int rc;
+
+	rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_tid_info_region_rev1),
+			    &temp_buff);
+	if (rc)
+		return rc;
+
+	tid1 = (struct cudbg_tid_info_region_rev1 *)temp_buff.data;
+	tid = &tid1->tid;
+	tid1->ver_hdr.signature = CUDBG_ENTITY_SIGNATURE;
+	tid1->ver_hdr.revision = CUDBG_TID_INFO_REV;
+	tid1->ver_hdr.size = sizeof(struct cudbg_tid_info_region_rev1) -
+			     sizeof(struct cudbg_ver_hdr);
+
+#define FW_PARAM_PFVF_A(param) \
+	(FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) | \
+	 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_##param) | \
+	 FW_PARAMS_PARAM_Y_V(0) | \
+	 FW_PARAMS_PARAM_Z_V(0))
+
+	para[0] = FW_PARAM_PFVF_A(ETHOFLD_START);
+	para[1] = FW_PARAM_PFVF_A(ETHOFLD_END);
+	rc = t4_query_params(padap, padap->mbox, padap->pf, 0, 2, para, val);
+	if (rc <  0) {
+		cudbg_err->sys_err = rc;
+		cudbg_put_buff(&temp_buff, dbg_buff);
+		return rc;
+	}
+	tid->uotid_base = val[0];
+	tid->nuotids = val[1] - val[0] + 1;
+
+	if (is_t5(padap->params.chip)) {
+		tid->sb = t4_read_reg(padap, LE_DB_SERVER_INDEX_A) / 4;
+	} else if (is_t6(padap->params.chip)) {
+		tid1->tid_start =
+			t4_read_reg(padap, LE_DB_ACTIVE_TABLE_START_INDEX_A);
+		tid->sb = t4_read_reg(padap, LE_DB_SRVR_START_INDEX_A);
+
+		para[0] = FW_PARAM_PFVF_A(HPFILTER_START);
+		para[1] = FW_PARAM_PFVF_A(HPFILTER_END);
+		rc = t4_query_params(padap, padap->mbox, padap->pf, 0, 2,
+				     para, val);
+		if (rc < 0) {
+			cudbg_err->sys_err = rc;
+			cudbg_put_buff(&temp_buff, dbg_buff);
+			return rc;
+		}
+		tid->hpftid_base = val[0];
+		tid->nhpftids = val[1] - val[0] + 1;
+	}
+
+	tid->ntids = padap->tids.ntids;
+	tid->nstids = padap->tids.nstids;
+	tid->stid_base = padap->tids.stid_base;
+	tid->hash_base = padap->tids.hash_base;
+
+	tid->natids = padap->tids.natids;
+	tid->nftids = padap->tids.nftids;
+	tid->ftid_base = padap->tids.ftid_base;
+	tid->aftid_base = padap->tids.aftid_base;
+	tid->aftid_end = padap->tids.aftid_end;
+
+	tid->sftid_base = padap->tids.sftid_base;
+	tid->nsftids = padap->tids.nsftids;
+
+	tid->flags = padap->flags;
+	tid->le_db_conf = t4_read_reg(padap, LE_DB_CONFIG_A);
+	tid->ip_users = t4_read_reg(padap, LE_DB_ACT_CNT_IPV4_A);
+	tid->ipv6_users = t4_read_reg(padap, LE_DB_ACT_CNT_IPV6_A);
+
+#undef FW_PARAM_PFVF_A
+
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_dump_context_size(struct adapter *padap)
+{
+	u32 value, size;
+	u8 flq;
+
+	value = t4_read_reg(padap, SGE_FLM_CFG_A);
+
+	/* Get number of data freelist queues */
+	flq = HDRSTARTFLQ_G(value);
+	size = CUDBG_MAX_FL_QIDS >> flq;
+
+	/* Add extra space for congestion manager contexts.
+	 * The number of CONM contexts are same as number of freelist
+	 * queues.
+	 */
+	size += size;
+	return size * sizeof(struct cudbg_ch_cntxt);
+}
+
+static void cudbg_read_sge_ctxt(struct cudbg_init *pdbg_init, u32 cid,
+				enum ctxt_type ctype, u32 *data)
+{
+	struct adapter *padap = pdbg_init->adap;
+	int rc = -1;
+
+	/* Under heavy traffic, the SGE Queue contexts registers will be
+	 * frequently accessed by firmware.
+	 *
+	 * To avoid conflicts with firmware, always ask firmware to fetch
+	 * the SGE Queue contexts via mailbox. On failure, fallback to
+	 * accessing hardware registers directly.
+	 */
+	if (is_fw_attached(pdbg_init))
+		rc = t4_sge_ctxt_rd(padap, padap->mbox, cid, ctype, data);
+	if (rc)
+		t4_sge_ctxt_rd_bd(padap, cid, ctype, data);
+}
+
+int cudbg_collect_dump_context(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	struct cudbg_ch_cntxt *buff;
+	u32 size, i = 0;
+	int rc;
+
+	rc = cudbg_dump_context_size(padap);
+	if (rc <= 0)
+		return CUDBG_STATUS_ENTITY_NOT_FOUND;
+
+	size = rc;
+	rc = cudbg_get_buff(dbg_buff, size, &temp_buff);
+	if (rc)
+		return rc;
+
+	buff = (struct cudbg_ch_cntxt *)temp_buff.data;
+	while (size > 0) {
+		buff->cntxt_type = CTXT_FLM;
+		buff->cntxt_id = i;
+		cudbg_read_sge_ctxt(pdbg_init, i, CTXT_FLM, buff->data);
+		buff++;
+		size -= sizeof(struct cudbg_ch_cntxt);
+
+		buff->cntxt_type = CTXT_CNM;
+		buff->cntxt_id = i;
+		cudbg_read_sge_ctxt(pdbg_init, i, CTXT_CNM, buff->data);
+		buff++;
+		size -= sizeof(struct cudbg_ch_cntxt);
+
+		i++;
+	}
+
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+static inline void cudbg_tcamxy2valmask(u64 x, u64 y, u8 *addr, u64 *mask)
+{
+	*mask = x | y;
+	y = (__force u64)cpu_to_be64(y);
+	memcpy(addr, (char *)&y + 2, ETH_ALEN);
+}
+
+static void cudbg_mps_rpl_backdoor(struct adapter *padap,
+				   struct fw_ldst_mps_rplc *mps_rplc)
+{
+	if (is_t5(padap->params.chip)) {
+		mps_rplc->rplc255_224 = htonl(t4_read_reg(padap,
+							  MPS_VF_RPLCT_MAP3_A));
+		mps_rplc->rplc223_192 = htonl(t4_read_reg(padap,
+							  MPS_VF_RPLCT_MAP2_A));
+		mps_rplc->rplc191_160 = htonl(t4_read_reg(padap,
+							  MPS_VF_RPLCT_MAP1_A));
+		mps_rplc->rplc159_128 = htonl(t4_read_reg(padap,
+							  MPS_VF_RPLCT_MAP0_A));
+	} else {
+		mps_rplc->rplc255_224 = htonl(t4_read_reg(padap,
+							  MPS_VF_RPLCT_MAP7_A));
+		mps_rplc->rplc223_192 = htonl(t4_read_reg(padap,
+							  MPS_VF_RPLCT_MAP6_A));
+		mps_rplc->rplc191_160 = htonl(t4_read_reg(padap,
+							  MPS_VF_RPLCT_MAP5_A));
+		mps_rplc->rplc159_128 = htonl(t4_read_reg(padap,
+							  MPS_VF_RPLCT_MAP4_A));
+	}
+	mps_rplc->rplc127_96 = htonl(t4_read_reg(padap, MPS_VF_RPLCT_MAP3_A));
+	mps_rplc->rplc95_64 = htonl(t4_read_reg(padap, MPS_VF_RPLCT_MAP2_A));
+	mps_rplc->rplc63_32 = htonl(t4_read_reg(padap, MPS_VF_RPLCT_MAP1_A));
+	mps_rplc->rplc31_0 = htonl(t4_read_reg(padap, MPS_VF_RPLCT_MAP0_A));
+}
+
+static int cudbg_collect_tcam_index(struct adapter *padap,
+				    struct cudbg_mps_tcam *tcam, u32 idx)
+{
+	u64 tcamy, tcamx, val;
+	u32 ctl, data2;
+	int rc = 0;
+
+	if (CHELSIO_CHIP_VERSION(padap->params.chip) >= CHELSIO_T6) {
+		/* CtlReqID   - 1: use Host Driver Requester ID
+		 * CtlCmdType - 0: Read, 1: Write
+		 * CtlTcamSel - 0: TCAM0, 1: TCAM1
+		 * CtlXYBitSel- 0: Y bit, 1: X bit
+		 */
+
+		/* Read tcamy */
+		ctl = CTLREQID_V(1) | CTLCMDTYPE_V(0) | CTLXYBITSEL_V(0);
+		if (idx < 256)
+			ctl |= CTLTCAMINDEX_V(idx) | CTLTCAMSEL_V(0);
+		else
+			ctl |= CTLTCAMINDEX_V(idx - 256) | CTLTCAMSEL_V(1);
+
+		t4_write_reg(padap, MPS_CLS_TCAM_DATA2_CTL_A, ctl);
+		val = t4_read_reg(padap, MPS_CLS_TCAM_RDATA1_REQ_ID1_A);
+		tcamy = DMACH_G(val) << 32;
+		tcamy |= t4_read_reg(padap, MPS_CLS_TCAM_RDATA0_REQ_ID1_A);
+		data2 = t4_read_reg(padap, MPS_CLS_TCAM_RDATA2_REQ_ID1_A);
+		tcam->lookup_type = DATALKPTYPE_G(data2);
+
+		/* 0 - Outer header, 1 - Inner header
+		 * [71:48] bit locations are overloaded for
+		 * outer vs. inner lookup types.
+		 */
+		if (tcam->lookup_type && tcam->lookup_type != DATALKPTYPE_M) {
+			/* Inner header VNI */
+			tcam->vniy = (data2 & DATAVIDH2_F) | DATAVIDH1_G(data2);
+			tcam->vniy = (tcam->vniy << 16) | VIDL_G(val);
+			tcam->dip_hit = data2 & DATADIPHIT_F;
+		} else {
+			tcam->vlan_vld = data2 & DATAVIDH2_F;
+			tcam->ivlan = VIDL_G(val);
+		}
+
+		tcam->port_num = DATAPORTNUM_G(data2);
+
+		/* Read tcamx. Change the control param */
+		ctl |= CTLXYBITSEL_V(1);
+		t4_write_reg(padap, MPS_CLS_TCAM_DATA2_CTL_A, ctl);
+		val = t4_read_reg(padap, MPS_CLS_TCAM_RDATA1_REQ_ID1_A);
+		tcamx = DMACH_G(val) << 32;
+		tcamx |= t4_read_reg(padap, MPS_CLS_TCAM_RDATA0_REQ_ID1_A);
+		data2 = t4_read_reg(padap, MPS_CLS_TCAM_RDATA2_REQ_ID1_A);
+		if (tcam->lookup_type && tcam->lookup_type != DATALKPTYPE_M) {
+			/* Inner header VNI mask */
+			tcam->vnix = (data2 & DATAVIDH2_F) | DATAVIDH1_G(data2);
+			tcam->vnix = (tcam->vnix << 16) | VIDL_G(val);
+		}
+	} else {
+		tcamy = t4_read_reg64(padap, MPS_CLS_TCAM_Y_L(idx));
+		tcamx = t4_read_reg64(padap, MPS_CLS_TCAM_X_L(idx));
+	}
+
+	/* If no entry, return */
+	if (tcamx & tcamy)
+		return rc;
+
+	tcam->cls_lo = t4_read_reg(padap, MPS_CLS_SRAM_L(idx));
+	tcam->cls_hi = t4_read_reg(padap, MPS_CLS_SRAM_H(idx));
+
+	if (is_t5(padap->params.chip))
+		tcam->repli = (tcam->cls_lo & REPLICATE_F);
+	else if (is_t6(padap->params.chip))
+		tcam->repli = (tcam->cls_lo & T6_REPLICATE_F);
+
+	if (tcam->repli) {
+		struct fw_ldst_cmd ldst_cmd;
+		struct fw_ldst_mps_rplc mps_rplc;
+
+		memset(&ldst_cmd, 0, sizeof(ldst_cmd));
+		ldst_cmd.op_to_addrspace =
+			htonl(FW_CMD_OP_V(FW_LDST_CMD) |
+			      FW_CMD_REQUEST_F | FW_CMD_READ_F |
+			      FW_LDST_CMD_ADDRSPACE_V(FW_LDST_ADDRSPC_MPS));
+		ldst_cmd.cycles_to_len16 = htonl(FW_LEN16(ldst_cmd));
+		ldst_cmd.u.mps.rplc.fid_idx =
+			htons(FW_LDST_CMD_FID_V(FW_LDST_MPS_RPLC) |
+			      FW_LDST_CMD_IDX_V(idx));
+
+		rc = t4_wr_mbox(padap, padap->mbox, &ldst_cmd, sizeof(ldst_cmd),
+				&ldst_cmd);
+		if (rc)
+			cudbg_mps_rpl_backdoor(padap, &mps_rplc);
+		else
+			mps_rplc = ldst_cmd.u.mps.rplc;
+
+		tcam->rplc[0] = ntohl(mps_rplc.rplc31_0);
+		tcam->rplc[1] = ntohl(mps_rplc.rplc63_32);
+		tcam->rplc[2] = ntohl(mps_rplc.rplc95_64);
+		tcam->rplc[3] = ntohl(mps_rplc.rplc127_96);
+		if (padap->params.arch.mps_rplc_size > CUDBG_MAX_RPLC_SIZE) {
+			tcam->rplc[4] = ntohl(mps_rplc.rplc159_128);
+			tcam->rplc[5] = ntohl(mps_rplc.rplc191_160);
+			tcam->rplc[6] = ntohl(mps_rplc.rplc223_192);
+			tcam->rplc[7] = ntohl(mps_rplc.rplc255_224);
+		}
+	}
+	cudbg_tcamxy2valmask(tcamx, tcamy, tcam->addr, &tcam->mask);
+	tcam->idx = idx;
+	tcam->rplc_size = padap->params.arch.mps_rplc_size;
+	return rc;
+}
+
+int cudbg_collect_mps_tcam(struct cudbg_init *pdbg_init,
+			   struct cudbg_buffer *dbg_buff,
+			   struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	u32 size = 0, i, n, total_size = 0;
+	struct cudbg_mps_tcam *tcam;
+	int rc;
+
+	n = padap->params.arch.mps_tcam_size;
+	size = sizeof(struct cudbg_mps_tcam) * n;
+	rc = cudbg_get_buff(dbg_buff, size, &temp_buff);
+	if (rc)
+		return rc;
+
+	tcam = (struct cudbg_mps_tcam *)temp_buff.data;
+	for (i = 0; i < n; i++) {
+		rc = cudbg_collect_tcam_index(padap, tcam, i);
+		if (rc) {
+			cudbg_err->sys_err = rc;
+			cudbg_put_buff(&temp_buff, dbg_buff);
+			return rc;
+		}
+		total_size += sizeof(struct cudbg_mps_tcam);
+		tcam++;
+	}
+
+	if (!total_size) {
+		rc = CUDBG_SYSTEM_ERROR;
+		cudbg_err->sys_err = rc;
+		cudbg_put_buff(&temp_buff, dbg_buff);
+		return rc;
+	}
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_vpd_data(struct cudbg_init *pdbg_init,
+			   struct cudbg_buffer *dbg_buff,
+			   struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	char vpd_str[CUDBG_VPD_VER_LEN + 1];
+	u32 scfg_vers, vpd_vers, fw_vers;
+	struct cudbg_vpd_data *vpd_data;
+	struct vpd_params vpd = { 0 };
+	int rc, ret;
+
+	rc = t4_get_raw_vpd_params(padap, &vpd);
+	if (rc)
+		return rc;
+
+	rc = t4_get_fw_version(padap, &fw_vers);
+	if (rc)
+		return rc;
+
+	/* Serial Configuration Version is located beyond the PF's vpd size.
+	 * Temporarily give access to entire EEPROM to get it.
+	 */
+	rc = pci_set_vpd_size(padap->pdev, EEPROMVSIZE);
+	if (rc < 0)
+		return rc;
+
+	ret = cudbg_read_vpd_reg(padap, CUDBG_SCFG_VER_ADDR, CUDBG_SCFG_VER_LEN,
+				 &scfg_vers);
+
+	/* Restore back to original PF's vpd size */
+	rc = pci_set_vpd_size(padap->pdev, CUDBG_VPD_PF_SIZE);
+	if (rc < 0)
+		return rc;
+
+	if (ret)
+		return ret;
+
+	rc = cudbg_read_vpd_reg(padap, CUDBG_VPD_VER_ADDR, CUDBG_VPD_VER_LEN,
+				vpd_str);
+	if (rc)
+		return rc;
+
+	vpd_str[CUDBG_VPD_VER_LEN] = '\0';
+	rc = kstrtouint(vpd_str, 0, &vpd_vers);
+	if (rc)
+		return rc;
+
+	rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_vpd_data),
+			    &temp_buff);
+	if (rc)
+		return rc;
+
+	vpd_data = (struct cudbg_vpd_data *)temp_buff.data;
+	memcpy(vpd_data->sn, vpd.sn, SERNUM_LEN + 1);
+	memcpy(vpd_data->bn, vpd.pn, PN_LEN + 1);
+	memcpy(vpd_data->na, vpd.na, MACADDR_LEN + 1);
+	memcpy(vpd_data->mn, vpd.id, ID_LEN + 1);
+	vpd_data->scfg_vers = scfg_vers;
+	vpd_data->vpd_vers = vpd_vers;
+	vpd_data->fw_major = FW_HDR_FW_VER_MAJOR_G(fw_vers);
+	vpd_data->fw_minor = FW_HDR_FW_VER_MINOR_G(fw_vers);
+	vpd_data->fw_micro = FW_HDR_FW_VER_MICRO_G(fw_vers);
+	vpd_data->fw_build = FW_HDR_FW_VER_BUILD_G(fw_vers);
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+static int cudbg_read_tid(struct cudbg_init *pdbg_init, u32 tid,
+			  struct cudbg_tid_data *tid_data)
+{
+	struct adapter *padap = pdbg_init->adap;
+	int i, cmd_retry = 8;
+	u32 val;
+
+	/* Fill REQ_DATA regs with 0's */
+	for (i = 0; i < NUM_LE_DB_DBGI_REQ_DATA_INSTANCES; i++)
+		t4_write_reg(padap, LE_DB_DBGI_REQ_DATA_A + (i << 2), 0);
+
+	/* Write DBIG command */
+	val = DBGICMD_V(4) | DBGITID_V(tid);
+	t4_write_reg(padap, LE_DB_DBGI_REQ_TCAM_CMD_A, val);
+	tid_data->dbig_cmd = val;
+
+	val = DBGICMDSTRT_F | DBGICMDMODE_V(1); /* LE mode */
+	t4_write_reg(padap, LE_DB_DBGI_CONFIG_A, val);
+	tid_data->dbig_conf = val;
+
+	/* Poll the DBGICMDBUSY bit */
+	val = 1;
+	while (val) {
+		val = t4_read_reg(padap, LE_DB_DBGI_CONFIG_A);
+		val = val & DBGICMDBUSY_F;
+		cmd_retry--;
+		if (!cmd_retry)
+			return CUDBG_SYSTEM_ERROR;
+	}
+
+	/* Check RESP status */
+	val = t4_read_reg(padap, LE_DB_DBGI_RSP_STATUS_A);
+	tid_data->dbig_rsp_stat = val;
+	if (!(val & 1))
+		return CUDBG_SYSTEM_ERROR;
+
+	/* Read RESP data */
+	for (i = 0; i < NUM_LE_DB_DBGI_RSP_DATA_INSTANCES; i++)
+		tid_data->data[i] = t4_read_reg(padap,
+						LE_DB_DBGI_RSP_DATA_A +
+						(i << 2));
+	tid_data->tid = tid;
+	return 0;
+}
+
+static int cudbg_get_le_type(u32 tid, struct cudbg_tcam tcam_region)
+{
+	int type = LE_ET_UNKNOWN;
+
+	if (tid < tcam_region.server_start)
+		type = LE_ET_TCAM_CON;
+	else if (tid < tcam_region.filter_start)
+		type = LE_ET_TCAM_SERVER;
+	else if (tid < tcam_region.clip_start)
+		type = LE_ET_TCAM_FILTER;
+	else if (tid < tcam_region.routing_start)
+		type = LE_ET_TCAM_CLIP;
+	else if (tid < tcam_region.tid_hash_base)
+		type = LE_ET_TCAM_ROUTING;
+	else if (tid < tcam_region.max_tid)
+		type = LE_ET_HASH_CON;
+	else
+		type = LE_ET_INVALID_TID;
+
+	return type;
+}
+
+static int cudbg_is_ipv6_entry(struct cudbg_tid_data *tid_data,
+			       struct cudbg_tcam tcam_region)
+{
+	int ipv6 = 0;
+	int le_type;
+
+	le_type = cudbg_get_le_type(tid_data->tid, tcam_region);
+	if (tid_data->tid & 1)
+		return 0;
+
+	if (le_type == LE_ET_HASH_CON) {
+		ipv6 = tid_data->data[16] & 0x8000;
+	} else if (le_type == LE_ET_TCAM_CON) {
+		ipv6 = tid_data->data[16] & 0x8000;
+		if (ipv6)
+			ipv6 = tid_data->data[9] == 0x00C00000;
+	} else {
+		ipv6 = 0;
+	}
+	return ipv6;
+}
+
+void cudbg_fill_le_tcam_info(struct adapter *padap,
+			     struct cudbg_tcam *tcam_region)
+{
+	u32 value;
+
+	/* Get the LE regions */
+	value = t4_read_reg(padap, LE_DB_TID_HASHBASE_A); /* hash base index */
+	tcam_region->tid_hash_base = value;
+
+	/* Get routing table index */
+	value = t4_read_reg(padap, LE_DB_ROUTING_TABLE_INDEX_A);
+	tcam_region->routing_start = value;
+
+	/*Get clip table index */
+	value = t4_read_reg(padap, LE_DB_CLIP_TABLE_INDEX_A);
+	tcam_region->clip_start = value;
+
+	/* Get filter table index */
+	value = t4_read_reg(padap, LE_DB_FILTER_TABLE_INDEX_A);
+	tcam_region->filter_start = value;
+
+	/* Get server table index */
+	value = t4_read_reg(padap, LE_DB_SERVER_INDEX_A);
+	tcam_region->server_start = value;
+
+	/* Check whether hash is enabled and calculate the max tids */
+	value = t4_read_reg(padap, LE_DB_CONFIG_A);
+	if ((value >> HASHEN_S) & 1) {
+		value = t4_read_reg(padap, LE_DB_HASH_CONFIG_A);
+		if (CHELSIO_CHIP_VERSION(padap->params.chip) > CHELSIO_T5) {
+			tcam_region->max_tid = (value & 0xFFFFF) +
+					       tcam_region->tid_hash_base;
+		} else {
+			value = HASHTIDSIZE_G(value);
+			value = 1 << value;
+			tcam_region->max_tid = value +
+					       tcam_region->tid_hash_base;
+		}
+	} else { /* hash not enabled */
+		tcam_region->max_tid = CUDBG_MAX_TCAM_TID;
+	}
+}
+
+int cudbg_collect_le_tcam(struct cudbg_init *pdbg_init,
+			  struct cudbg_buffer *dbg_buff,
+			  struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	struct cudbg_tcam tcam_region = { 0 };
+	struct cudbg_tid_data *tid_data;
+	u32 bytes = 0;
+	int rc, size;
+	u32 i;
+
+	cudbg_fill_le_tcam_info(padap, &tcam_region);
+
+	size = sizeof(struct cudbg_tid_data) * tcam_region.max_tid;
+	size += sizeof(struct cudbg_tcam);
+	rc = cudbg_get_buff(dbg_buff, size, &temp_buff);
+	if (rc)
+		return rc;
+
+	memcpy(temp_buff.data, &tcam_region, sizeof(struct cudbg_tcam));
+	bytes = sizeof(struct cudbg_tcam);
+	tid_data = (struct cudbg_tid_data *)(temp_buff.data + bytes);
+	/* read all tid */
+	for (i = 0; i < tcam_region.max_tid; ) {
+		rc = cudbg_read_tid(pdbg_init, i, tid_data);
+		if (rc) {
+			cudbg_err->sys_err = rc;
+			cudbg_put_buff(&temp_buff, dbg_buff);
+			return rc;
+		}
+
+		/* ipv6 takes two tids */
+		cudbg_is_ipv6_entry(tid_data, tcam_region) ? i += 2 : i++;
+
+		tid_data++;
+		bytes += sizeof(struct cudbg_tid_data);
+	}
+
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_cctrl(struct cudbg_init *pdbg_init,
+			struct cudbg_buffer *dbg_buff,
+			struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	u32 size;
+	int rc;
+
+	size = sizeof(u16) * NMTUS * NCCTRL_WIN;
+	rc = cudbg_get_buff(dbg_buff, size, &temp_buff);
+	if (rc)
+		return rc;
+
+	t4_read_cong_tbl(padap, (void *)temp_buff.data);
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_ma_indirect(struct cudbg_init *pdbg_init,
+			      struct cudbg_buffer *dbg_buff,
+			      struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	struct ireg_buf *ma_indr;
+	int i, rc, n;
+	u32 size, j;
+
+	if (CHELSIO_CHIP_VERSION(padap->params.chip) < CHELSIO_T6)
+		return CUDBG_STATUS_ENTITY_NOT_FOUND;
+
+	n = sizeof(t6_ma_ireg_array) / (IREG_NUM_ELEM * sizeof(u32));
+	size = sizeof(struct ireg_buf) * n * 2;
+	rc = cudbg_get_buff(dbg_buff, size, &temp_buff);
+	if (rc)
+		return rc;
+
+	ma_indr = (struct ireg_buf *)temp_buff.data;
+	for (i = 0; i < n; i++) {
+		struct ireg_field *ma_fli = &ma_indr->tp_pio;
+		u32 *buff = ma_indr->outbuf;
+
+		ma_fli->ireg_addr = t6_ma_ireg_array[i][0];
+		ma_fli->ireg_data = t6_ma_ireg_array[i][1];
+		ma_fli->ireg_local_offset = t6_ma_ireg_array[i][2];
+		ma_fli->ireg_offset_range = t6_ma_ireg_array[i][3];
+		t4_read_indirect(padap, ma_fli->ireg_addr, ma_fli->ireg_data,
+				 buff, ma_fli->ireg_offset_range,
+				 ma_fli->ireg_local_offset);
+		ma_indr++;
+	}
+
+	n = sizeof(t6_ma_ireg_array2) / (IREG_NUM_ELEM * sizeof(u32));
+	for (i = 0; i < n; i++) {
+		struct ireg_field *ma_fli = &ma_indr->tp_pio;
+		u32 *buff = ma_indr->outbuf;
+
+		ma_fli->ireg_addr = t6_ma_ireg_array2[i][0];
+		ma_fli->ireg_data = t6_ma_ireg_array2[i][1];
+		ma_fli->ireg_local_offset = t6_ma_ireg_array2[i][2];
+		for (j = 0; j < t6_ma_ireg_array2[i][3]; j++) {
+			t4_read_indirect(padap, ma_fli->ireg_addr,
+					 ma_fli->ireg_data, buff, 1,
+					 ma_fli->ireg_local_offset);
+			buff++;
+			ma_fli->ireg_local_offset += 0x20;
+		}
+		ma_indr++;
+	}
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_ulptx_la(struct cudbg_init *pdbg_init,
+			   struct cudbg_buffer *dbg_buff,
+			   struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	struct cudbg_ulptx_la *ulptx_la_buff;
+	u32 i, j;
+	int rc;
+
+	rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_ulptx_la),
+			    &temp_buff);
+	if (rc)
+		return rc;
+
+	ulptx_la_buff = (struct cudbg_ulptx_la *)temp_buff.data;
+	for (i = 0; i < CUDBG_NUM_ULPTX; i++) {
+		ulptx_la_buff->rdptr[i] = t4_read_reg(padap,
+						      ULP_TX_LA_RDPTR_0_A +
+						      0x10 * i);
+		ulptx_la_buff->wrptr[i] = t4_read_reg(padap,
+						      ULP_TX_LA_WRPTR_0_A +
+						      0x10 * i);
+		ulptx_la_buff->rddata[i] = t4_read_reg(padap,
+						       ULP_TX_LA_RDDATA_0_A +
+						       0x10 * i);
+		for (j = 0; j < CUDBG_NUM_ULPTX_READ; j++)
+			ulptx_la_buff->rd_data[i][j] =
+				t4_read_reg(padap,
+					    ULP_TX_LA_RDDATA_0_A + 0x10 * i);
+	}
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_up_cim_indirect(struct cudbg_init *pdbg_init,
+				  struct cudbg_buffer *dbg_buff,
+				  struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	struct ireg_buf *up_cim;
+	int i, rc, n;
+	u32 size;
+
+	n = sizeof(t5_up_cim_reg_array) / (IREG_NUM_ELEM * sizeof(u32));
+	size = sizeof(struct ireg_buf) * n;
+	rc = cudbg_get_buff(dbg_buff, size, &temp_buff);
+	if (rc)
+		return rc;
+
+	up_cim = (struct ireg_buf *)temp_buff.data;
+	for (i = 0; i < n; i++) {
+		struct ireg_field *up_cim_reg = &up_cim->tp_pio;
+		u32 *buff = up_cim->outbuf;
+
+		if (is_t5(padap->params.chip)) {
+			up_cim_reg->ireg_addr = t5_up_cim_reg_array[i][0];
+			up_cim_reg->ireg_data = t5_up_cim_reg_array[i][1];
+			up_cim_reg->ireg_local_offset =
+						t5_up_cim_reg_array[i][2];
+			up_cim_reg->ireg_offset_range =
+						t5_up_cim_reg_array[i][3];
+		} else if (is_t6(padap->params.chip)) {
+			up_cim_reg->ireg_addr = t6_up_cim_reg_array[i][0];
+			up_cim_reg->ireg_data = t6_up_cim_reg_array[i][1];
+			up_cim_reg->ireg_local_offset =
+						t6_up_cim_reg_array[i][2];
+			up_cim_reg->ireg_offset_range =
+						t6_up_cim_reg_array[i][3];
+		}
+
+		rc = t4_cim_read(padap, up_cim_reg->ireg_local_offset,
+				 up_cim_reg->ireg_offset_range, buff);
+		if (rc) {
+			cudbg_put_buff(&temp_buff, dbg_buff);
+			return rc;
+		}
+		up_cim++;
+	}
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_pbt_tables(struct cudbg_init *pdbg_init,
+			     struct cudbg_buffer *dbg_buff,
+			     struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	struct cudbg_pbt_tables *pbt;
+	int i, rc;
+	u32 addr;
+
+	rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_pbt_tables),
+			    &temp_buff);
+	if (rc)
+		return rc;
+
+	pbt = (struct cudbg_pbt_tables *)temp_buff.data;
+	/* PBT dynamic entries */
+	addr = CUDBG_CHAC_PBT_ADDR;
+	for (i = 0; i < CUDBG_PBT_DYNAMIC_ENTRIES; i++) {
+		rc = t4_cim_read(padap, addr + (i * 4), 1,
+				 &pbt->pbt_dynamic[i]);
+		if (rc) {
+			cudbg_err->sys_err = rc;
+			cudbg_put_buff(&temp_buff, dbg_buff);
+			return rc;
+		}
+	}
+
+	/* PBT static entries */
+	/* static entries start when bit 6 is set */
+	addr = CUDBG_CHAC_PBT_ADDR + (1 << 6);
+	for (i = 0; i < CUDBG_PBT_STATIC_ENTRIES; i++) {
+		rc = t4_cim_read(padap, addr + (i * 4), 1,
+				 &pbt->pbt_static[i]);
+		if (rc) {
+			cudbg_err->sys_err = rc;
+			cudbg_put_buff(&temp_buff, dbg_buff);
+			return rc;
+		}
+	}
+
+	/* LRF entries */
+	addr = CUDBG_CHAC_PBT_LRF;
+	for (i = 0; i < CUDBG_LRF_ENTRIES; i++) {
+		rc = t4_cim_read(padap, addr + (i * 4), 1,
+				 &pbt->lrf_table[i]);
+		if (rc) {
+			cudbg_err->sys_err = rc;
+			cudbg_put_buff(&temp_buff, dbg_buff);
+			return rc;
+		}
+	}
+
+	/* PBT data entries */
+	addr = CUDBG_CHAC_PBT_DATA;
+	for (i = 0; i < CUDBG_PBT_DATA_ENTRIES; i++) {
+		rc = t4_cim_read(padap, addr + (i * 4), 1,
+				 &pbt->pbt_data[i]);
+		if (rc) {
+			cudbg_err->sys_err = rc;
+			cudbg_put_buff(&temp_buff, dbg_buff);
+			return rc;
+		}
+	}
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_mbox_log(struct cudbg_init *pdbg_init,
+			   struct cudbg_buffer *dbg_buff,
+			   struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_mbox_log *mboxlog = NULL;
+	struct cudbg_buffer temp_buff = { 0 };
+	struct mbox_cmd_log *log = NULL;
+	struct mbox_cmd *entry;
+	unsigned int entry_idx;
+	u16 mbox_cmds;
+	int i, k, rc;
+	u64 flit;
+	u32 size;
+
+	log = padap->mbox_log;
+	mbox_cmds = padap->mbox_log->size;
+	size = sizeof(struct cudbg_mbox_log) * mbox_cmds;
+	rc = cudbg_get_buff(dbg_buff, size, &temp_buff);
+	if (rc)
+		return rc;
+
+	mboxlog = (struct cudbg_mbox_log *)temp_buff.data;
+	for (k = 0; k < mbox_cmds; k++) {
+		entry_idx = log->cursor + k;
+		if (entry_idx >= log->size)
+			entry_idx -= log->size;
+
+		entry = mbox_cmd_log_entry(log, entry_idx);
+		/* skip over unused entries */
+		if (entry->timestamp == 0)
+			continue;
+
+		memcpy(&mboxlog->entry, entry, sizeof(struct mbox_cmd));
+		for (i = 0; i < MBOX_LEN / 8; i++) {
+			flit = entry->cmd[i];
+			mboxlog->hi[i] = (u32)(flit >> 32);
+			mboxlog->lo[i] = (u32)flit;
+		}
+		mboxlog++;
+	}
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
+
+int cudbg_collect_hma_indirect(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err)
+{
+	struct adapter *padap = pdbg_init->adap;
+	struct cudbg_buffer temp_buff = { 0 };
+	struct ireg_buf *hma_indr;
+	int i, rc, n;
+	u32 size;
+
+	if (CHELSIO_CHIP_VERSION(padap->params.chip) < CHELSIO_T6)
+		return CUDBG_STATUS_ENTITY_NOT_FOUND;
+
+	n = sizeof(t6_hma_ireg_array) / (IREG_NUM_ELEM * sizeof(u32));
+	size = sizeof(struct ireg_buf) * n;
+	rc = cudbg_get_buff(dbg_buff, size, &temp_buff);
+	if (rc)
+		return rc;
+
+	hma_indr = (struct ireg_buf *)temp_buff.data;
+	for (i = 0; i < n; i++) {
+		struct ireg_field *hma_fli = &hma_indr->tp_pio;
+		u32 *buff = hma_indr->outbuf;
+
+		hma_fli->ireg_addr = t6_hma_ireg_array[i][0];
+		hma_fli->ireg_data = t6_hma_ireg_array[i][1];
+		hma_fli->ireg_local_offset = t6_hma_ireg_array[i][2];
+		hma_fli->ireg_offset_range = t6_hma_ireg_array[i][3];
+		t4_read_indirect(padap, hma_fli->ireg_addr, hma_fli->ireg_data,
+				 buff, hma_fli->ireg_offset_range,
+				 hma_fli->ireg_local_offset);
+		hma_indr++;
+	}
+	cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+	return rc;
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h
new file mode 100644
index 000000000000..caeee8e33e86
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h
@@ -0,0 +1,169 @@
+/*
+ *  Copyright (C) 2017 Chelsio Communications.  All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms and conditions of the GNU General Public License,
+ *  version 2, as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ *  more details.
+ *
+ *  The full GNU General Public License is included in this distribution in
+ *  the file called "COPYING".
+ *
+ */
+
+#ifndef __CUDBG_LIB_H__
+#define __CUDBG_LIB_H__
+
+int cudbg_collect_reg_dump(struct cudbg_init *pdbg_init,
+			   struct cudbg_buffer *dbg_buff,
+			   struct cudbg_error *cudbg_err);
+int cudbg_collect_fw_devlog(struct cudbg_init *pdbg_init,
+			    struct cudbg_buffer *dbg_buff,
+			    struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_la(struct cudbg_init *pdbg_init,
+			 struct cudbg_buffer *dbg_buff,
+			 struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_ma_la(struct cudbg_init *pdbg_init,
+			    struct cudbg_buffer *dbg_buff,
+			    struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_qcfg(struct cudbg_init *pdbg_init,
+			   struct cudbg_buffer *dbg_buff,
+			   struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_ibq_tp0(struct cudbg_init *pdbg_init,
+			      struct cudbg_buffer *dbg_buff,
+			      struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_ibq_tp1(struct cudbg_init *pdbg_init,
+			      struct cudbg_buffer *dbg_buff,
+			      struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_ibq_ulp(struct cudbg_init *pdbg_init,
+			      struct cudbg_buffer *dbg_buff,
+			      struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_ibq_sge0(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_ibq_sge1(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_ibq_ncsi(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_obq_ulp0(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_obq_ulp1(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_obq_ulp2(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_obq_ulp3(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_obq_sge(struct cudbg_init *pdbg_init,
+			      struct cudbg_buffer *dbg_buff,
+			      struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_obq_ncsi(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err);
+int cudbg_collect_edc0_meminfo(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err);
+int cudbg_collect_edc1_meminfo(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err);
+int cudbg_collect_rss(struct cudbg_init *pdbg_init,
+		      struct cudbg_buffer *dbg_buff,
+		      struct cudbg_error *cudbg_err);
+int cudbg_collect_rss_vf_config(struct cudbg_init *pdbg_init,
+				struct cudbg_buffer *dbg_buff,
+				struct cudbg_error *cudbg_err);
+int cudbg_collect_tp_indirect(struct cudbg_init *pdbg_init,
+			      struct cudbg_buffer *dbg_buff,
+			      struct cudbg_error *cudbg_err);
+int cudbg_collect_path_mtu(struct cudbg_init *pdbg_init,
+			   struct cudbg_buffer *dbg_buff,
+			   struct cudbg_error *cudbg_err);
+int cudbg_collect_pm_stats(struct cudbg_init *pdbg_init,
+			   struct cudbg_buffer *dbg_buff,
+			   struct cudbg_error *cudbg_err);
+int cudbg_collect_hw_sched(struct cudbg_init *pdbg_init,
+			   struct cudbg_buffer *dbg_buff,
+			   struct cudbg_error *cudbg_err);
+int cudbg_collect_sge_indirect(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err);
+int cudbg_collect_ulprx_la(struct cudbg_init *pdbg_init,
+			   struct cudbg_buffer *dbg_buff,
+			   struct cudbg_error *cudbg_err);
+int cudbg_collect_tp_la(struct cudbg_init *pdbg_init,
+			struct cudbg_buffer *dbg_buff,
+			struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_pif_la(struct cudbg_init *pdbg_init,
+			     struct cudbg_buffer *dbg_buff,
+			     struct cudbg_error *cudbg_err);
+int cudbg_collect_clk_info(struct cudbg_init *pdbg_init,
+			   struct cudbg_buffer *dbg_buff,
+			   struct cudbg_error *cudbg_err);
+int cudbg_collect_obq_sge_rx_q0(struct cudbg_init *pdbg_init,
+				struct cudbg_buffer *dbg_buff,
+				struct cudbg_error *cudbg_err);
+int cudbg_collect_obq_sge_rx_q1(struct cudbg_init *pdbg_init,
+				struct cudbg_buffer *dbg_buff,
+				struct cudbg_error *cudbg_err);
+int cudbg_collect_pcie_indirect(struct cudbg_init *pdbg_init,
+				struct cudbg_buffer *dbg_buff,
+				struct cudbg_error *cudbg_err);
+int cudbg_collect_pm_indirect(struct cudbg_init *pdbg_init,
+			      struct cudbg_buffer *dbg_buff,
+			      struct cudbg_error *cudbg_err);
+int cudbg_collect_tid(struct cudbg_init *pdbg_init,
+		      struct cudbg_buffer *dbg_buff,
+		      struct cudbg_error *cudbg_err);
+int cudbg_collect_dump_context(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err);
+int cudbg_collect_mps_tcam(struct cudbg_init *pdbg_init,
+			   struct cudbg_buffer *dbg_buff,
+			   struct cudbg_error *cudbg_err);
+int cudbg_collect_vpd_data(struct cudbg_init *pdbg_init,
+			   struct cudbg_buffer *dbg_buff,
+			   struct cudbg_error *cudbg_err);
+int cudbg_collect_le_tcam(struct cudbg_init *pdbg_init,
+			  struct cudbg_buffer *dbg_buff,
+			  struct cudbg_error *cudbg_err);
+int cudbg_collect_cctrl(struct cudbg_init *pdbg_init,
+			struct cudbg_buffer *dbg_buff,
+			struct cudbg_error *cudbg_err);
+int cudbg_collect_ma_indirect(struct cudbg_init *pdbg_init,
+			      struct cudbg_buffer *dbg_buff,
+			      struct cudbg_error *cudbg_err);
+int cudbg_collect_ulptx_la(struct cudbg_init *pdbg_init,
+			   struct cudbg_buffer *dbg_buff,
+			   struct cudbg_error *cudbg_err);
+int cudbg_collect_up_cim_indirect(struct cudbg_init *pdbg_init,
+				  struct cudbg_buffer *dbg_buff,
+				  struct cudbg_error *cudbg_err);
+int cudbg_collect_pbt_tables(struct cudbg_init *pdbg_init,
+			     struct cudbg_buffer *dbg_buff,
+			     struct cudbg_error *cudbg_err);
+int cudbg_collect_mbox_log(struct cudbg_init *pdbg_init,
+			   struct cudbg_buffer *dbg_buff,
+			   struct cudbg_error *cudbg_err);
+int cudbg_collect_hma_indirect(struct cudbg_init *pdbg_init,
+			       struct cudbg_buffer *dbg_buff,
+			       struct cudbg_error *cudbg_err);
+
+struct cudbg_entity_hdr *cudbg_get_entity_hdr(void *outbuf, int i);
+void cudbg_align_debug_buffer(struct cudbg_buffer *dbg_buff,
+			      struct cudbg_entity_hdr *entity_hdr);
+u32 cudbg_cim_obq_size(struct adapter *padap, int qid);
+int cudbg_dump_context_size(struct adapter *padap);
+
+struct cudbg_tcam;
+void cudbg_fill_le_tcam_info(struct adapter *padap,
+			     struct cudbg_tcam *tcam_region);
+#endif /* __CUDBG_LIB_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h
new file mode 100644
index 000000000000..24b33f28e548
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h
@@ -0,0 +1,87 @@
+/*
+ *  Copyright (C) 2017 Chelsio Communications.  All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms and conditions of the GNU General Public License,
+ *  version 2, as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ *  more details.
+ *
+ *  The full GNU General Public License is included in this distribution in
+ *  the file called "COPYING".
+ *
+ */
+
+#ifndef __CUDBG_LIB_COMMON_H__
+#define __CUDBG_LIB_COMMON_H__
+
+#define CUDBG_SIGNATURE 67856866 /* CUDB in ascii */
+
+enum cudbg_dump_type {
+	CUDBG_DUMP_TYPE_MINI = 1,
+};
+
+enum cudbg_compression_type {
+	CUDBG_COMPRESSION_NONE = 1,
+};
+
+struct cudbg_hdr {
+	u32 signature;
+	u32 hdr_len;
+	u16 major_ver;
+	u16 minor_ver;
+	u32 data_len;
+	u32 hdr_flags;
+	u16 max_entities;
+	u8 chip_ver;
+	u8 dump_type:3;
+	u8 reserved1:1;
+	u8 compress_type:4;
+	u32 reserved[8];
+};
+
+struct cudbg_entity_hdr {
+	u32 entity_type;
+	u32 start_offset;
+	u32 size;
+	int hdr_flags;
+	u32 sys_warn;
+	u32 sys_err;
+	u8 num_pad;
+	u8 flag;             /* bit 0 is used to indicate ext data */
+	u8 reserved1[2];
+	u32 next_ext_offset; /* pointer to next extended entity meta data */
+	u32 reserved[5];
+};
+
+struct cudbg_ver_hdr {
+	u32 signature;
+	u16 revision;
+	u16 size;
+};
+
+struct cudbg_buffer {
+	u32 size;
+	u32 offset;
+	char *data;
+};
+
+struct cudbg_error {
+	int sys_err;
+	int sys_warn;
+	int app_err;
+};
+
+#define CDUMP_MAX_COMP_BUF_SIZE ((64 * 1024) - 1)
+#define CUDBG_CHUNK_SIZE ((CDUMP_MAX_COMP_BUF_SIZE / 1024) * 1024)
+
+int cudbg_get_buff(struct cudbg_buffer *pdbg_buff, u32 size,
+		   struct cudbg_buffer *pin_buff);
+void cudbg_put_buff(struct cudbg_buffer *pin_buff,
+		    struct cudbg_buffer *pdbg_buff);
+void cudbg_update_buff(struct cudbg_buffer *pin_buff,
+		       struct cudbg_buffer *pout_buff);
+#endif /* __CUDBG_LIB_COMMON_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index ea72d2d2e1b4..6f9fa6e3c42a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -287,10 +287,18 @@ struct tp_params {
 	 * places we store their offsets here, or a -1 if the field isn't
 	 * present.
 	 */
-	int vlan_shift;
-	int vnic_shift;
+	int fcoe_shift;
 	int port_shift;
+	int vnic_shift;
+	int vlan_shift;
+	int tos_shift;
 	int protocol_shift;
+	int ethertype_shift;
+	int macmatch_shift;
+	int matchtype_shift;
+	int frag_shift;
+
+	u64 hash_filter_mask;
 };
 
 struct vpd_params {
@@ -358,6 +366,7 @@ struct adapter_params {
 	unsigned char crypto;		/* HW capability for crypto */
 
 	unsigned char bypass;
+	unsigned char hash_filter;
 
 	unsigned int ofldq_wr_cred;
 	bool ulptx_memwrite_dsgl;          /* use of T5 DSGL allowed */
@@ -367,6 +376,7 @@ struct adapter_params {
 	unsigned int max_ird_adapter;     /* Max read depth per adapter */
 	bool fr_nsmr_tpte_wr_support;	  /* FW support for FR_NSMR_TPTE_WR */
 	u8 fw_caps_support;		/* 32-bit Port Capabilities */
+	bool filter2_wr_support;	/* FW support for FILTER2_WR */
 
 	/* MPS Buffer Group Map[per Port].  Bit i is set if buffer group i is
 	 * used by the Port
@@ -549,6 +559,7 @@ enum {                                 /* adapter flags */
 	MASTER_PF          = (1 << 7),
 	FW_OFLD_CONN       = (1 << 9),
 	ROOT_NO_RELAXED_ORDERING = (1 << 10),
+	SHUTTING_DOWN	   = (1 << 11),
 };
 
 enum {
@@ -857,6 +868,7 @@ struct adapter {
 	unsigned int clipt_start;
 	unsigned int clipt_end;
 	struct clip_tbl *clipt;
+	struct smt_data *smt;
 	struct cxgb4_uld_info *uld;
 	void *uld_handle[CXGB4_ULD_MAX];
 	unsigned int num_uld;
@@ -904,6 +916,15 @@ struct adapter {
 	/* TC u32 offload */
 	struct cxgb4_tc_u32_table *tc_u32;
 	struct chcr_stats_debug chcr_stats;
+
+	/* TC flower offload */
+	struct rhashtable flower_tbl;
+	struct rhashtable_params flower_ht_params;
+	struct timer_list flower_stats_timer;
+	struct work_struct flower_stats_work;
+
+	/* Ethtool Dump */
+	struct ethtool_dump eth_dump;
 };
 
 /* Support for "sched-class" command to allow a TX Scheduling Class to be
@@ -1031,6 +1052,7 @@ struct ch_filter_specification {
 	 * matching that doesn't exist as a (value, mask) tuple.
 	 */
 	uint32_t type:1;        /* 0 => IPv4, 1 => IPv6 */
+	u32 hash:1;		/* 0 => wild-card, 1 => exact-match */
 
 	/* Packet dispatch information.  Ingress packets which match the
 	 * filter rules will be dropped, passed to the host or switched back
@@ -1055,10 +1077,19 @@ struct ch_filter_specification {
 	uint32_t newdmac:1;     /* rewrite destination MAC address */
 	uint32_t newsmac:1;     /* rewrite source MAC address */
 	uint32_t newvlan:2;     /* rewrite VLAN Tag */
+	uint32_t nat_mode:3;    /* specify NAT operation mode */
 	uint8_t dmac[ETH_ALEN]; /* new destination MAC address */
 	uint8_t smac[ETH_ALEN]; /* new source MAC address */
 	uint16_t vlan;          /* VLAN Tag to insert */
 
+	u8 nat_lip[16];		/* local IP to use after NAT'ing */
+	u8 nat_fip[16];		/* foreign IP to use after NAT'ing */
+	u16 nat_lport;		/* local port to use after NAT'ing */
+	u16 nat_fport;		/* foreign port to use after NAT'ing */
+
+	/* reservation for future additions */
+	u8 rsvd[24];
+
 	/* Filter rule value/mask pairs.
 	 */
 	struct ch_filter_tuple val;
@@ -1078,6 +1109,17 @@ enum {
 	VLAN_REWRITE
 };
 
+enum {
+	NAT_MODE_NONE = 0,	/* No NAT performed */
+	NAT_MODE_DIP,		/* NAT on Dst IP */
+	NAT_MODE_DIP_DP,	/* NAT on Dst IP, Dst Port */
+	NAT_MODE_DIP_DP_SIP,	/* NAT on Dst IP, Dst Port and Src IP */
+	NAT_MODE_DIP_DP_SP,	/* NAT on Dst IP, Dst Port and Src Port */
+	NAT_MODE_SIP_SP,	/* NAT on Src IP and Src Port */
+	NAT_MODE_DIP_SIP_SP,	/* NAT on Dst IP, Src IP and Src Port */
+	NAT_MODE_ALL		/* NAT on entire 4-tuple */
+};
+
 /* Host shadow copy of ingress filter entry.  This is in host native format
  * and doesn't match the ordering or bit order, etc. of the hardware of the
  * firmware command.  The use of bit-field structure elements is purely to
@@ -1090,9 +1132,9 @@ struct filter_entry {
 	u32 locked:1;           /* filter is administratively locked */
 
 	u32 pending:1;          /* filter action is pending firmware reply */
-	u32 smtidx:8;           /* Source MAC Table index for smac */
 	struct filter_ctx *ctx; /* Caller's completion hook */
 	struct l2t_entry *l2t;  /* Layer Two Table entry for dmac */
+	struct smt_entry *smt;  /* Source Mac Table entry for smac */
 	struct net_device *dev; /* Associated net device */
 	u32 tid;                /* This will store the actual tid */
 
@@ -1109,6 +1151,11 @@ static inline int is_offload(const struct adapter *adap)
 	return adap->params.offload;
 }
 
+static inline int is_hashfilter(const struct adapter *adap)
+{
+	return adap->params.hash_filter;
+}
+
 static inline int is_pci_uld(const struct adapter *adap)
 {
 	return adap->params.crypto;
@@ -1312,6 +1359,12 @@ static inline unsigned int core_ticks_to_us(const struct adapter *adapter,
 		adapter->params.vpd.cclk);
 }
 
+static inline unsigned int dack_ticks_to_usec(const struct adapter *adap,
+					      unsigned int ticks)
+{
+	return (ticks << adap->params.tp.dack_re) / core_ticks_per_usec(adap);
+}
+
 void t4_set_reg_field(struct adapter *adap, unsigned int addr, u32 mask,
 		      u32 val);
 
@@ -1406,6 +1459,7 @@ static inline int t4_memory_write(struct adapter *adap, int mtype, u32 addr,
 unsigned int t4_get_regs_len(struct adapter *adapter);
 void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size);
 
+int t4_eeprom_ptov(unsigned int phys_addr, unsigned int fn, unsigned int sz);
 int t4_seeprom_wp(struct adapter *adapter, bool enable);
 int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p);
 int t4_get_vpd_params(struct adapter *adapter, struct vpd_params *p);
@@ -1451,7 +1505,7 @@ unsigned int qtimer_val(const struct adapter *adap,
 
 int t4_init_devlog_params(struct adapter *adapter);
 int t4_init_sge_params(struct adapter *adapter);
-int t4_init_tp_params(struct adapter *adap);
+int t4_init_tp_params(struct adapter *adap, bool sleep_ok);
 int t4_filter_field_shift(const struct adapter *adap, int filter_sel);
 int t4_init_rss_mode(struct adapter *adap, int mbox);
 int t4_init_portinfo(struct port_info *pi, int mbox,
@@ -1465,14 +1519,15 @@ int t4_config_glbl_rss(struct adapter *adapter, int mbox, unsigned int mode,
 int t4_config_vi_rss(struct adapter *adapter, int mbox, unsigned int viid,
 		     unsigned int flags, unsigned int defq);
 int t4_read_rss(struct adapter *adapter, u16 *entries);
-void t4_read_rss_key(struct adapter *adapter, u32 *key);
-void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx);
+void t4_read_rss_key(struct adapter *adapter, u32 *key, bool sleep_ok);
+void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx,
+		      bool sleep_ok);
 void t4_read_rss_pf_config(struct adapter *adapter, unsigned int index,
-			   u32 *valp);
+			   u32 *valp, bool sleep_ok);
 void t4_read_rss_vf_config(struct adapter *adapter, unsigned int index,
-			   u32 *vfl, u32 *vfh);
-u32 t4_read_rss_pf_map(struct adapter *adapter);
-u32 t4_read_rss_pf_mask(struct adapter *adapter);
+			   u32 *vfl, u32 *vfh, bool sleep_ok);
+u32 t4_read_rss_pf_map(struct adapter *adapter, bool sleep_ok);
+u32 t4_read_rss_pf_mask(struct adapter *adapter, bool sleep_ok);
 
 unsigned int t4_get_mps_bg_map(struct adapter *adapter, int pidx);
 unsigned int t4_get_tp_ch_map(struct adapter *adapter, int pidx);
@@ -1503,14 +1558,18 @@ void t4_read_cong_tbl(struct adapter *adap, u16 incr[NMTUS][NCCTRL_WIN]);
 void t4_tp_wr_bits_indirect(struct adapter *adap, unsigned int addr,
 			    unsigned int mask, unsigned int val);
 void t4_tp_read_la(struct adapter *adap, u64 *la_buf, unsigned int *wrptr);
-void t4_tp_get_err_stats(struct adapter *adap, struct tp_err_stats *st);
-void t4_tp_get_cpl_stats(struct adapter *adap, struct tp_cpl_stats *st);
-void t4_tp_get_rdma_stats(struct adapter *adap, struct tp_rdma_stats *st);
-void t4_get_usm_stats(struct adapter *adap, struct tp_usm_stats *st);
+void t4_tp_get_err_stats(struct adapter *adap, struct tp_err_stats *st,
+			 bool sleep_ok);
+void t4_tp_get_cpl_stats(struct adapter *adap, struct tp_cpl_stats *st,
+			 bool sleep_ok);
+void t4_tp_get_rdma_stats(struct adapter *adap, struct tp_rdma_stats *st,
+			  bool sleep_ok);
+void t4_get_usm_stats(struct adapter *adap, struct tp_usm_stats *st,
+		      bool sleep_ok);
 void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4,
-			 struct tp_tcp_stats *v6);
+			 struct tp_tcp_stats *v6, bool sleep_ok);
 void t4_get_fcoe_stats(struct adapter *adap, unsigned int idx,
-		       struct tp_fcoe_stats *st);
+		       struct tp_fcoe_stats *st, bool sleep_ok);
 void t4_load_mtus(struct adapter *adap, const unsigned short *mtus,
 		  const unsigned short *alpha, const unsigned short *beta);
 
@@ -1608,6 +1667,13 @@ void t4_get_trace_filter(struct adapter *adapter, struct trace_params *tp,
 			 int filter_index, int *enabled);
 int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox,
 			 u32 addr, u32 val);
+void t4_read_pace_tbl(struct adapter *adap, unsigned int pace_vals[NTX_SCHED]);
+void t4_get_tx_sched(struct adapter *adap, unsigned int sched,
+		     unsigned int *kbps, unsigned int *ipg, bool sleep_ok);
+int t4_sge_ctxt_rd(struct adapter *adap, unsigned int mbox, unsigned int cid,
+		   enum ctxt_type ctype, u32 *data);
+int t4_sge_ctxt_rd_bd(struct adapter *adap, unsigned int cid,
+		      enum ctxt_type ctype, u32 *data);
 int t4_sched_params(struct adapter *adapter, int type, int level, int mode,
 		    int rateunit, int ratemode, int channel, int class,
 		    int minrate, int maxrate, int weight, int pktsize);
@@ -1619,6 +1685,13 @@ void t4_idma_monitor(struct adapter *adapter,
 		     int hz, int ticks);
 int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf,
 		      unsigned int naddr, u8 *addr);
+void t4_tp_pio_read(struct adapter *adap, u32 *buff, u32 nregs,
+		    u32 start_index, bool sleep_ok);
+void t4_tp_tm_pio_read(struct adapter *adap, u32 *buff, u32 nregs,
+		       u32 start_index, bool sleep_ok);
+void t4_tp_mib_read(struct adapter *adap, u32 *buff, u32 nregs,
+		    u32 start_index, bool sleep_ok);
+
 void t4_uld_mem_free(struct adapter *adap);
 int t4_uld_mem_alloc(struct adapter *adap);
 void t4_uld_clean_up(struct adapter *adap);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
new file mode 100644
index 000000000000..29cc625e9833
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
@@ -0,0 +1,403 @@
+/*
+ *  Copyright (C) 2017 Chelsio Communications.  All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms and conditions of the GNU General Public License,
+ *  version 2, as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ *  more details.
+ *
+ *  The full GNU General Public License is included in this distribution in
+ *  the file called "COPYING".
+ *
+ */
+
+#include "t4_regs.h"
+#include "cxgb4.h"
+#include "cxgb4_cudbg.h"
+#include "cudbg_entity.h"
+
+static const struct cxgb4_collect_entity cxgb4_collect_mem_dump[] = {
+	{ CUDBG_EDC0, cudbg_collect_edc0_meminfo },
+	{ CUDBG_EDC1, cudbg_collect_edc1_meminfo },
+};
+
+static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = {
+	{ CUDBG_MBOX_LOG, cudbg_collect_mbox_log },
+	{ CUDBG_DEV_LOG, cudbg_collect_fw_devlog },
+	{ CUDBG_REG_DUMP, cudbg_collect_reg_dump },
+	{ CUDBG_CIM_LA, cudbg_collect_cim_la },
+	{ CUDBG_CIM_MA_LA, cudbg_collect_cim_ma_la },
+	{ CUDBG_CIM_QCFG, cudbg_collect_cim_qcfg },
+	{ CUDBG_CIM_IBQ_TP0, cudbg_collect_cim_ibq_tp0 },
+	{ CUDBG_CIM_IBQ_TP1, cudbg_collect_cim_ibq_tp1 },
+	{ CUDBG_CIM_IBQ_ULP, cudbg_collect_cim_ibq_ulp },
+	{ CUDBG_CIM_IBQ_SGE0, cudbg_collect_cim_ibq_sge0 },
+	{ CUDBG_CIM_IBQ_SGE1, cudbg_collect_cim_ibq_sge1 },
+	{ CUDBG_CIM_IBQ_NCSI, cudbg_collect_cim_ibq_ncsi },
+	{ CUDBG_CIM_OBQ_ULP0, cudbg_collect_cim_obq_ulp0 },
+	{ CUDBG_CIM_OBQ_ULP1, cudbg_collect_cim_obq_ulp1 },
+	{ CUDBG_CIM_OBQ_ULP2, cudbg_collect_cim_obq_ulp2 },
+	{ CUDBG_CIM_OBQ_ULP3, cudbg_collect_cim_obq_ulp3 },
+	{ CUDBG_CIM_OBQ_SGE, cudbg_collect_cim_obq_sge },
+	{ CUDBG_CIM_OBQ_NCSI, cudbg_collect_cim_obq_ncsi },
+	{ CUDBG_RSS, cudbg_collect_rss },
+	{ CUDBG_RSS_VF_CONF, cudbg_collect_rss_vf_config },
+	{ CUDBG_PATH_MTU, cudbg_collect_path_mtu },
+	{ CUDBG_PM_STATS, cudbg_collect_pm_stats },
+	{ CUDBG_HW_SCHED, cudbg_collect_hw_sched },
+	{ CUDBG_TP_INDIRECT, cudbg_collect_tp_indirect },
+	{ CUDBG_SGE_INDIRECT, cudbg_collect_sge_indirect },
+	{ CUDBG_ULPRX_LA, cudbg_collect_ulprx_la },
+	{ CUDBG_TP_LA, cudbg_collect_tp_la },
+	{ CUDBG_CIM_PIF_LA, cudbg_collect_cim_pif_la },
+	{ CUDBG_CLK, cudbg_collect_clk_info },
+	{ CUDBG_CIM_OBQ_RXQ0, cudbg_collect_obq_sge_rx_q0 },
+	{ CUDBG_CIM_OBQ_RXQ1, cudbg_collect_obq_sge_rx_q1 },
+	{ CUDBG_PCIE_INDIRECT, cudbg_collect_pcie_indirect },
+	{ CUDBG_PM_INDIRECT, cudbg_collect_pm_indirect },
+	{ CUDBG_TID_INFO, cudbg_collect_tid },
+	{ CUDBG_DUMP_CONTEXT, cudbg_collect_dump_context },
+	{ CUDBG_MPS_TCAM, cudbg_collect_mps_tcam },
+	{ CUDBG_VPD_DATA, cudbg_collect_vpd_data },
+	{ CUDBG_LE_TCAM, cudbg_collect_le_tcam },
+	{ CUDBG_CCTRL, cudbg_collect_cctrl },
+	{ CUDBG_MA_INDIRECT, cudbg_collect_ma_indirect },
+	{ CUDBG_ULPTX_LA, cudbg_collect_ulptx_la },
+	{ CUDBG_UP_CIM_INDIRECT, cudbg_collect_up_cim_indirect },
+	{ CUDBG_PBT_TABLE, cudbg_collect_pbt_tables },
+	{ CUDBG_HMA_INDIRECT, cudbg_collect_hma_indirect },
+};
+
+static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity)
+{
+	struct cudbg_tcam tcam_region = { 0 };
+	u32 value, n = 0, len = 0;
+
+	switch (entity) {
+	case CUDBG_REG_DUMP:
+		switch (CHELSIO_CHIP_VERSION(adap->params.chip)) {
+		case CHELSIO_T4:
+			len = T4_REGMAP_SIZE;
+			break;
+		case CHELSIO_T5:
+		case CHELSIO_T6:
+			len = T5_REGMAP_SIZE;
+			break;
+		default:
+			break;
+		}
+		break;
+	case CUDBG_DEV_LOG:
+		len = adap->params.devlog.size;
+		break;
+	case CUDBG_CIM_LA:
+		if (is_t6(adap->params.chip)) {
+			len = adap->params.cim_la_size / 10 + 1;
+			len *= 11 * sizeof(u32);
+		} else {
+			len = adap->params.cim_la_size / 8;
+			len *= 8 * sizeof(u32);
+		}
+		len += sizeof(u32); /* for reading CIM LA configuration */
+		break;
+	case CUDBG_CIM_MA_LA:
+		len = 2 * CIM_MALA_SIZE * 5 * sizeof(u32);
+		break;
+	case CUDBG_CIM_QCFG:
+		len = sizeof(struct cudbg_cim_qcfg);
+		break;
+	case CUDBG_CIM_IBQ_TP0:
+	case CUDBG_CIM_IBQ_TP1:
+	case CUDBG_CIM_IBQ_ULP:
+	case CUDBG_CIM_IBQ_SGE0:
+	case CUDBG_CIM_IBQ_SGE1:
+	case CUDBG_CIM_IBQ_NCSI:
+		len = CIM_IBQ_SIZE * 4 * sizeof(u32);
+		break;
+	case CUDBG_CIM_OBQ_ULP0:
+		len = cudbg_cim_obq_size(adap, 0);
+		break;
+	case CUDBG_CIM_OBQ_ULP1:
+		len = cudbg_cim_obq_size(adap, 1);
+		break;
+	case CUDBG_CIM_OBQ_ULP2:
+		len = cudbg_cim_obq_size(adap, 2);
+		break;
+	case CUDBG_CIM_OBQ_ULP3:
+		len = cudbg_cim_obq_size(adap, 3);
+		break;
+	case CUDBG_CIM_OBQ_SGE:
+		len = cudbg_cim_obq_size(adap, 4);
+		break;
+	case CUDBG_CIM_OBQ_NCSI:
+		len = cudbg_cim_obq_size(adap, 5);
+		break;
+	case CUDBG_CIM_OBQ_RXQ0:
+		len = cudbg_cim_obq_size(adap, 6);
+		break;
+	case CUDBG_CIM_OBQ_RXQ1:
+		len = cudbg_cim_obq_size(adap, 7);
+		break;
+	case CUDBG_EDC0:
+		value = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A);
+		if (value & EDRAM0_ENABLE_F) {
+			value = t4_read_reg(adap, MA_EDRAM0_BAR_A);
+			len = EDRAM0_SIZE_G(value);
+		}
+		len = cudbg_mbytes_to_bytes(len);
+		break;
+	case CUDBG_EDC1:
+		value = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A);
+		if (value & EDRAM1_ENABLE_F) {
+			value = t4_read_reg(adap, MA_EDRAM1_BAR_A);
+			len = EDRAM1_SIZE_G(value);
+		}
+		len = cudbg_mbytes_to_bytes(len);
+		break;
+	case CUDBG_RSS:
+		len = RSS_NENTRIES * sizeof(u16);
+		break;
+	case CUDBG_RSS_VF_CONF:
+		len = adap->params.arch.vfcount *
+		      sizeof(struct cudbg_rss_vf_conf);
+		break;
+	case CUDBG_PATH_MTU:
+		len = NMTUS * sizeof(u16);
+		break;
+	case CUDBG_PM_STATS:
+		len = sizeof(struct cudbg_pm_stats);
+		break;
+	case CUDBG_HW_SCHED:
+		len = sizeof(struct cudbg_hw_sched);
+		break;
+	case CUDBG_TP_INDIRECT:
+		switch (CHELSIO_CHIP_VERSION(adap->params.chip)) {
+		case CHELSIO_T5:
+			n = sizeof(t5_tp_pio_array) +
+			    sizeof(t5_tp_tm_pio_array) +
+			    sizeof(t5_tp_mib_index_array);
+			break;
+		case CHELSIO_T6:
+			n = sizeof(t6_tp_pio_array) +
+			    sizeof(t6_tp_tm_pio_array) +
+			    sizeof(t6_tp_mib_index_array);
+			break;
+		default:
+			break;
+		}
+		n = n / (IREG_NUM_ELEM * sizeof(u32));
+		len = sizeof(struct ireg_buf) * n;
+		break;
+	case CUDBG_SGE_INDIRECT:
+		len = sizeof(struct ireg_buf) * 2;
+		break;
+	case CUDBG_ULPRX_LA:
+		len = sizeof(struct cudbg_ulprx_la);
+		break;
+	case CUDBG_TP_LA:
+		len = sizeof(struct cudbg_tp_la) + TPLA_SIZE * sizeof(u64);
+		break;
+	case CUDBG_CIM_PIF_LA:
+		len = sizeof(struct cudbg_cim_pif_la);
+		len += 2 * CIM_PIFLA_SIZE * 6 * sizeof(u32);
+		break;
+	case CUDBG_CLK:
+		len = sizeof(struct cudbg_clk_info);
+		break;
+	case CUDBG_PCIE_INDIRECT:
+		n = sizeof(t5_pcie_pdbg_array) / (IREG_NUM_ELEM * sizeof(u32));
+		len = sizeof(struct ireg_buf) * n * 2;
+		break;
+	case CUDBG_PM_INDIRECT:
+		n = sizeof(t5_pm_rx_array) / (IREG_NUM_ELEM * sizeof(u32));
+		len = sizeof(struct ireg_buf) * n * 2;
+		break;
+	case CUDBG_TID_INFO:
+		len = sizeof(struct cudbg_tid_info_region_rev1);
+		break;
+	case CUDBG_DUMP_CONTEXT:
+		len = cudbg_dump_context_size(adap);
+		break;
+	case CUDBG_MPS_TCAM:
+		len = sizeof(struct cudbg_mps_tcam) *
+		      adap->params.arch.mps_tcam_size;
+		break;
+	case CUDBG_VPD_DATA:
+		len = sizeof(struct cudbg_vpd_data);
+		break;
+	case CUDBG_LE_TCAM:
+		cudbg_fill_le_tcam_info(adap, &tcam_region);
+		len = sizeof(struct cudbg_tcam) +
+		      sizeof(struct cudbg_tid_data) * tcam_region.max_tid;
+		break;
+	case CUDBG_CCTRL:
+		len = sizeof(u16) * NMTUS * NCCTRL_WIN;
+		break;
+	case CUDBG_MA_INDIRECT:
+		if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5) {
+			n = sizeof(t6_ma_ireg_array) /
+			    (IREG_NUM_ELEM * sizeof(u32));
+			len = sizeof(struct ireg_buf) * n * 2;
+		}
+		break;
+	case CUDBG_ULPTX_LA:
+		len = sizeof(struct cudbg_ulptx_la);
+		break;
+	case CUDBG_UP_CIM_INDIRECT:
+		n = sizeof(t5_up_cim_reg_array) / (IREG_NUM_ELEM * sizeof(u32));
+		len = sizeof(struct ireg_buf) * n;
+		break;
+	case CUDBG_PBT_TABLE:
+		len = sizeof(struct cudbg_pbt_tables);
+		break;
+	case CUDBG_MBOX_LOG:
+		len = sizeof(struct cudbg_mbox_log) * adap->mbox_log->size;
+		break;
+	case CUDBG_HMA_INDIRECT:
+		if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5) {
+			n = sizeof(t6_hma_ireg_array) /
+			    (IREG_NUM_ELEM * sizeof(u32));
+			len = sizeof(struct ireg_buf) * n;
+		}
+		break;
+	default:
+		break;
+	}
+
+	return len;
+}
+
+u32 cxgb4_get_dump_length(struct adapter *adap, u32 flag)
+{
+	u32 i, entity;
+	u32 len = 0;
+
+	if (flag & CXGB4_ETH_DUMP_HW) {
+		for (i = 0; i < ARRAY_SIZE(cxgb4_collect_hw_dump); i++) {
+			entity = cxgb4_collect_hw_dump[i].entity;
+			len += cxgb4_get_entity_length(adap, entity);
+		}
+	}
+
+	if (flag & CXGB4_ETH_DUMP_MEM) {
+		for (i = 0; i < ARRAY_SIZE(cxgb4_collect_mem_dump); i++) {
+			entity = cxgb4_collect_mem_dump[i].entity;
+			len += cxgb4_get_entity_length(adap, entity);
+		}
+	}
+
+	return len;
+}
+
+static void cxgb4_cudbg_collect_entity(struct cudbg_init *pdbg_init,
+				       struct cudbg_buffer *dbg_buff,
+				       const struct cxgb4_collect_entity *e_arr,
+				       u32 arr_size, void *buf, u32 *tot_size)
+{
+	struct adapter *adap = pdbg_init->adap;
+	struct cudbg_error cudbg_err = { 0 };
+	struct cudbg_entity_hdr *entity_hdr;
+	u32 entity_size, i;
+	u32 total_size = 0;
+	int ret;
+
+	for (i = 0; i < arr_size; i++) {
+		const struct cxgb4_collect_entity *e = &e_arr[i];
+
+		/* Skip entities that won't fit in output buffer */
+		entity_size = cxgb4_get_entity_length(adap, e->entity);
+		if (entity_size >
+		    pdbg_init->outbuf_size - *tot_size - total_size)
+			continue;
+
+		entity_hdr = cudbg_get_entity_hdr(buf, e->entity);
+		entity_hdr->entity_type = e->entity;
+		entity_hdr->start_offset = dbg_buff->offset;
+		memset(&cudbg_err, 0, sizeof(struct cudbg_error));
+		ret = e->collect_cb(pdbg_init, dbg_buff, &cudbg_err);
+		if (ret) {
+			entity_hdr->size = 0;
+			dbg_buff->offset = entity_hdr->start_offset;
+		} else {
+			cudbg_align_debug_buffer(dbg_buff, entity_hdr);
+		}
+
+		/* Log error and continue with next entity */
+		if (cudbg_err.sys_err)
+			ret = CUDBG_SYSTEM_ERROR;
+
+		entity_hdr->hdr_flags = ret;
+		entity_hdr->sys_err = cudbg_err.sys_err;
+		entity_hdr->sys_warn = cudbg_err.sys_warn;
+		total_size += entity_hdr->size;
+	}
+
+	*tot_size += total_size;
+}
+
+int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size,
+			u32 flag)
+{
+	struct cudbg_init cudbg_init = { 0 };
+	struct cudbg_buffer dbg_buff = { 0 };
+	u32 size, min_size, total_size = 0;
+	struct cudbg_hdr *cudbg_hdr;
+
+	size = *buf_size;
+
+	cudbg_init.adap = adap;
+	cudbg_init.outbuf = buf;
+	cudbg_init.outbuf_size = size;
+
+	dbg_buff.data = buf;
+	dbg_buff.size = size;
+	dbg_buff.offset = 0;
+
+	cudbg_hdr = (struct cudbg_hdr *)buf;
+	cudbg_hdr->signature = CUDBG_SIGNATURE;
+	cudbg_hdr->hdr_len = sizeof(struct cudbg_hdr);
+	cudbg_hdr->major_ver = CUDBG_MAJOR_VERSION;
+	cudbg_hdr->minor_ver = CUDBG_MINOR_VERSION;
+	cudbg_hdr->max_entities = CUDBG_MAX_ENTITY;
+	cudbg_hdr->chip_ver = adap->params.chip;
+	cudbg_hdr->dump_type = CUDBG_DUMP_TYPE_MINI;
+	cudbg_hdr->compress_type = CUDBG_COMPRESSION_NONE;
+
+	min_size = sizeof(struct cudbg_hdr) +
+		   sizeof(struct cudbg_entity_hdr) *
+		   cudbg_hdr->max_entities;
+	if (size < min_size)
+		return -ENOMEM;
+
+	dbg_buff.offset += min_size;
+	total_size = dbg_buff.offset;
+
+	if (flag & CXGB4_ETH_DUMP_HW)
+		cxgb4_cudbg_collect_entity(&cudbg_init, &dbg_buff,
+					   cxgb4_collect_hw_dump,
+					   ARRAY_SIZE(cxgb4_collect_hw_dump),
+					   buf,
+					   &total_size);
+
+	if (flag & CXGB4_ETH_DUMP_MEM)
+		cxgb4_cudbg_collect_entity(&cudbg_init, &dbg_buff,
+					   cxgb4_collect_mem_dump,
+					   ARRAY_SIZE(cxgb4_collect_mem_dump),
+					   buf,
+					   &total_size);
+
+	cudbg_hdr->data_len = total_size;
+	*buf_size = total_size;
+	return 0;
+}
+
+void cxgb4_init_ethtool_dump(struct adapter *adapter)
+{
+	adapter->eth_dump.flag = CXGB4_ETH_DUMP_NONE;
+	adapter->eth_dump.version = adapter->params.fw_vers;
+	adapter->eth_dump.len = 0;
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h
new file mode 100644
index 000000000000..c099b5aa2214
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h
@@ -0,0 +1,44 @@
+/*
+ *  Copyright (C) 2017 Chelsio Communications.  All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms and conditions of the GNU General Public License,
+ *  version 2, as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ *  more details.
+ *
+ *  The full GNU General Public License is included in this distribution in
+ *  the file called "COPYING".
+ *
+ */
+
+#ifndef __CXGB4_CUDBG_H__
+#define __CXGB4_CUDBG_H__
+
+#include "cudbg_if.h"
+#include "cudbg_lib_common.h"
+#include "cudbg_lib.h"
+
+typedef int (*cudbg_collect_callback_t)(struct cudbg_init *pdbg_init,
+					struct cudbg_buffer *dbg_buff,
+					struct cudbg_error *cudbg_err);
+
+struct cxgb4_collect_entity {
+	enum cudbg_dbg_entity_type entity;
+	cudbg_collect_callback_t collect_cb;
+};
+
+enum CXGB4_ETHTOOL_DUMP_FLAGS {
+	CXGB4_ETH_DUMP_NONE = ETH_FW_DUMP_DISABLE,
+	CXGB4_ETH_DUMP_MEM = (1 << 0), /* On-Chip Memory Dumps */
+	CXGB4_ETH_DUMP_HW = (1 << 1), /* various FW and HW dumps */
+};
+
+u32 cxgb4_get_dump_length(struct adapter *adap, u32 flag);
+int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size,
+			u32 flag);
+void cxgb4_init_ethtool_dump(struct adapter *adapter);
+#endif /* __CXGB4_CUDBG_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c
index 6ee2ed30626b..4e7f72b17e82 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c
@@ -40,8 +40,7 @@ static inline bool cxgb4_dcb_state_synced(enum cxgb4_dcb_state state)
 		return false;
 }
 
-/* Initialize a port's Data Center Bridging state.  Typically used after a
- * Link Down event.
+/* Initialize a port's Data Center Bridging state.
  */
 void cxgb4_dcb_state_init(struct net_device *dev)
 {
@@ -106,6 +105,15 @@ static void cxgb4_dcb_cleanup_apps(struct net_device *dev)
 	}
 }
 
+/* Reset a port's Data Center Bridging state.  Typically used after a
+ * Link Down event.
+ */
+void cxgb4_dcb_reset(struct net_device *dev)
+{
+	cxgb4_dcb_cleanup_apps(dev);
+	cxgb4_dcb_state_init(dev);
+}
+
 /* Finite State machine for Data Center Bridging.
  */
 void cxgb4_dcb_state_fsm(struct net_device *dev,
@@ -194,8 +202,7 @@ void cxgb4_dcb_state_fsm(struct net_device *dev,
 			 * state.  We need to reset back to a ground state
 			 * of incomplete.
 			 */
-			cxgb4_dcb_cleanup_apps(dev);
-			cxgb4_dcb_state_init(dev);
+			cxgb4_dcb_reset(dev);
 			dcb->state = CXGB4_DCB_STATE_FW_INCOMPLETE;
 			dcb->supported = CXGB4_DCBX_FW_SUPPORT;
 			linkwatch_fire_event(dev);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h
index ccf24d3dc982..02040b99c78a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h
@@ -131,6 +131,7 @@ struct port_dcb_info {
 
 void cxgb4_dcb_state_init(struct net_device *);
 void cxgb4_dcb_version_init(struct net_device *);
+void cxgb4_dcb_reset(struct net_device *dev);
 void cxgb4_dcb_state_fsm(struct net_device *, enum cxgb4_dcb_state_input);
 void cxgb4_dcb_handle_fw_update(struct adapter *, const struct fw_port_cmd *);
 void cxgb4_dcb_set_caps(struct adapter *, const struct fw_port_cmd *);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 76540b0e082d..917663b35603 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -2211,7 +2211,7 @@ static int rss_key_show(struct seq_file *seq, void *v)
 {
 	u32 key[10];
 
-	t4_read_rss_key(seq->private, key);
+	t4_read_rss_key(seq->private, key, true);
 	seq_printf(seq, "%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x\n",
 		   key[9], key[8], key[7], key[6], key[5], key[4], key[3],
 		   key[2], key[1], key[0]);
@@ -2248,7 +2248,7 @@ static ssize_t rss_key_write(struct file *file, const char __user *buf,
 		}
 	}
 
-	t4_write_rss_key(adap, key, -1);
+	t4_write_rss_key(adap, key, -1, true);
 	return count;
 }
 
@@ -2325,12 +2325,13 @@ static int rss_pf_config_open(struct inode *inode, struct file *file)
 		return -ENOMEM;
 
 	pfconf = (struct rss_pf_conf *)p->data;
-	rss_pf_map = t4_read_rss_pf_map(adapter);
-	rss_pf_mask = t4_read_rss_pf_mask(adapter);
+	rss_pf_map = t4_read_rss_pf_map(adapter, true);
+	rss_pf_mask = t4_read_rss_pf_mask(adapter, true);
 	for (pf = 0; pf < 8; pf++) {
 		pfconf[pf].rss_pf_map = rss_pf_map;
 		pfconf[pf].rss_pf_mask = rss_pf_mask;
-		t4_read_rss_pf_config(adapter, pf, &pfconf[pf].rss_pf_config);
+		t4_read_rss_pf_config(adapter, pf, &pfconf[pf].rss_pf_config,
+				      true);
 	}
 	return 0;
 }
@@ -2393,7 +2394,7 @@ static int rss_vf_config_open(struct inode *inode, struct file *file)
 	vfconf = (struct rss_vf_conf *)p->data;
 	for (vf = 0; vf < vfcount; vf++) {
 		t4_read_rss_vf_config(adapter, vf, &vfconf[vf].rss_vf_vfl,
-				      &vfconf[vf].rss_vf_vfh);
+				      &vfconf[vf].rss_vf_vfh, true);
 	}
 	return 0;
 }
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index a71af1e587e2..eb338212f5af 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -21,6 +21,7 @@
 #include "cxgb4.h"
 #include "t4_regs.h"
 #include "t4fw_api.h"
+#include "cxgb4_cudbg.h"
 
 #define EEPROM_MAGIC 0x38E2F10C
 
@@ -335,10 +336,10 @@ static void collect_adapter_stats(struct adapter *adap, struct adapter_stats *s)
 	memset(s, 0, sizeof(*s));
 
 	spin_lock(&adap->stats_lock);
-	t4_tp_get_tcp_stats(adap, &v4, &v6);
-	t4_tp_get_rdma_stats(adap, &rdma_stats);
-	t4_get_usm_stats(adap, &usm_stats);
-	t4_tp_get_err_stats(adap, &err_stats);
+	t4_tp_get_tcp_stats(adap, &v4, &v6, false);
+	t4_tp_get_rdma_stats(adap, &rdma_stats, false);
+	t4_get_usm_stats(adap, &usm_stats, false);
+	t4_tp_get_err_stats(adap, &err_stats, false);
 	spin_unlock(&adap->stats_lock);
 
 	s->db_drop = adap->db_stats.db_drop;
@@ -388,9 +389,9 @@ static void collect_channel_stats(struct adapter *adap, struct channel_stats *s,
 	memset(s, 0, sizeof(*s));
 
 	spin_lock(&adap->stats_lock);
-	t4_tp_get_cpl_stats(adap, &cpl_stats);
-	t4_tp_get_err_stats(adap, &err_stats);
-	t4_get_fcoe_stats(adap, i, &fcoe_stats);
+	t4_tp_get_cpl_stats(adap, &cpl_stats, false);
+	t4_tp_get_err_stats(adap, &err_stats, false);
+	t4_get_fcoe_stats(adap, i, &fcoe_stats, false);
 	spin_unlock(&adap->stats_lock);
 
 	s->cpl_req = cpl_stats.req[i];
@@ -1063,40 +1064,11 @@ static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
 	return 0;
 }
 
-/**
- *	eeprom_ptov - translate a physical EEPROM address to virtual
- *	@phys_addr: the physical EEPROM address
- *	@fn: the PCI function number
- *	@sz: size of function-specific area
- *
- *	Translate a physical EEPROM address to virtual.  The first 1K is
- *	accessed through virtual addresses starting at 31K, the rest is
- *	accessed through virtual addresses starting at 0.
- *
- *	The mapping is as follows:
- *	[0..1K) -> [31K..32K)
- *	[1K..1K+A) -> [31K-A..31K)
- *	[1K+A..ES) -> [0..ES-A-1K)
- *
- *	where A = @fn * @sz, and ES = EEPROM size.
- */
-static int eeprom_ptov(unsigned int phys_addr, unsigned int fn, unsigned int sz)
-{
-	fn *= sz;
-	if (phys_addr < 1024)
-		return phys_addr + (31 << 10);
-	if (phys_addr < 1024 + fn)
-		return 31744 - fn + phys_addr - 1024;
-	if (phys_addr < EEPROMSIZE)
-		return phys_addr - 1024 - fn;
-	return -EINVAL;
-}
-
 /* The next two routines implement eeprom read/write from physical addresses.
  */
 static int eeprom_rd_phys(struct adapter *adap, unsigned int phys_addr, u32 *v)
 {
-	int vaddr = eeprom_ptov(phys_addr, adap->pf, EEPROMPFSIZE);
+	int vaddr = t4_eeprom_ptov(phys_addr, adap->pf, EEPROMPFSIZE);
 
 	if (vaddr >= 0)
 		vaddr = pci_read_vpd(adap->pdev, vaddr, sizeof(u32), v);
@@ -1105,7 +1077,7 @@ static int eeprom_rd_phys(struct adapter *adap, unsigned int phys_addr, u32 *v)
 
 static int eeprom_wr_phys(struct adapter *adap, unsigned int phys_addr, u32 v)
 {
-	int vaddr = eeprom_ptov(phys_addr, adap->pf, EEPROMPFSIZE);
+	int vaddr = t4_eeprom_ptov(phys_addr, adap->pf, EEPROMPFSIZE);
 
 	if (vaddr >= 0)
 		vaddr = pci_write_vpd(adap->pdev, vaddr, sizeof(u32), &v);
@@ -1374,6 +1346,56 @@ static int get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
 	return -EOPNOTSUPP;
 }
 
+static int set_dump(struct net_device *dev, struct ethtool_dump *eth_dump)
+{
+	struct adapter *adapter = netdev2adap(dev);
+	u32 len = 0;
+
+	len = sizeof(struct cudbg_hdr) +
+	      sizeof(struct cudbg_entity_hdr) * CUDBG_MAX_ENTITY;
+	len += cxgb4_get_dump_length(adapter, eth_dump->flag);
+
+	adapter->eth_dump.flag = eth_dump->flag;
+	adapter->eth_dump.len = len;
+	return 0;
+}
+
+static int get_dump_flag(struct net_device *dev, struct ethtool_dump *eth_dump)
+{
+	struct adapter *adapter = netdev2adap(dev);
+
+	eth_dump->flag = adapter->eth_dump.flag;
+	eth_dump->len = adapter->eth_dump.len;
+	eth_dump->version = adapter->eth_dump.version;
+	return 0;
+}
+
+static int get_dump_data(struct net_device *dev, struct ethtool_dump *eth_dump,
+			 void *buf)
+{
+	struct adapter *adapter = netdev2adap(dev);
+	u32 len = 0;
+	int ret = 0;
+
+	if (adapter->eth_dump.flag == CXGB4_ETH_DUMP_NONE)
+		return -ENOENT;
+
+	len = sizeof(struct cudbg_hdr) +
+	      sizeof(struct cudbg_entity_hdr) * CUDBG_MAX_ENTITY;
+	len += cxgb4_get_dump_length(adapter, adapter->eth_dump.flag);
+	if (eth_dump->len < len)
+		return -ENOMEM;
+
+	ret = cxgb4_cudbg_collect(adapter, buf, &len, adapter->eth_dump.flag);
+	if (ret)
+		return ret;
+
+	eth_dump->flag = adapter->eth_dump.flag;
+	eth_dump->len = len;
+	eth_dump->version = adapter->eth_dump.version;
+	return 0;
+}
+
 static const struct ethtool_ops cxgb_ethtool_ops = {
 	.get_link_ksettings = get_link_ksettings,
 	.set_link_ksettings = set_link_ksettings,
@@ -1404,7 +1426,10 @@ static const struct ethtool_ops cxgb_ethtool_ops = {
 	.get_rxfh	   = get_rss_table,
 	.set_rxfh	   = set_rss_table,
 	.flash_device      = set_flash,
-	.get_ts_info       = get_ts_info
+	.get_ts_info       = get_ts_info,
+	.set_dump          = set_dump,
+	.get_dump_flag     = get_dump_flag,
+	.get_dump_data     = get_dump_data,
 };
 
 void cxgb4_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index 45b5853ca2f1..5980f308a253 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -31,10 +31,15 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
+#include <net/ipv6.h>
 
 #include "cxgb4.h"
 #include "t4_regs.h"
+#include "t4_tcb.h"
+#include "t4_values.h"
+#include "clip_tbl.h"
 #include "l2t.h"
+#include "smt.h"
 #include "t4fw_api.h"
 #include "cxgb4_filter.h"
 
@@ -48,6 +53,194 @@ static inline bool unsupported(u32 conf, u32 conf_mask, u32 val, u32 mask)
 	return !(conf & conf_mask) && is_field_set(val, mask);
 }
 
+static int set_tcb_field(struct adapter *adap, struct filter_entry *f,
+			 unsigned int ftid,  u16 word, u64 mask, u64 val,
+			 int no_reply)
+{
+	struct cpl_set_tcb_field *req;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(sizeof(struct cpl_set_tcb_field), GFP_ATOMIC);
+	if (!skb)
+		return -ENOMEM;
+
+	req = (struct cpl_set_tcb_field *)__skb_put(skb, sizeof(*req));
+	memset(req, 0, sizeof(*req));
+	INIT_TP_WR_CPL(req, CPL_SET_TCB_FIELD, ftid);
+	req->reply_ctrl = htons(REPLY_CHAN_V(0) |
+				QUEUENO_V(adap->sge.fw_evtq.abs_id) |
+				NO_REPLY_V(no_reply));
+	req->word_cookie = htons(TCB_WORD_V(word) | TCB_COOKIE_V(ftid));
+	req->mask = cpu_to_be64(mask);
+	req->val = cpu_to_be64(val);
+	set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3);
+	t4_ofld_send(adap, skb);
+	return 0;
+}
+
+/* Set one of the t_flags bits in the TCB.
+ */
+static int set_tcb_tflag(struct adapter *adap, struct filter_entry *f,
+			 unsigned int ftid, unsigned int bit_pos,
+			 unsigned int val, int no_reply)
+{
+	return set_tcb_field(adap, f, ftid,  TCB_T_FLAGS_W, 1ULL << bit_pos,
+			     (unsigned long long)val << bit_pos, no_reply);
+}
+
+static void mk_abort_req_ulp(struct cpl_abort_req *abort_req, unsigned int tid)
+{
+	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)abort_req;
+	struct ulptx_idata *sc = (struct ulptx_idata *)(txpkt + 1);
+
+	txpkt->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | ULP_TXPKT_DEST_V(0));
+	txpkt->len = htonl(DIV_ROUND_UP(sizeof(*abort_req), 16));
+	sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_IMM));
+	sc->len = htonl(sizeof(*abort_req) - sizeof(struct work_request_hdr));
+	OPCODE_TID(abort_req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
+	abort_req->rsvd0 = htonl(0);
+	abort_req->rsvd1 = 0;
+	abort_req->cmd = CPL_ABORT_NO_RST;
+}
+
+static void mk_abort_rpl_ulp(struct cpl_abort_rpl *abort_rpl, unsigned int tid)
+{
+	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)abort_rpl;
+	struct ulptx_idata *sc = (struct ulptx_idata *)(txpkt + 1);
+
+	txpkt->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | ULP_TXPKT_DEST_V(0));
+	txpkt->len = htonl(DIV_ROUND_UP(sizeof(*abort_rpl), 16));
+	sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_IMM));
+	sc->len = htonl(sizeof(*abort_rpl) - sizeof(struct work_request_hdr));
+	OPCODE_TID(abort_rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid));
+	abort_rpl->rsvd0 = htonl(0);
+	abort_rpl->rsvd1 = 0;
+	abort_rpl->cmd = CPL_ABORT_NO_RST;
+}
+
+static void mk_set_tcb_ulp(struct filter_entry *f,
+			   struct cpl_set_tcb_field *req,
+			   unsigned int word, u64 mask, u64 val,
+			   u8 cookie, int no_reply)
+{
+	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
+	struct ulptx_idata *sc = (struct ulptx_idata *)(txpkt + 1);
+
+	txpkt->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | ULP_TXPKT_DEST_V(0));
+	txpkt->len = htonl(DIV_ROUND_UP(sizeof(*req), 16));
+	sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_IMM));
+	sc->len = htonl(sizeof(*req) - sizeof(struct work_request_hdr));
+	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, f->tid));
+	req->reply_ctrl = htons(NO_REPLY_V(no_reply) | REPLY_CHAN_V(0) |
+				QUEUENO_V(0));
+	req->word_cookie = htons(TCB_WORD_V(word) | TCB_COOKIE_V(cookie));
+	req->mask = cpu_to_be64(mask);
+	req->val = cpu_to_be64(val);
+	sc = (struct ulptx_idata *)(req + 1);
+	sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP));
+	sc->len = htonl(0);
+}
+
+static int configure_filter_smac(struct adapter *adap, struct filter_entry *f)
+{
+	int err;
+
+	/* do a set-tcb for smac-sel and CWR bit.. */
+	err = set_tcb_tflag(adap, f, f->tid, TF_CCTRL_CWR_S, 1, 1);
+	if (err)
+		goto smac_err;
+
+	err = set_tcb_field(adap, f, f->tid, TCB_SMAC_SEL_W,
+			    TCB_SMAC_SEL_V(TCB_SMAC_SEL_M),
+			    TCB_SMAC_SEL_V(f->smt->idx), 1);
+	if (!err)
+		return 0;
+
+smac_err:
+	dev_err(adap->pdev_dev, "filter %u smac config failed with error %u\n",
+		f->tid, err);
+	return err;
+}
+
+static void set_nat_params(struct adapter *adap, struct filter_entry *f,
+			   unsigned int tid, bool dip, bool sip, bool dp,
+			   bool sp)
+{
+	if (dip) {
+		if (f->fs.type) {
+			set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W,
+				      WORD_MASK, f->fs.nat_lip[15] |
+				      f->fs.nat_lip[14] << 8 |
+				      f->fs.nat_lip[13] << 16 |
+				      f->fs.nat_lip[12] << 24, 1);
+
+			set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W + 1,
+				      WORD_MASK, f->fs.nat_lip[11] |
+				      f->fs.nat_lip[10] << 8 |
+				      f->fs.nat_lip[9] << 16 |
+				      f->fs.nat_lip[8] << 24, 1);
+
+			set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W + 2,
+				      WORD_MASK, f->fs.nat_lip[7] |
+				      f->fs.nat_lip[6] << 8 |
+				      f->fs.nat_lip[5] << 16 |
+				      f->fs.nat_lip[4] << 24, 1);
+
+			set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W + 3,
+				      WORD_MASK, f->fs.nat_lip[3] |
+				      f->fs.nat_lip[2] << 8 |
+				      f->fs.nat_lip[1] << 16 |
+				      f->fs.nat_lip[0] << 24, 1);
+		} else {
+			set_tcb_field(adap, f, tid, TCB_RX_FRAG3_LEN_RAW_W,
+				      WORD_MASK, f->fs.nat_lip[3] |
+				      f->fs.nat_lip[2] << 8 |
+				      f->fs.nat_lip[1] << 16 |
+				      f->fs.nat_lip[0] << 24, 1);
+		}
+	}
+
+	if (sip) {
+		if (f->fs.type) {
+			set_tcb_field(adap, f, tid, TCB_RX_FRAG2_PTR_RAW_W,
+				      WORD_MASK, f->fs.nat_fip[15] |
+				      f->fs.nat_fip[14] << 8 |
+				      f->fs.nat_fip[13] << 16 |
+				      f->fs.nat_fip[12] << 24, 1);
+
+			set_tcb_field(adap, f, tid, TCB_RX_FRAG2_PTR_RAW_W + 1,
+				      WORD_MASK, f->fs.nat_fip[11] |
+				      f->fs.nat_fip[10] << 8 |
+				      f->fs.nat_fip[9] << 16 |
+				      f->fs.nat_fip[8] << 24, 1);
+
+			set_tcb_field(adap, f, tid, TCB_RX_FRAG2_PTR_RAW_W + 2,
+				      WORD_MASK, f->fs.nat_fip[7] |
+				      f->fs.nat_fip[6] << 8 |
+				      f->fs.nat_fip[5] << 16 |
+				      f->fs.nat_fip[4] << 24, 1);
+
+			set_tcb_field(adap, f, tid, TCB_RX_FRAG2_PTR_RAW_W + 3,
+				      WORD_MASK, f->fs.nat_fip[3] |
+				      f->fs.nat_fip[2] << 8 |
+				      f->fs.nat_fip[1] << 16 |
+				      f->fs.nat_fip[0] << 24, 1);
+
+		} else {
+			set_tcb_field(adap, f, tid,
+				      TCB_RX_FRAG3_START_IDX_OFFSET_RAW_W,
+				      WORD_MASK, f->fs.nat_fip[3] |
+				      f->fs.nat_fip[2] << 8 |
+				      f->fs.nat_fip[1] << 16 |
+				      f->fs.nat_fip[0] << 24, 1);
+		}
+	}
+
+	set_tcb_field(adap, f, tid, TCB_PDU_HDR_LEN_W, WORD_MASK,
+		      (dp ? f->fs.nat_lport : 0) |
+		      (sp ? f->fs.nat_fport << 16 : 0), 1);
+}
+
 /* Validate filter spec against configuration done on the card. */
 static int validate_filter(struct net_device *dev,
 			   struct ch_filter_specification *fs)
@@ -148,6 +341,116 @@ static int get_filter_steerq(struct net_device *dev,
 	return iq;
 }
 
+static int get_filter_count(struct adapter *adapter, unsigned int fidx,
+			    u64 *pkts, u64 *bytes, bool hash)
+{
+	unsigned int tcb_base, tcbaddr;
+	unsigned int word_offset;
+	struct filter_entry *f;
+	__be64 be64_byte_count;
+	int ret;
+
+	tcb_base = t4_read_reg(adapter, TP_CMM_TCB_BASE_A);
+	if (is_hashfilter(adapter) && hash) {
+		if (fidx < adapter->tids.ntids) {
+			f = adapter->tids.tid_tab[fidx];
+			if (!f)
+				return -EINVAL;
+		} else {
+			return -E2BIG;
+		}
+	} else {
+		if ((fidx != (adapter->tids.nftids +
+			      adapter->tids.nsftids - 1)) &&
+		    fidx >= adapter->tids.nftids)
+			return -E2BIG;
+
+		f = &adapter->tids.ftid_tab[fidx];
+		if (!f->valid)
+			return -EINVAL;
+	}
+	tcbaddr = tcb_base + f->tid * TCB_SIZE;
+
+	spin_lock(&adapter->win0_lock);
+	if (is_t4(adapter->params.chip)) {
+		__be64 be64_count;
+
+		/* T4 doesn't maintain byte counts in hw */
+		*bytes = 0;
+
+		/* Get pkts */
+		word_offset = 4;
+		ret = t4_memory_rw(adapter, MEMWIN_NIC, MEM_EDC0,
+				   tcbaddr + (word_offset * sizeof(__be32)),
+				   sizeof(be64_count),
+				   (__be32 *)&be64_count,
+				   T4_MEMORY_READ);
+		if (ret < 0)
+			goto out;
+		*pkts = be64_to_cpu(be64_count);
+	} else {
+		__be32 be32_count;
+
+		/* Get bytes */
+		word_offset = 4;
+		ret = t4_memory_rw(adapter, MEMWIN_NIC, MEM_EDC0,
+				   tcbaddr + (word_offset * sizeof(__be32)),
+				   sizeof(be64_byte_count),
+				   &be64_byte_count,
+				   T4_MEMORY_READ);
+		if (ret < 0)
+			goto out;
+		*bytes = be64_to_cpu(be64_byte_count);
+
+		/* Get pkts */
+		word_offset = 6;
+		ret = t4_memory_rw(adapter, MEMWIN_NIC, MEM_EDC0,
+				   tcbaddr + (word_offset * sizeof(__be32)),
+				   sizeof(be32_count),
+				   &be32_count,
+				   T4_MEMORY_READ);
+		if (ret < 0)
+			goto out;
+		*pkts = (u64)be32_to_cpu(be32_count);
+	}
+
+out:
+	spin_unlock(&adapter->win0_lock);
+	return ret;
+}
+
+int cxgb4_get_filter_counters(struct net_device *dev, unsigned int fidx,
+			      u64 *hitcnt, u64 *bytecnt, bool hash)
+{
+	struct adapter *adapter = netdev2adap(dev);
+
+	return get_filter_count(adapter, fidx, hitcnt, bytecnt, hash);
+}
+
+int cxgb4_get_free_ftid(struct net_device *dev, int family)
+{
+	struct adapter *adap = netdev2adap(dev);
+	struct tid_info *t = &adap->tids;
+	int ftid;
+
+	spin_lock_bh(&t->ftid_lock);
+	if (family == PF_INET) {
+		ftid = find_first_zero_bit(t->ftid_bmap, t->nftids);
+		if (ftid >= t->nftids)
+			ftid = -1;
+	} else {
+		ftid = bitmap_find_free_region(t->ftid_bmap, t->nftids, 2);
+		if (ftid < 0)
+			goto out_unlock;
+
+		/* this is only a lookup, keep the found region unallocated */
+		bitmap_release_region(t->ftid_bmap, ftid, 2);
+	}
+out_unlock:
+	spin_unlock_bh(&t->ftid_lock);
+	return ftid;
+}
+
 static int cxgb4_set_ftid(struct tid_info *t, int fidx, int family)
 {
 	spin_lock_bh(&t->ftid_lock);
@@ -191,7 +494,8 @@ static int del_filter_wr(struct adapter *adapter, int fidx)
 		return -ENOMEM;
 
 	fwr = __skb_put(skb, len);
-	t4_mk_filtdelwr(f->tid, fwr, adapter->sge.fw_evtq.abs_id);
+	t4_mk_filtdelwr(f->tid, fwr, (adapter->flags & SHUTTING_DOWN) ? -1
+			: adapter->sge.fw_evtq.abs_id);
 
 	/* Mark the filter as "pending" and ship off the Filter Work Request.
 	 * When we get the Work Request Reply we'll clear the pending status.
@@ -210,7 +514,7 @@ static int del_filter_wr(struct adapter *adapter, int fidx)
 int set_filter_wr(struct adapter *adapter, int fidx)
 {
 	struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
-	struct fw_filter_wr *fwr;
+	struct fw_filter2_wr *fwr;
 	struct sk_buff *skb;
 
 	skb = alloc_skb(sizeof(*fwr), GFP_KERNEL);
@@ -231,6 +535,21 @@ int set_filter_wr(struct adapter *adapter, int fidx)
 		}
 	}
 
+	/* If the new filter requires loopback Source MAC rewriting then
+	 * we need to allocate a SMT entry for the filter.
+	 */
+	if (f->fs.newsmac) {
+		f->smt = cxgb4_smt_alloc_switching(f->dev, f->fs.smac);
+		if (!f->smt) {
+			if (f->l2t) {
+				cxgb4_l2t_release(f->l2t);
+				f->l2t = NULL;
+			}
+			kfree_skb(skb);
+			return -ENOMEM;
+		}
+	}
+
 	fwr = __skb_put_zero(skb, sizeof(*fwr));
 
 	/* It would be nice to put most of the following in t4_hw.c but most
@@ -241,7 +560,10 @@ int set_filter_wr(struct adapter *adapter, int fidx)
 	 * filter specification structure but for now it's easiest to simply
 	 * put this fairly direct code in line ...
 	 */
-	fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR));
+	if (adapter->params.filter2_wr_support)
+		fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER2_WR));
+	else
+		fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR));
 	fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr) / 16));
 	fwr->tid_to_iq =
 		htonl(FW_FILTER_WR_TID_V(f->tid) |
@@ -256,7 +578,6 @@ int set_filter_wr(struct adapter *adapter, int fidx)
 		      FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) |
 		      FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) |
 		      FW_FILTER_WR_DMAC_V(f->fs.newdmac) |
-		      FW_FILTER_WR_SMAC_V(f->fs.newsmac) |
 		      FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT ||
 					     f->fs.newvlan == VLAN_REWRITE) |
 		      FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE ||
@@ -303,8 +624,18 @@ int set_filter_wr(struct adapter *adapter, int fidx)
 	fwr->lpm = htons(f->fs.mask.lport);
 	fwr->fp = htons(f->fs.val.fport);
 	fwr->fpm = htons(f->fs.mask.fport);
-	if (f->fs.newsmac)
-		memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma));
+
+	if (adapter->params.filter2_wr_support) {
+		fwr->natmode_to_ulp_type =
+			FW_FILTER2_WR_ULP_TYPE_V(f->fs.nat_mode ?
+						 ULP_MODE_TCPDDP :
+						 ULP_MODE_NONE) |
+			FW_FILTER2_WR_NATMODE_V(f->fs.nat_mode);
+		memcpy(fwr->newlip, f->fs.nat_lip, sizeof(fwr->newlip));
+		memcpy(fwr->newfip, f->fs.nat_fip, sizeof(fwr->newfip));
+		fwr->newlport = htons(f->fs.nat_lport);
+		fwr->newfport = htons(f->fs.nat_fport);
+	}
 
 	/* Mark the filter as "pending" and ship off the Filter Work Request.
 	 * When we get the Work Request Reply we'll clear the pending status.
@@ -354,14 +685,18 @@ int delete_filter(struct adapter *adapter, unsigned int fidx)
 void clear_filter(struct adapter *adap, struct filter_entry *f)
 {
 	/* If the new or old filter have loopback rewriteing rules then we'll
-	 * need to free any existing Layer Two Table (L2T) entries of the old
-	 * filter rule.  The firmware will handle freeing up any Source MAC
-	 * Table (SMT) entries used for rewriting Source MAC Addresses in
-	 * loopback rules.
+	 * need to free any existing L2T, SMT, CLIP entries of filter
+	 * rule.
 	 */
 	if (f->l2t)
 		cxgb4_l2t_release(f->l2t);
 
+	if (f->smt)
+		cxgb4_smt_release(f->smt);
+
+	if (f->fs.hash && f->fs.type)
+		cxgb4_clip_release(f->dev, (const u32 *)&f->fs.val.lip, 1);
+
 	/* The zeroing of the filter rule below clears the filter valid,
 	 * pending, locked flags, l2t pointer, etc. so it's all we need for
 	 * this operation.
@@ -431,6 +766,418 @@ static void fill_default_mask(struct ch_filter_specification *fs)
 		fs->mask.fport = ~0;
 }
 
+static bool is_addr_all_mask(u8 *ipmask, int family)
+{
+	if (family == AF_INET) {
+		struct in_addr *addr;
+
+		addr = (struct in_addr *)ipmask;
+		if (addr->s_addr == 0xffffffff)
+			return true;
+	} else if (family == AF_INET6) {
+		struct in6_addr *addr6;
+
+		addr6 = (struct in6_addr *)ipmask;
+		if (addr6->s6_addr32[0] == 0xffffffff &&
+		    addr6->s6_addr32[1] == 0xffffffff &&
+		    addr6->s6_addr32[2] == 0xffffffff &&
+		    addr6->s6_addr32[3] == 0xffffffff)
+			return true;
+	}
+	return false;
+}
+
+static bool is_inaddr_any(u8 *ip, int family)
+{
+	int addr_type;
+
+	if (family == AF_INET) {
+		struct in_addr *addr;
+
+		addr = (struct in_addr *)ip;
+		if (addr->s_addr == htonl(INADDR_ANY))
+			return true;
+	} else if (family == AF_INET6) {
+		struct in6_addr *addr6;
+
+		addr6 = (struct in6_addr *)ip;
+		addr_type = ipv6_addr_type((const struct in6_addr *)
+					   &addr6);
+		if (addr_type == IPV6_ADDR_ANY)
+			return true;
+	}
+	return false;
+}
+
+bool is_filter_exact_match(struct adapter *adap,
+			   struct ch_filter_specification *fs)
+{
+	struct tp_params *tp = &adap->params.tp;
+	u64 hash_filter_mask = tp->hash_filter_mask;
+	u32 mask;
+
+	if (!is_hashfilter(adap))
+		return false;
+
+	if (fs->type) {
+		if (is_inaddr_any(fs->val.fip, AF_INET6) ||
+		    !is_addr_all_mask(fs->mask.fip, AF_INET6))
+			return false;
+
+		if (is_inaddr_any(fs->val.lip, AF_INET6) ||
+		    !is_addr_all_mask(fs->mask.lip, AF_INET6))
+			return false;
+	} else {
+		if (is_inaddr_any(fs->val.fip, AF_INET) ||
+		    !is_addr_all_mask(fs->mask.fip, AF_INET))
+			return false;
+
+		if (is_inaddr_any(fs->val.lip, AF_INET) ||
+		    !is_addr_all_mask(fs->mask.lip, AF_INET))
+			return false;
+	}
+
+	if (!fs->val.lport || fs->mask.lport != 0xffff)
+		return false;
+
+	if (!fs->val.fport || fs->mask.fport != 0xffff)
+		return false;
+
+	if (tp->fcoe_shift >= 0) {
+		mask = (hash_filter_mask >> tp->fcoe_shift) & FT_FCOE_W;
+		if (mask && !fs->mask.fcoe)
+			return false;
+	}
+
+	if (tp->port_shift >= 0) {
+		mask = (hash_filter_mask >> tp->port_shift) & FT_PORT_W;
+		if (mask && !fs->mask.iport)
+			return false;
+	}
+
+	if (tp->vnic_shift >= 0) {
+		mask = (hash_filter_mask >> tp->vnic_shift) & FT_VNIC_ID_W;
+
+		if ((adap->params.tp.ingress_config & VNIC_F)) {
+			if (mask && !fs->mask.pfvf_vld)
+				return false;
+		} else {
+			if (mask && !fs->mask.ovlan_vld)
+				return false;
+		}
+	}
+
+	if (tp->vlan_shift >= 0) {
+		mask = (hash_filter_mask >> tp->vlan_shift) & FT_VLAN_W;
+		if (mask && !fs->mask.ivlan)
+			return false;
+	}
+
+	if (tp->tos_shift >= 0) {
+		mask = (hash_filter_mask >> tp->tos_shift) & FT_TOS_W;
+		if (mask && !fs->mask.tos)
+			return false;
+	}
+
+	if (tp->protocol_shift >= 0) {
+		mask = (hash_filter_mask >> tp->protocol_shift) & FT_PROTOCOL_W;
+		if (mask && !fs->mask.proto)
+			return false;
+	}
+
+	if (tp->ethertype_shift >= 0) {
+		mask = (hash_filter_mask >> tp->ethertype_shift) &
+			FT_ETHERTYPE_W;
+		if (mask && !fs->mask.ethtype)
+			return false;
+	}
+
+	if (tp->macmatch_shift >= 0) {
+		mask = (hash_filter_mask >> tp->macmatch_shift) & FT_MACMATCH_W;
+		if (mask && !fs->mask.macidx)
+			return false;
+	}
+
+	if (tp->matchtype_shift >= 0) {
+		mask = (hash_filter_mask >> tp->matchtype_shift) &
+			FT_MPSHITTYPE_W;
+		if (mask && !fs->mask.matchtype)
+			return false;
+	}
+	if (tp->frag_shift >= 0) {
+		mask = (hash_filter_mask >> tp->frag_shift) &
+			FT_FRAGMENTATION_W;
+		if (mask && !fs->mask.frag)
+			return false;
+	}
+	return true;
+}
+
+static u64 hash_filter_ntuple(struct ch_filter_specification *fs,
+			      struct net_device *dev)
+{
+	struct adapter *adap = netdev2adap(dev);
+	struct tp_params *tp = &adap->params.tp;
+	u64 ntuple = 0;
+
+	/* Initialize each of the fields which we care about which are present
+	 * in the Compressed Filter Tuple.
+	 */
+	if (tp->vlan_shift >= 0 && fs->mask.ivlan)
+		ntuple |= (FT_VLAN_VLD_F | fs->val.ivlan) << tp->vlan_shift;
+
+	if (tp->port_shift >= 0 && fs->mask.iport)
+		ntuple |= (u64)fs->val.iport << tp->port_shift;
+
+	if (tp->protocol_shift >= 0) {
+		if (!fs->val.proto)
+			ntuple |= (u64)IPPROTO_TCP << tp->protocol_shift;
+		else
+			ntuple |= (u64)fs->val.proto << tp->protocol_shift;
+	}
+
+	if (tp->tos_shift >= 0 && fs->mask.tos)
+		ntuple |= (u64)(fs->val.tos) << tp->tos_shift;
+
+	if (tp->vnic_shift >= 0) {
+		if ((adap->params.tp.ingress_config & VNIC_F) &&
+		    fs->mask.pfvf_vld)
+			ntuple |= (u64)((fs->val.pfvf_vld << 16) |
+					(fs->val.pf << 13) |
+					(fs->val.vf)) << tp->vnic_shift;
+		else
+			ntuple |= (u64)((fs->val.ovlan_vld << 16) |
+					(fs->val.ovlan)) << tp->vnic_shift;
+	}
+
+	if (tp->macmatch_shift >= 0 && fs->mask.macidx)
+		ntuple |= (u64)(fs->val.macidx) << tp->macmatch_shift;
+
+	if (tp->ethertype_shift >= 0 && fs->mask.ethtype)
+		ntuple |= (u64)(fs->val.ethtype) << tp->ethertype_shift;
+
+	if (tp->matchtype_shift >= 0 && fs->mask.matchtype)
+		ntuple |= (u64)(fs->val.matchtype) << tp->matchtype_shift;
+
+	if (tp->frag_shift >= 0 && fs->mask.frag)
+		ntuple |= (u64)(fs->val.frag) << tp->frag_shift;
+
+	if (tp->fcoe_shift >= 0 && fs->mask.fcoe)
+		ntuple |= (u64)(fs->val.fcoe) << tp->fcoe_shift;
+	return ntuple;
+}
+
+static void mk_act_open_req6(struct filter_entry *f, struct sk_buff *skb,
+			     unsigned int qid_filterid, struct adapter *adap)
+{
+	struct cpl_t6_act_open_req6 *t6req = NULL;
+	struct cpl_act_open_req6 *req = NULL;
+
+	t6req = (struct cpl_t6_act_open_req6 *)__skb_put(skb, sizeof(*t6req));
+	INIT_TP_WR(t6req, 0);
+	req = (struct cpl_act_open_req6 *)t6req;
+	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, qid_filterid));
+	req->local_port = cpu_to_be16(f->fs.val.lport);
+	req->peer_port = cpu_to_be16(f->fs.val.fport);
+	req->local_ip_hi = *(__be64 *)(&f->fs.val.lip);
+	req->local_ip_lo = *(((__be64 *)&f->fs.val.lip) + 1);
+	req->peer_ip_hi = *(__be64 *)(&f->fs.val.fip);
+	req->peer_ip_lo = *(((__be64 *)&f->fs.val.fip) + 1);
+	req->opt0 = cpu_to_be64(NAGLE_V(f->fs.newvlan == VLAN_REMOVE ||
+					f->fs.newvlan == VLAN_REWRITE) |
+				DELACK_V(f->fs.hitcnts) |
+				L2T_IDX_V(f->l2t ? f->l2t->idx : 0) |
+				SMAC_SEL_V((cxgb4_port_viid(f->dev) &
+					    0x7F) << 1) |
+				TX_CHAN_V(f->fs.eport) |
+				NO_CONG_V(f->fs.rpttid) |
+				ULP_MODE_V(f->fs.nat_mode ?
+					   ULP_MODE_TCPDDP : ULP_MODE_NONE) |
+				TCAM_BYPASS_F | NON_OFFLOAD_F);
+	t6req->params = cpu_to_be64(FILTER_TUPLE_V(hash_filter_ntuple(&f->fs,
+								      f->dev)));
+	t6req->opt2 = htonl(RSS_QUEUE_VALID_F |
+			    RSS_QUEUE_V(f->fs.iq) |
+			    TX_QUEUE_V(f->fs.nat_mode) |
+			    T5_OPT_2_VALID_F |
+			    RX_CHANNEL_F |
+			    CONG_CNTRL_V((f->fs.action == FILTER_DROP) |
+					 (f->fs.dirsteer << 1)) |
+			    PACE_V((f->fs.maskhash) |
+				   ((f->fs.dirsteerhash) << 1)) |
+			    CCTRL_ECN_V(f->fs.action == FILTER_SWITCH));
+}
+
+static void mk_act_open_req(struct filter_entry *f, struct sk_buff *skb,
+			    unsigned int qid_filterid, struct adapter *adap)
+{
+	struct cpl_t6_act_open_req *t6req = NULL;
+	struct cpl_act_open_req *req = NULL;
+
+	t6req = (struct cpl_t6_act_open_req *)__skb_put(skb, sizeof(*t6req));
+	INIT_TP_WR(t6req, 0);
+	req = (struct cpl_act_open_req *)t6req;
+	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, qid_filterid));
+	req->local_port = cpu_to_be16(f->fs.val.lport);
+	req->peer_port = cpu_to_be16(f->fs.val.fport);
+	req->local_ip = f->fs.val.lip[0] | f->fs.val.lip[1] << 8 |
+		f->fs.val.lip[2] << 16 | f->fs.val.lip[3] << 24;
+	req->peer_ip = f->fs.val.fip[0] | f->fs.val.fip[1] << 8 |
+		f->fs.val.fip[2] << 16 | f->fs.val.fip[3] << 24;
+	req->opt0 = cpu_to_be64(NAGLE_V(f->fs.newvlan == VLAN_REMOVE ||
+					f->fs.newvlan == VLAN_REWRITE) |
+				DELACK_V(f->fs.hitcnts) |
+				L2T_IDX_V(f->l2t ? f->l2t->idx : 0) |
+				SMAC_SEL_V((cxgb4_port_viid(f->dev) &
+					    0x7F) << 1) |
+				TX_CHAN_V(f->fs.eport) |
+				NO_CONG_V(f->fs.rpttid) |
+				ULP_MODE_V(f->fs.nat_mode ?
+					   ULP_MODE_TCPDDP : ULP_MODE_NONE) |
+				TCAM_BYPASS_F | NON_OFFLOAD_F);
+
+	t6req->params = cpu_to_be64(FILTER_TUPLE_V(hash_filter_ntuple(&f->fs,
+								      f->dev)));
+	t6req->opt2 = htonl(RSS_QUEUE_VALID_F |
+			    RSS_QUEUE_V(f->fs.iq) |
+			    TX_QUEUE_V(f->fs.nat_mode) |
+			    T5_OPT_2_VALID_F |
+			    RX_CHANNEL_F |
+			    CONG_CNTRL_V((f->fs.action == FILTER_DROP) |
+					 (f->fs.dirsteer << 1)) |
+			    PACE_V((f->fs.maskhash) |
+				   ((f->fs.dirsteerhash) << 1)) |
+			    CCTRL_ECN_V(f->fs.action == FILTER_SWITCH));
+}
+
+static int cxgb4_set_hash_filter(struct net_device *dev,
+				 struct ch_filter_specification *fs,
+				 struct filter_ctx *ctx)
+{
+	struct adapter *adapter = netdev2adap(dev);
+	struct tid_info *t = &adapter->tids;
+	struct filter_entry *f;
+	struct sk_buff *skb;
+	int iq, atid, size;
+	int ret = 0;
+	u32 iconf;
+
+	fill_default_mask(fs);
+	ret = validate_filter(dev, fs);
+	if (ret)
+		return ret;
+
+	iq = get_filter_steerq(dev, fs);
+	if (iq < 0)
+		return iq;
+
+	f = kzalloc(sizeof(*f), GFP_KERNEL);
+	if (!f)
+		return -ENOMEM;
+
+	f->fs = *fs;
+	f->ctx = ctx;
+	f->dev = dev;
+	f->fs.iq = iq;
+
+	/* If the new filter requires loopback Destination MAC and/or VLAN
+	 * rewriting then we need to allocate a Layer 2 Table (L2T) entry for
+	 * the filter.
+	 */
+	if (f->fs.newdmac || f->fs.newvlan) {
+		/* allocate L2T entry for new filter */
+		f->l2t = t4_l2t_alloc_switching(adapter, f->fs.vlan,
+						f->fs.eport, f->fs.dmac);
+		if (!f->l2t) {
+			ret = -ENOMEM;
+			goto out_err;
+		}
+	}
+
+	/* If the new filter requires loopback Source MAC rewriting then
+	 * we need to allocate a SMT entry for the filter.
+	 */
+	if (f->fs.newsmac) {
+		f->smt = cxgb4_smt_alloc_switching(f->dev, f->fs.smac);
+		if (!f->smt) {
+			if (f->l2t) {
+				cxgb4_l2t_release(f->l2t);
+				f->l2t = NULL;
+			}
+			ret = -ENOMEM;
+			goto free_l2t;
+		}
+	}
+
+	atid = cxgb4_alloc_atid(t, f);
+	if (atid < 0) {
+		ret = atid;
+		goto free_smt;
+	}
+
+	iconf = adapter->params.tp.ingress_config;
+	if (iconf & VNIC_F) {
+		f->fs.val.ovlan = (fs->val.pf << 13) | fs->val.vf;
+		f->fs.mask.ovlan = (fs->mask.pf << 13) | fs->mask.vf;
+		f->fs.val.ovlan_vld = fs->val.pfvf_vld;
+		f->fs.mask.ovlan_vld = fs->mask.pfvf_vld;
+	}
+
+	size = sizeof(struct cpl_t6_act_open_req);
+	if (f->fs.type) {
+		ret = cxgb4_clip_get(f->dev, (const u32 *)&f->fs.val.lip, 1);
+		if (ret)
+			goto free_atid;
+
+		skb = alloc_skb(size, GFP_KERNEL);
+		if (!skb) {
+			ret = -ENOMEM;
+			goto free_clip;
+		}
+
+		mk_act_open_req6(f, skb,
+				 ((adapter->sge.fw_evtq.abs_id << 14) | atid),
+				 adapter);
+	} else {
+		skb = alloc_skb(size, GFP_KERNEL);
+		if (!skb) {
+			ret = -ENOMEM;
+			goto free_atid;
+		}
+
+		mk_act_open_req(f, skb,
+				((adapter->sge.fw_evtq.abs_id << 14) | atid),
+				adapter);
+	}
+
+	f->pending = 1;
+	set_wr_txq(skb, CPL_PRIORITY_SETUP, f->fs.val.iport & 0x3);
+	t4_ofld_send(adapter, skb);
+	return 0;
+
+free_clip:
+	cxgb4_clip_release(f->dev, (const u32 *)&f->fs.val.lip, 1);
+
+free_atid:
+	cxgb4_free_atid(t, atid);
+
+free_smt:
+	if (f->smt) {
+		cxgb4_smt_release(f->smt);
+		f->smt = NULL;
+	}
+
+free_l2t:
+	if (f->l2t) {
+		cxgb4_l2t_release(f->l2t);
+		f->l2t = NULL;
+	}
+
+out_err:
+	kfree(f);
+	return ret;
+}
+
 /* Check a Chelsio Filter Request for validity, convert it into our internal
  * format and send it to the hardware.  Return 0 on success, an error number
  * otherwise.  We attach any provided filter operation context to the internal
@@ -447,6 +1194,14 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id,
 	u32 iconf;
 	int iq, ret;
 
+	if (fs->hash) {
+		if (is_hashfilter(adapter))
+			return cxgb4_set_hash_filter(dev, fs, ctx);
+		netdev_err(dev, "%s: Exact-match filters only supported with Hash Filter configuration\n",
+			   __func__);
+		return -EINVAL;
+	}
+
 	max_fidx = adapter->tids.nftids;
 	if (filter_id != (max_fidx + adapter->tids.nsftids - 1) &&
 	    filter_id >= max_fidx)
@@ -568,12 +1323,74 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id,
 	return ret;
 }
 
+static int cxgb4_del_hash_filter(struct net_device *dev, int filter_id,
+				 struct filter_ctx *ctx)
+{
+	struct adapter *adapter = netdev2adap(dev);
+	struct tid_info *t = &adapter->tids;
+	struct cpl_abort_req *abort_req;
+	struct cpl_abort_rpl *abort_rpl;
+	struct cpl_set_tcb_field *req;
+	struct ulptx_idata *aligner;
+	struct work_request_hdr *wr;
+	struct filter_entry *f;
+	struct sk_buff *skb;
+	unsigned int wrlen;
+	int ret;
+
+	netdev_dbg(dev, "%s: filter_id = %d ; nftids = %d\n",
+		   __func__, filter_id, adapter->tids.nftids);
+
+	if (filter_id > adapter->tids.ntids)
+		return -E2BIG;
+
+	f = lookup_tid(t, filter_id);
+	if (!f) {
+		netdev_err(dev, "%s: no filter entry for filter_id = %d",
+			   __func__, filter_id);
+		return -EINVAL;
+	}
+
+	ret = writable_filter(f);
+	if (ret)
+		return ret;
+
+	if (!f->valid)
+		return -EINVAL;
+
+	f->ctx = ctx;
+	f->pending = 1;
+	wrlen = roundup(sizeof(*wr) + (sizeof(*req) + sizeof(*aligner))
+			+ sizeof(*abort_req) + sizeof(*abort_rpl), 16);
+	skb = alloc_skb(wrlen, GFP_KERNEL);
+	if (!skb) {
+		netdev_err(dev, "%s: could not allocate skb ..\n", __func__);
+		return -ENOMEM;
+	}
+	set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3);
+	req = (struct cpl_set_tcb_field *)__skb_put(skb, wrlen);
+	INIT_ULPTX_WR(req, wrlen, 0, 0);
+	wr = (struct work_request_hdr *)req;
+	wr++;
+	req = (struct cpl_set_tcb_field *)wr;
+	mk_set_tcb_ulp(f, req, TCB_RSS_INFO_W, TCB_RSS_INFO_V(TCB_RSS_INFO_M),
+		       TCB_RSS_INFO_V(adapter->sge.fw_evtq.abs_id), 0, 1);
+	aligner = (struct ulptx_idata *)(req + 1);
+	abort_req = (struct cpl_abort_req *)(aligner + 1);
+	mk_abort_req_ulp(abort_req, f->tid);
+	abort_rpl = (struct cpl_abort_rpl *)(abort_req + 1);
+	mk_abort_rpl_ulp(abort_rpl, f->tid);
+	t4_ofld_send(adapter, skb);
+	return 0;
+}
+
 /* Check a delete filter request for validity and send it to the hardware.
  * Return 0 on success, an error number otherwise.  We attach any provided
  * filter operation context to the internal filter specification in order to
  * facilitate signaling completion of the operation.
  */
 int __cxgb4_del_filter(struct net_device *dev, int filter_id,
+		       struct ch_filter_specification *fs,
 		       struct filter_ctx *ctx)
 {
 	struct adapter *adapter = netdev2adap(dev);
@@ -581,6 +1398,14 @@ int __cxgb4_del_filter(struct net_device *dev, int filter_id,
 	unsigned int max_fidx;
 	int ret;
 
+	if (fs && fs->hash) {
+		if (is_hashfilter(adapter))
+			return cxgb4_del_hash_filter(dev, filter_id, ctx);
+		netdev_err(dev, "%s: Exact-match filters only supported with Hash Filter configuration\n",
+			   __func__);
+		return -EINVAL;
+	}
+
 	max_fidx = adapter->tids.nftids;
 	if (filter_id != (max_fidx + adapter->tids.nsftids - 1) &&
 	    filter_id >= max_fidx)
@@ -631,14 +1456,19 @@ out:
 	return ret;
 }
 
-int cxgb4_del_filter(struct net_device *dev, int filter_id)
+int cxgb4_del_filter(struct net_device *dev, int filter_id,
+		     struct ch_filter_specification *fs)
 {
 	struct filter_ctx ctx;
 	int ret;
 
+	/* If we are shutting down the adapter do not wait for completion */
+	if (netdev2adap(dev)->flags & SHUTTING_DOWN)
+		return __cxgb4_del_filter(dev, filter_id, fs, NULL);
+
 	init_completion(&ctx.completion);
 
-	ret = __cxgb4_del_filter(dev, filter_id, &ctx);
+	ret = __cxgb4_del_filter(dev, filter_id, fs, &ctx);
 	if (ret)
 		goto out;
 
@@ -652,6 +1482,157 @@ out:
 	return ret;
 }
 
+static int configure_filter_tcb(struct adapter *adap, unsigned int tid,
+				struct filter_entry *f)
+{
+	if (f->fs.hitcnts)
+		set_tcb_field(adap, f, tid, TCB_TIMESTAMP_W,
+			      TCB_TIMESTAMP_V(TCB_TIMESTAMP_M) |
+			      TCB_RTT_TS_RECENT_AGE_V(TCB_RTT_TS_RECENT_AGE_M),
+			      TCB_TIMESTAMP_V(0ULL) |
+			      TCB_RTT_TS_RECENT_AGE_V(0ULL),
+			      1);
+
+	if (f->fs.newdmac)
+		set_tcb_tflag(adap, f, tid, TF_CCTRL_ECE_S, 1,
+			      1);
+
+	if (f->fs.newvlan == VLAN_INSERT ||
+	    f->fs.newvlan == VLAN_REWRITE)
+		set_tcb_tflag(adap, f, tid, TF_CCTRL_RFR_S, 1,
+			      1);
+	if (f->fs.newsmac)
+		configure_filter_smac(adap, f);
+
+	if (f->fs.nat_mode) {
+		switch (f->fs.nat_mode) {
+		case NAT_MODE_DIP:
+			set_nat_params(adap, f, tid, true, false, false, false);
+			break;
+
+		case NAT_MODE_DIP_DP:
+			set_nat_params(adap, f, tid, true, false, true, false);
+			break;
+
+		case NAT_MODE_DIP_DP_SIP:
+			set_nat_params(adap, f, tid, true, true, true, false);
+			break;
+		case NAT_MODE_DIP_DP_SP:
+			set_nat_params(adap, f, tid, true, false, true, true);
+			break;
+
+		case NAT_MODE_SIP_SP:
+			set_nat_params(adap, f, tid, false, true, false, true);
+			break;
+
+		case NAT_MODE_DIP_SIP_SP:
+			set_nat_params(adap, f, tid, true, true, false, true);
+			break;
+
+		case NAT_MODE_ALL:
+			set_nat_params(adap, f, tid, true, true, true, true);
+			break;
+
+		default:
+			pr_err("%s: Invalid NAT mode: %d\n",
+			       __func__, f->fs.nat_mode);
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+void hash_del_filter_rpl(struct adapter *adap,
+			 const struct cpl_abort_rpl_rss *rpl)
+{
+	unsigned int status = rpl->status;
+	struct tid_info *t = &adap->tids;
+	unsigned int tid = GET_TID(rpl);
+	struct filter_ctx *ctx = NULL;
+	struct filter_entry *f;
+
+	dev_dbg(adap->pdev_dev, "%s: status = %u; tid = %u\n",
+		__func__, status, tid);
+
+	f = lookup_tid(t, tid);
+	if (!f) {
+		dev_err(adap->pdev_dev, "%s:could not find filter entry",
+			__func__);
+		return;
+	}
+	ctx = f->ctx;
+	f->ctx = NULL;
+	clear_filter(adap, f);
+	cxgb4_remove_tid(t, 0, tid, 0);
+	kfree(f);
+	if (ctx) {
+		ctx->result = 0;
+		complete(&ctx->completion);
+	}
+}
+
+void hash_filter_rpl(struct adapter *adap, const struct cpl_act_open_rpl *rpl)
+{
+	unsigned int ftid = TID_TID_G(AOPEN_ATID_G(ntohl(rpl->atid_status)));
+	unsigned int status  = AOPEN_STATUS_G(ntohl(rpl->atid_status));
+	struct tid_info *t = &adap->tids;
+	unsigned int tid = GET_TID(rpl);
+	struct filter_ctx *ctx = NULL;
+	struct filter_entry *f;
+
+	dev_dbg(adap->pdev_dev, "%s: tid = %u; atid = %u; status = %u\n",
+		__func__, tid, ftid, status);
+
+	f = lookup_atid(t, ftid);
+	if (!f) {
+		dev_err(adap->pdev_dev, "%s:could not find filter entry",
+			__func__);
+		return;
+	}
+	ctx = f->ctx;
+	f->ctx = NULL;
+
+	switch (status) {
+	case CPL_ERR_NONE:
+		f->tid = tid;
+		f->pending = 0;
+		f->valid = 1;
+		cxgb4_insert_tid(t, f, f->tid, 0);
+		cxgb4_free_atid(t, ftid);
+		if (ctx) {
+			ctx->tid = f->tid;
+			ctx->result = 0;
+		}
+		if (configure_filter_tcb(adap, tid, f)) {
+			clear_filter(adap, f);
+			cxgb4_remove_tid(t, 0, tid, 0);
+			kfree(f);
+			if (ctx) {
+				ctx->result = -EINVAL;
+				complete(&ctx->completion);
+			}
+			return;
+		}
+		break;
+
+	default:
+		dev_err(adap->pdev_dev, "%s: filter creation PROBLEM; status = %u\n",
+			__func__, status);
+
+		if (ctx) {
+			if (status == CPL_ERR_TCAM_FULL)
+				ctx->result = -EAGAIN;
+			else
+				ctx->result = -EINVAL;
+		}
+		clear_filter(adap, f);
+		cxgb4_free_atid(t, ftid);
+		kfree(f);
+	}
+	if (ctx)
+		complete(&ctx->completion);
+}
+
 /* Handle a filter write/deletion reply. */
 void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl)
 {
@@ -690,19 +1671,23 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl)
 			clear_filter(adap, f);
 			if (ctx)
 				ctx->result = 0;
-		} else if (ret == FW_FILTER_WR_SMT_TBL_FULL) {
-			dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n",
-				idx);
-			clear_filter(adap, f);
-			if (ctx)
-				ctx->result = -ENOMEM;
 		} else if (ret == FW_FILTER_WR_FLT_ADDED) {
-			f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff;
-			f->pending = 0;  /* asynchronous setup completed */
-			f->valid = 1;
-			if (ctx) {
-				ctx->result = 0;
-				ctx->tid = idx;
+			int err = 0;
+
+			if (f->fs.newsmac)
+				err = configure_filter_smac(adap, f);
+
+			if (!err) {
+				f->pending = 0;  /* async setup completed */
+				f->valid = 1;
+				if (ctx) {
+					ctx->result = 0;
+					ctx->tid = idx;
+				}
+			} else {
+				clear_filter(adap, f);
+				if (ctx)
+					ctx->result = err;
 			}
 		} else {
 			/* Something went wrong.  Issue a warning about the
@@ -718,3 +1703,25 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl)
 			complete(&ctx->completion);
 	}
 }
+
+int init_hash_filter(struct adapter *adap)
+{
+	/* On T6, verify the necessary register configs and warn the user in
+	 * case of improper config
+	 */
+	if (is_t6(adap->params.chip)) {
+		if (TCAM_ACTV_HIT_G(t4_read_reg(adap, LE_DB_RSP_CODE_0_A)) != 4)
+			goto err;
+
+		if (HASH_ACTV_HIT_G(t4_read_reg(adap, LE_DB_RSP_CODE_1_A)) != 4)
+			goto err;
+	} else {
+		dev_err(adap->pdev_dev, "Hash filter supported only on T6\n");
+		return -EINVAL;
+	}
+	adap->params.hash_filter = 1;
+	return 0;
+err:
+	dev_warn(adap->pdev_dev, "Invalid hash filter config!\n");
+	return -EINVAL;
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h
index 23742cb1c69f..8db5fca6dcc9 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h
@@ -37,7 +37,12 @@
 
 #include "t4_msg.h"
 
+#define WORD_MASK	0xffffffff
+
 void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl);
+void hash_filter_rpl(struct adapter *adap, const struct cpl_act_open_rpl *rpl);
+void hash_del_filter_rpl(struct adapter *adap,
+			 const struct cpl_abort_rpl_rss *rpl);
 void clear_filter(struct adapter *adap, struct filter_entry *f);
 
 int set_filter_wr(struct adapter *adapter, int fidx);
@@ -45,4 +50,7 @@ int delete_filter(struct adapter *adapter, unsigned int fidx);
 
 int writable_filter(struct filter_entry *f);
 void clear_all_filters(struct adapter *adapter);
+int init_hash_filter(struct adapter *adap);
+bool is_filter_exact_match(struct adapter *adap,
+			   struct ch_filter_specification *fs);
 #endif /* __CXGB4_FILTER_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 92d9d795d874..6f900ffe25cc 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -77,9 +77,12 @@
 #include "cxgb4_debugfs.h"
 #include "clip_tbl.h"
 #include "l2t.h"
+#include "smt.h"
 #include "sched.h"
 #include "cxgb4_tc_u32.h"
+#include "cxgb4_tc_flower.h"
 #include "cxgb4_ptp.h"
+#include "cxgb4_cudbg.h"
 
 char cxgb4_driver_name[] = KBUILD_MODNAME;
 
@@ -280,7 +283,7 @@ void t4_os_link_changed(struct adapter *adapter, int port_id, int link_stat)
 		else {
 #ifdef CONFIG_CHELSIO_T4_DCB
 			if (cxgb4_dcb_enabled(dev)) {
-				cxgb4_dcb_state_init(dev);
+				cxgb4_dcb_reset(dev);
 				dcb_tx_queue_prio_enable(dev, false);
 			}
 #endif /* CONFIG_CHELSIO_T4_DCB */
@@ -561,10 +564,22 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
 		const struct cpl_l2t_write_rpl *p = (void *)rsp;
 
 		do_l2t_write_rpl(q->adap, p);
+	} else if (opcode == CPL_SMT_WRITE_RPL) {
+		const struct cpl_smt_write_rpl *p = (void *)rsp;
+
+		do_smt_write_rpl(q->adap, p);
 	} else if (opcode == CPL_SET_TCB_RPL) {
 		const struct cpl_set_tcb_rpl *p = (void *)rsp;
 
 		filter_rpl(q->adap, p);
+	} else if (opcode == CPL_ACT_OPEN_RPL) {
+		const struct cpl_act_open_rpl *p = (void *)rsp;
+
+		hash_filter_rpl(q->adap, p);
+	} else if (opcode == CPL_ABORT_RPL_RSS) {
+		const struct cpl_abort_rpl_rss *p = (void *)rsp;
+
+		hash_del_filter_rpl(q->adap, p);
 	} else
 		dev_err(q->adap->pdev_dev,
 			"unexpected CPL %#x on FW event queue\n", opcode);
@@ -1637,7 +1652,7 @@ void cxgb4_get_tcp_stats(struct pci_dev *pdev, struct tp_tcp_stats *v4,
 	struct adapter *adap = pci_get_drvdata(pdev);
 
 	spin_lock(&adap->stats_lock);
-	t4_tp_get_tcp_stats(adap, v4, v6);
+	t4_tp_get_tcp_stats(adap, v4, v6, false);
 	spin_unlock(&adap->stats_lock);
 }
 EXPORT_SYMBOL(cxgb4_get_tcp_stats);
@@ -2303,10 +2318,16 @@ static int cxgb_close(struct net_device *dev)
 {
 	struct port_info *pi = netdev_priv(dev);
 	struct adapter *adapter = pi->adapter;
+	int ret;
 
 	netif_tx_stop_all_queues(dev);
 	netif_carrier_off(dev);
-	return t4_enable_vi(adapter, adapter->pf, pi->viid, false, false);
+	ret = t4_enable_vi(adapter, adapter->pf, pi->viid, false, false);
+#ifdef CONFIG_CHELSIO_T4_DCB
+	cxgb4_dcb_reset(dev);
+	dcb_tx_queue_prio_enable(dev, false);
+#endif
+	return ret;
 }
 
 int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid,
@@ -2873,11 +2894,28 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
 	return err;
 }
 
+static int cxgb_setup_tc_flower(struct net_device *dev,
+				struct tc_cls_flower_offload *cls_flower)
+{
+	if (cls_flower->common.chain_index)
+		return -EOPNOTSUPP;
+
+	switch (cls_flower->command) {
+	case TC_CLSFLOWER_REPLACE:
+		return cxgb4_tc_flower_replace(dev, cls_flower);
+	case TC_CLSFLOWER_DESTROY:
+		return cxgb4_tc_flower_destroy(dev, cls_flower);
+	case TC_CLSFLOWER_STATS:
+		return cxgb4_tc_flower_stats(dev, cls_flower);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static int cxgb_setup_tc_cls_u32(struct net_device *dev,
 				 struct tc_cls_u32_offload *cls_u32)
 {
-	if (!is_classid_clsact_ingress(cls_u32->common.classid) ||
-	    cls_u32->common.chain_index)
+	if (cls_u32->common.chain_index)
 		return -EOPNOTSUPP;
 
 	switch (cls_u32->command) {
@@ -2891,9 +2929,10 @@ static int cxgb_setup_tc_cls_u32(struct net_device *dev,
 	}
 }
 
-static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type,
-			 void *type_data)
+static int cxgb_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+				  void *cb_priv)
 {
+	struct net_device *dev = cb_priv;
 	struct port_info *pi = netdev2pinfo(dev);
 	struct adapter *adap = netdev2adap(dev);
 
@@ -2904,9 +2943,45 @@ static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type,
 		return -EINVAL;
 	}
 
+	if (!tc_can_offload(dev))
+		return -EOPNOTSUPP;
+
 	switch (type) {
 	case TC_SETUP_CLSU32:
 		return cxgb_setup_tc_cls_u32(dev, type_data);
+	case TC_SETUP_CLSFLOWER:
+		return cxgb_setup_tc_flower(dev, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int cxgb_setup_tc_block(struct net_device *dev,
+			       struct tc_block_offload *f)
+{
+	struct port_info *pi = netdev2pinfo(dev);
+
+	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+		return -EOPNOTSUPP;
+
+	switch (f->command) {
+	case TC_BLOCK_BIND:
+		return tcf_block_cb_register(f->block, cxgb_setup_tc_block_cb,
+					     pi, dev);
+	case TC_BLOCK_UNBIND:
+		tcf_block_cb_unregister(f->block, cxgb_setup_tc_block_cb, pi);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type,
+			 void *type_data)
+{
+	switch (type) {
+	case TC_SETUP_BLOCK:
+		return cxgb_setup_tc_block(dev, type_data);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -3876,6 +3951,16 @@ static int adap_init0(struct adapter *adap)
 			      1, params, val);
 	adap->params.fr_nsmr_tpte_wr_support = (ret == 0 && val[0] != 0);
 
+	/* See if FW supports FW_FILTER2 work request */
+	if (is_t4(adap->params.chip)) {
+		adap->params.filter2_wr_support = 0;
+	} else {
+		params[0] = FW_PARAM_DEV(FILTER2_WR);
+		ret = t4_query_params(adap, adap->mbox, adap->pf, 0,
+				      1, params, val);
+		adap->params.filter2_wr_support = (ret == 0 && val[0] != 0);
+	}
+
 	/*
 	 * Get device capabilities so we can determine what resources we need
 	 * to manage.
@@ -3889,7 +3974,8 @@ static int adap_init0(struct adapter *adap)
 	if (ret < 0)
 		goto bye;
 
-	if (caps_cmd.ofldcaps) {
+	if (caps_cmd.ofldcaps ||
+	    (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_HASHFILTER))) {
 		/* query offload-related parameters */
 		params[0] = FW_PARAM_DEV(NTID);
 		params[1] = FW_PARAM_PFVF(SERVER_START);
@@ -3926,8 +4012,13 @@ static int adap_init0(struct adapter *adap)
 		adap->vres.ddp.size = val[4] - val[3] + 1;
 		adap->params.ofldq_wr_cred = val[5];
 
-		adap->params.offload = 1;
-		adap->num_ofld_uld += 1;
+		if (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_HASHFILTER)) {
+			if (init_hash_filter(adap) < 0)
+				goto bye;
+		} else {
+			adap->params.offload = 1;
+			adap->num_ofld_uld += 1;
+		}
 	}
 	if (caps_cmd.rdmacaps) {
 		params[0] = FW_PARAM_PFVF(STAG_START);
@@ -4048,7 +4139,7 @@ static int adap_init0(struct adapter *adap)
 	}
 	t4_init_sge_params(adap);
 	adap->flags |= FW_OK;
-	t4_init_tp_params(adap);
+	t4_init_tp_params(adap, true);
 	return 0;
 
 	/*
@@ -4612,9 +4703,11 @@ static void free_some_resources(struct adapter *adapter)
 {
 	unsigned int i;
 
+	kvfree(adapter->smt);
 	kvfree(adapter->l2t);
 	t4_cleanup_sched(adapter);
 	kvfree(adapter->tids.tid_tab);
+	cxgb4_cleanup_tc_flower(adapter);
 	cxgb4_cleanup_tc_u32(adapter);
 	kfree(adapter->sge.egr_map);
 	kfree(adapter->sge.ingr_map);
@@ -4995,7 +5088,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		netdev->priv_flags |= IFF_UNICAST_FLT;
 
 		/* MTU range: 81 - 9600 */
-		netdev->min_mtu = 81;
+		netdev->min_mtu = 81;              /* accommodate SACK */
 		netdev->max_mtu = MAX_MTU;
 
 		netdev->netdev_ops = &cxgb4_netdev_ops;
@@ -5006,6 +5099,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		cxgb4_set_ethtool_ops(netdev);
 	}
 
+	cxgb4_init_ethtool_dump(adapter);
+
 	pci_set_drvdata(pdev, adapter);
 
 	if (adapter->flags & FW_OK) {
@@ -5035,6 +5130,12 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	 */
 	cfg_queues(adapter);
 
+	adapter->smt = t4_init_smt();
+	if (!adapter->smt) {
+		/* We tolerate a lack of SMT, giving up some functionality */
+		dev_warn(&pdev->dev, "could not allocate SMT, continuing\n");
+	}
+
 	adapter->l2t = t4_init_l2t(adapter->l2t_start, adapter->l2t_end);
 	if (!adapter->l2t) {
 		/* We tolerate a lack of L2T, giving up some functionality */
@@ -5083,9 +5184,13 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		if (!adapter->tc_u32)
 			dev_warn(&pdev->dev,
 				 "could not offload tc u32, continuing\n");
+
+		if (cxgb4_init_tc_flower(adapter))
+			dev_warn(&pdev->dev,
+				 "could not offload tc flower, continuing\n");
 	}
 
-	if (is_offload(adapter)) {
+	if (is_offload(adapter) || is_hashfilter(adapter)) {
 		if (t4_read_reg(adapter, LE_DB_CONFIG_A) & HASHEN_F) {
 			u32 hash_base, hash_reg;
 
@@ -5254,6 +5359,8 @@ static void remove_one(struct pci_dev *pdev)
 		return;
 	}
 
+	adapter->flags |= SHUTTING_DOWN;
+
 	if (adapter->pf == 4) {
 		int i;
 
@@ -5339,6 +5446,8 @@ static void shutdown_one(struct pci_dev *pdev)
 		return;
 	}
 
+	adapter->flags |= SHUTTING_DOWN;
+
 	if (adapter->pf == 4) {
 		int i;
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
new file mode 100644
index 000000000000..d4a548a6a55c
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
@@ -0,0 +1,876 @@
+/*
+ * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2017 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_pedit.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_vlan.h>
+
+#include "cxgb4.h"
+#include "cxgb4_filter.h"
+#include "cxgb4_tc_flower.h"
+
+#define STATS_CHECK_PERIOD (HZ / 2)
+
+struct ch_tc_pedit_fields pedits[] = {
+	PEDIT_FIELDS(ETH_, DMAC_31_0, 4, dmac, 0),
+	PEDIT_FIELDS(ETH_, DMAC_47_32, 2, dmac, 4),
+	PEDIT_FIELDS(ETH_, SMAC_15_0, 2, smac, 0),
+	PEDIT_FIELDS(ETH_, SMAC_47_16, 4, smac, 2),
+	PEDIT_FIELDS(IP4_, SRC, 4, nat_fip, 0),
+	PEDIT_FIELDS(IP4_, DST, 4, nat_lip, 0),
+	PEDIT_FIELDS(IP6_, SRC_31_0, 4, nat_fip, 0),
+	PEDIT_FIELDS(IP6_, SRC_63_32, 4, nat_fip, 4),
+	PEDIT_FIELDS(IP6_, SRC_95_64, 4, nat_fip, 8),
+	PEDIT_FIELDS(IP6_, SRC_127_96, 4, nat_fip, 12),
+	PEDIT_FIELDS(IP6_, DST_31_0, 4, nat_lip, 0),
+	PEDIT_FIELDS(IP6_, DST_63_32, 4, nat_lip, 4),
+	PEDIT_FIELDS(IP6_, DST_95_64, 4, nat_lip, 8),
+	PEDIT_FIELDS(IP6_, DST_127_96, 4, nat_lip, 12),
+	PEDIT_FIELDS(TCP_, SPORT, 2, nat_fport, 0),
+	PEDIT_FIELDS(TCP_, DPORT, 2, nat_lport, 0),
+	PEDIT_FIELDS(UDP_, SPORT, 2, nat_fport, 0),
+	PEDIT_FIELDS(UDP_, DPORT, 2, nat_lport, 0),
+};
+
+static struct ch_tc_flower_entry *allocate_flower_entry(void)
+{
+	struct ch_tc_flower_entry *new = kzalloc(sizeof(*new), GFP_KERNEL);
+	spin_lock_init(&new->lock);
+	return new;
+}
+
+/* Must be called with either RTNL or rcu_read_lock */
+static struct ch_tc_flower_entry *ch_flower_lookup(struct adapter *adap,
+						   unsigned long flower_cookie)
+{
+	return rhashtable_lookup_fast(&adap->flower_tbl, &flower_cookie,
+				      adap->flower_ht_params);
+}
+
+static void cxgb4_process_flow_match(struct net_device *dev,
+				     struct tc_cls_flower_offload *cls,
+				     struct ch_filter_specification *fs)
+{
+	u16 addr_type = 0;
+
+	if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_dissector_key_control *key =
+			skb_flow_dissector_target(cls->dissector,
+						  FLOW_DISSECTOR_KEY_CONTROL,
+						  cls->key);
+
+		addr_type = key->addr_type;
+	}
+
+	if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_dissector_key_basic *key =
+			skb_flow_dissector_target(cls->dissector,
+						  FLOW_DISSECTOR_KEY_BASIC,
+						  cls->key);
+		struct flow_dissector_key_basic *mask =
+			skb_flow_dissector_target(cls->dissector,
+						  FLOW_DISSECTOR_KEY_BASIC,
+						  cls->mask);
+		u16 ethtype_key = ntohs(key->n_proto);
+		u16 ethtype_mask = ntohs(mask->n_proto);
+
+		if (ethtype_key == ETH_P_ALL) {
+			ethtype_key = 0;
+			ethtype_mask = 0;
+		}
+
+		fs->val.ethtype = ethtype_key;
+		fs->mask.ethtype = ethtype_mask;
+		fs->val.proto = key->ip_proto;
+		fs->mask.proto = mask->ip_proto;
+	}
+
+	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+		struct flow_dissector_key_ipv4_addrs *key =
+			skb_flow_dissector_target(cls->dissector,
+						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+						  cls->key);
+		struct flow_dissector_key_ipv4_addrs *mask =
+			skb_flow_dissector_target(cls->dissector,
+						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+						  cls->mask);
+		fs->type = 0;
+		memcpy(&fs->val.lip[0], &key->dst, sizeof(key->dst));
+		memcpy(&fs->val.fip[0], &key->src, sizeof(key->src));
+		memcpy(&fs->mask.lip[0], &mask->dst, sizeof(mask->dst));
+		memcpy(&fs->mask.fip[0], &mask->src, sizeof(mask->src));
+
+		/* also initialize nat_lip/fip to same values */
+		memcpy(&fs->nat_lip[0], &key->dst, sizeof(key->dst));
+		memcpy(&fs->nat_fip[0], &key->src, sizeof(key->src));
+
+	}
+
+	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+		struct flow_dissector_key_ipv6_addrs *key =
+			skb_flow_dissector_target(cls->dissector,
+						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+						  cls->key);
+		struct flow_dissector_key_ipv6_addrs *mask =
+			skb_flow_dissector_target(cls->dissector,
+						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+						  cls->mask);
+
+		fs->type = 1;
+		memcpy(&fs->val.lip[0], key->dst.s6_addr, sizeof(key->dst));
+		memcpy(&fs->val.fip[0], key->src.s6_addr, sizeof(key->src));
+		memcpy(&fs->mask.lip[0], mask->dst.s6_addr, sizeof(mask->dst));
+		memcpy(&fs->mask.fip[0], mask->src.s6_addr, sizeof(mask->src));
+
+		/* also initialize nat_lip/fip to same values */
+		memcpy(&fs->nat_lip[0], key->dst.s6_addr, sizeof(key->dst));
+		memcpy(&fs->nat_fip[0], key->src.s6_addr, sizeof(key->src));
+	}
+
+	if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_dissector_key_ports *key, *mask;
+
+		key = skb_flow_dissector_target(cls->dissector,
+						FLOW_DISSECTOR_KEY_PORTS,
+						cls->key);
+		mask = skb_flow_dissector_target(cls->dissector,
+						 FLOW_DISSECTOR_KEY_PORTS,
+						 cls->mask);
+		fs->val.lport = cpu_to_be16(key->dst);
+		fs->mask.lport = cpu_to_be16(mask->dst);
+		fs->val.fport = cpu_to_be16(key->src);
+		fs->mask.fport = cpu_to_be16(mask->src);
+
+		/* also initialize nat_lport/fport to same values */
+		fs->nat_lport = cpu_to_be16(key->dst);
+		fs->nat_fport = cpu_to_be16(key->src);
+	}
+
+	if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_IP)) {
+		struct flow_dissector_key_ip *key, *mask;
+
+		key = skb_flow_dissector_target(cls->dissector,
+						FLOW_DISSECTOR_KEY_IP,
+						cls->key);
+		mask = skb_flow_dissector_target(cls->dissector,
+						 FLOW_DISSECTOR_KEY_IP,
+						 cls->mask);
+		fs->val.tos = key->tos;
+		fs->mask.tos = mask->tos;
+	}
+
+	if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_dissector_key_vlan *key, *mask;
+		u16 vlan_tci, vlan_tci_mask;
+
+		key = skb_flow_dissector_target(cls->dissector,
+						FLOW_DISSECTOR_KEY_VLAN,
+						cls->key);
+		mask = skb_flow_dissector_target(cls->dissector,
+						 FLOW_DISSECTOR_KEY_VLAN,
+						 cls->mask);
+		vlan_tci = key->vlan_id | (key->vlan_priority <<
+					   VLAN_PRIO_SHIFT);
+		vlan_tci_mask = mask->vlan_id | (mask->vlan_priority <<
+						 VLAN_PRIO_SHIFT);
+		fs->val.ivlan = cpu_to_be16(vlan_tci);
+		fs->mask.ivlan = cpu_to_be16(vlan_tci_mask);
+
+		/* Chelsio adapters use ivlan_vld bit to match vlan packets
+		 * as 802.1Q. Also, when vlan tag is present in packets,
+		 * ethtype match is used then to match on ethtype of inner
+		 * header ie. the header following the vlan header.
+		 * So, set the ivlan_vld based on ethtype info supplied by
+		 * TC for vlan packets if its 802.1Q. And then reset the
+		 * ethtype value else, hw will try to match the supplied
+		 * ethtype value with ethtype of inner header.
+		 */
+		if (fs->val.ethtype == ETH_P_8021Q) {
+			fs->val.ivlan_vld = 1;
+			fs->mask.ivlan_vld = 1;
+			fs->val.ethtype = 0;
+			fs->mask.ethtype = 0;
+		}
+	}
+
+	/* Match only packets coming from the ingress port where this
+	 * filter will be created.
+	 */
+	fs->val.iport = netdev2pinfo(dev)->port_id;
+	fs->mask.iport = ~0;
+}
+
+static int cxgb4_validate_flow_match(struct net_device *dev,
+				     struct tc_cls_flower_offload *cls)
+{
+	u16 ethtype_mask = 0;
+	u16 ethtype_key = 0;
+
+	if (cls->dissector->used_keys &
+	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
+	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
+	      BIT(FLOW_DISSECTOR_KEY_VLAN) |
+	      BIT(FLOW_DISSECTOR_KEY_IP))) {
+		netdev_warn(dev, "Unsupported key used: 0x%x\n",
+			    cls->dissector->used_keys);
+		return -EOPNOTSUPP;
+	}
+
+	if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_dissector_key_basic *key =
+			skb_flow_dissector_target(cls->dissector,
+						  FLOW_DISSECTOR_KEY_BASIC,
+						  cls->key);
+		struct flow_dissector_key_basic *mask =
+			skb_flow_dissector_target(cls->dissector,
+						  FLOW_DISSECTOR_KEY_BASIC,
+						  cls->mask);
+		ethtype_key = ntohs(key->n_proto);
+		ethtype_mask = ntohs(mask->n_proto);
+	}
+
+	if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_IP)) {
+		u16 eth_ip_type = ethtype_key & ethtype_mask;
+		struct flow_dissector_key_ip *mask;
+
+		if (eth_ip_type != ETH_P_IP && eth_ip_type != ETH_P_IPV6) {
+			netdev_err(dev, "IP Key supported only with IPv4/v6");
+			return -EINVAL;
+		}
+
+		mask = skb_flow_dissector_target(cls->dissector,
+						 FLOW_DISSECTOR_KEY_IP,
+						 cls->mask);
+		if (mask->ttl) {
+			netdev_warn(dev, "ttl match unsupported for offload");
+			return -EOPNOTSUPP;
+		}
+	}
+
+	return 0;
+}
+
+static void offload_pedit(struct ch_filter_specification *fs, u32 val, u32 mask,
+			  u8 field)
+{
+	u32 set_val = val & ~mask;
+	u32 offset = 0;
+	u8 size = 1;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(pedits); i++) {
+		if (pedits[i].field == field) {
+			offset = pedits[i].offset;
+			size = pedits[i].size;
+			break;
+		}
+	}
+	memcpy((u8 *)fs + offset, &set_val, size);
+}
+
+static void process_pedit_field(struct ch_filter_specification *fs, u32 val,
+				u32 mask, u32 offset, u8 htype)
+{
+	switch (htype) {
+	case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH:
+		switch (offset) {
+		case PEDIT_ETH_DMAC_31_0:
+			fs->newdmac = 1;
+			offload_pedit(fs, val, mask, ETH_DMAC_31_0);
+			break;
+		case PEDIT_ETH_DMAC_47_32_SMAC_15_0:
+			if (~mask & PEDIT_ETH_DMAC_MASK)
+				offload_pedit(fs, val, mask, ETH_DMAC_47_32);
+			else
+				offload_pedit(fs, val >> 16, mask >> 16,
+					      ETH_SMAC_15_0);
+			break;
+		case PEDIT_ETH_SMAC_47_16:
+			fs->newsmac = 1;
+			offload_pedit(fs, val, mask, ETH_SMAC_47_16);
+		}
+		break;
+	case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4:
+		switch (offset) {
+		case PEDIT_IP4_SRC:
+			offload_pedit(fs, val, mask, IP4_SRC);
+			break;
+		case PEDIT_IP4_DST:
+			offload_pedit(fs, val, mask, IP4_DST);
+		}
+		fs->nat_mode = NAT_MODE_ALL;
+		break;
+	case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6:
+		switch (offset) {
+		case PEDIT_IP6_SRC_31_0:
+			offload_pedit(fs, val, mask, IP6_SRC_31_0);
+			break;
+		case PEDIT_IP6_SRC_63_32:
+			offload_pedit(fs, val, mask, IP6_SRC_63_32);
+			break;
+		case PEDIT_IP6_SRC_95_64:
+			offload_pedit(fs, val, mask, IP6_SRC_95_64);
+			break;
+		case PEDIT_IP6_SRC_127_96:
+			offload_pedit(fs, val, mask, IP6_SRC_127_96);
+			break;
+		case PEDIT_IP6_DST_31_0:
+			offload_pedit(fs, val, mask, IP6_DST_31_0);
+			break;
+		case PEDIT_IP6_DST_63_32:
+			offload_pedit(fs, val, mask, IP6_DST_63_32);
+			break;
+		case PEDIT_IP6_DST_95_64:
+			offload_pedit(fs, val, mask, IP6_DST_95_64);
+			break;
+		case PEDIT_IP6_DST_127_96:
+			offload_pedit(fs, val, mask, IP6_DST_127_96);
+		}
+		fs->nat_mode = NAT_MODE_ALL;
+		break;
+	case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP:
+		switch (offset) {
+		case PEDIT_TCP_SPORT_DPORT:
+			if (~mask & PEDIT_TCP_UDP_SPORT_MASK)
+				offload_pedit(fs, cpu_to_be32(val) >> 16,
+					      cpu_to_be32(mask) >> 16,
+					      TCP_SPORT);
+			else
+				offload_pedit(fs, cpu_to_be32(val),
+					      cpu_to_be32(mask), TCP_DPORT);
+		}
+		fs->nat_mode = NAT_MODE_ALL;
+		break;
+	case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP:
+		switch (offset) {
+		case PEDIT_UDP_SPORT_DPORT:
+			if (~mask & PEDIT_TCP_UDP_SPORT_MASK)
+				offload_pedit(fs, cpu_to_be32(val) >> 16,
+					      cpu_to_be32(mask) >> 16,
+					      UDP_SPORT);
+			else
+				offload_pedit(fs, cpu_to_be32(val),
+					      cpu_to_be32(mask), UDP_DPORT);
+		}
+		fs->nat_mode = NAT_MODE_ALL;
+	}
+}
+
+static void cxgb4_process_flow_actions(struct net_device *in,
+				       struct tc_cls_flower_offload *cls,
+				       struct ch_filter_specification *fs)
+{
+	const struct tc_action *a;
+	LIST_HEAD(actions);
+
+	tcf_exts_to_list(cls->exts, &actions);
+	list_for_each_entry(a, &actions, list) {
+		if (is_tcf_gact_ok(a)) {
+			fs->action = FILTER_PASS;
+		} else if (is_tcf_gact_shot(a)) {
+			fs->action = FILTER_DROP;
+		} else if (is_tcf_mirred_egress_redirect(a)) {
+			int ifindex = tcf_mirred_ifindex(a);
+			struct net_device *out = __dev_get_by_index(dev_net(in),
+								    ifindex);
+			struct port_info *pi = netdev_priv(out);
+
+			fs->action = FILTER_SWITCH;
+			fs->eport = pi->port_id;
+		} else if (is_tcf_vlan(a)) {
+			u32 vlan_action = tcf_vlan_action(a);
+			u8 prio = tcf_vlan_push_prio(a);
+			u16 vid = tcf_vlan_push_vid(a);
+			u16 vlan_tci = (prio << VLAN_PRIO_SHIFT) | vid;
+
+			switch (vlan_action) {
+			case TCA_VLAN_ACT_POP:
+				fs->newvlan |= VLAN_REMOVE;
+				break;
+			case TCA_VLAN_ACT_PUSH:
+				fs->newvlan |= VLAN_INSERT;
+				fs->vlan = vlan_tci;
+				break;
+			case TCA_VLAN_ACT_MODIFY:
+				fs->newvlan |= VLAN_REWRITE;
+				fs->vlan = vlan_tci;
+				break;
+			default:
+				break;
+			}
+		} else if (is_tcf_pedit(a)) {
+			u32 mask, val, offset;
+			int nkeys, i;
+			u8 htype;
+
+			nkeys = tcf_pedit_nkeys(a);
+			for (i = 0; i < nkeys; i++) {
+				htype = tcf_pedit_htype(a, i);
+				mask = tcf_pedit_mask(a, i);
+				val = tcf_pedit_val(a, i);
+				offset = tcf_pedit_offset(a, i);
+
+				process_pedit_field(fs, val, mask, offset,
+						    htype);
+			}
+		}
+	}
+}
+
+static bool valid_l4_mask(u32 mask)
+{
+	u16 hi, lo;
+
+	/* Either the upper 16-bits (SPORT) OR the lower
+	 * 16-bits (DPORT) can be set, but NOT BOTH.
+	 */
+	hi = (mask >> 16) & 0xFFFF;
+	lo = mask & 0xFFFF;
+
+	return hi && lo ? false : true;
+}
+
+static bool valid_pedit_action(struct net_device *dev,
+			       const struct tc_action *a)
+{
+	u32 mask, offset;
+	u8 cmd, htype;
+	int nkeys, i;
+
+	nkeys = tcf_pedit_nkeys(a);
+	for (i = 0; i < nkeys; i++) {
+		htype = tcf_pedit_htype(a, i);
+		cmd = tcf_pedit_cmd(a, i);
+		mask = tcf_pedit_mask(a, i);
+		offset = tcf_pedit_offset(a, i);
+
+		if (cmd != TCA_PEDIT_KEY_EX_CMD_SET) {
+			netdev_err(dev, "%s: Unsupported pedit cmd\n",
+				   __func__);
+			return false;
+		}
+
+		switch (htype) {
+		case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH:
+			switch (offset) {
+			case PEDIT_ETH_DMAC_31_0:
+			case PEDIT_ETH_DMAC_47_32_SMAC_15_0:
+			case PEDIT_ETH_SMAC_47_16:
+				break;
+			default:
+				netdev_err(dev, "%s: Unsupported pedit field\n",
+					   __func__);
+				return false;
+			}
+			break;
+		case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4:
+			switch (offset) {
+			case PEDIT_IP4_SRC:
+			case PEDIT_IP4_DST:
+				break;
+			default:
+				netdev_err(dev, "%s: Unsupported pedit field\n",
+					   __func__);
+				return false;
+			}
+			break;
+		case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6:
+			switch (offset) {
+			case PEDIT_IP6_SRC_31_0:
+			case PEDIT_IP6_SRC_63_32:
+			case PEDIT_IP6_SRC_95_64:
+			case PEDIT_IP6_SRC_127_96:
+			case PEDIT_IP6_DST_31_0:
+			case PEDIT_IP6_DST_63_32:
+			case PEDIT_IP6_DST_95_64:
+			case PEDIT_IP6_DST_127_96:
+				break;
+			default:
+				netdev_err(dev, "%s: Unsupported pedit field\n",
+					   __func__);
+				return false;
+			}
+			break;
+		case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP:
+			switch (offset) {
+			case PEDIT_TCP_SPORT_DPORT:
+				if (!valid_l4_mask(~mask)) {
+					netdev_err(dev, "%s: Unsupported mask for TCP L4 ports\n",
+						   __func__);
+					return false;
+				}
+				break;
+			default:
+				netdev_err(dev, "%s: Unsupported pedit field\n",
+					   __func__);
+				return false;
+			}
+			break;
+		case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP:
+			switch (offset) {
+			case PEDIT_UDP_SPORT_DPORT:
+				if (!valid_l4_mask(~mask)) {
+					netdev_err(dev, "%s: Unsupported mask for UDP L4 ports\n",
+						   __func__);
+					return false;
+				}
+				break;
+			default:
+				netdev_err(dev, "%s: Unsupported pedit field\n",
+					   __func__);
+				return false;
+			}
+			break;
+		default:
+			netdev_err(dev, "%s: Unsupported pedit type\n",
+				   __func__);
+			return false;
+		}
+	}
+	return true;
+}
+
+static int cxgb4_validate_flow_actions(struct net_device *dev,
+				       struct tc_cls_flower_offload *cls)
+{
+	const struct tc_action *a;
+	bool act_redir = false;
+	bool act_pedit = false;
+	bool act_vlan = false;
+	LIST_HEAD(actions);
+
+	tcf_exts_to_list(cls->exts, &actions);
+	list_for_each_entry(a, &actions, list) {
+		if (is_tcf_gact_ok(a)) {
+			/* Do nothing */
+		} else if (is_tcf_gact_shot(a)) {
+			/* Do nothing */
+		} else if (is_tcf_mirred_egress_redirect(a)) {
+			struct adapter *adap = netdev2adap(dev);
+			struct net_device *n_dev;
+			unsigned int i, ifindex;
+			bool found = false;
+
+			ifindex = tcf_mirred_ifindex(a);
+			for_each_port(adap, i) {
+				n_dev = adap->port[i];
+				if (ifindex == n_dev->ifindex) {
+					found = true;
+					break;
+				}
+			}
+
+			/* If interface doesn't belong to our hw, then
+			 * the provided output port is not valid
+			 */
+			if (!found) {
+				netdev_err(dev, "%s: Out port invalid\n",
+					   __func__);
+				return -EINVAL;
+			}
+			act_redir = true;
+		} else if (is_tcf_vlan(a)) {
+			u16 proto = be16_to_cpu(tcf_vlan_push_proto(a));
+			u32 vlan_action = tcf_vlan_action(a);
+
+			switch (vlan_action) {
+			case TCA_VLAN_ACT_POP:
+				break;
+			case TCA_VLAN_ACT_PUSH:
+			case TCA_VLAN_ACT_MODIFY:
+				if (proto != ETH_P_8021Q) {
+					netdev_err(dev, "%s: Unsupported vlan proto\n",
+						   __func__);
+					return -EOPNOTSUPP;
+				}
+				break;
+			default:
+				netdev_err(dev, "%s: Unsupported vlan action\n",
+					   __func__);
+				return -EOPNOTSUPP;
+			}
+			act_vlan = true;
+		} else if (is_tcf_pedit(a)) {
+			bool pedit_valid = valid_pedit_action(dev, a);
+
+			if (!pedit_valid)
+				return -EOPNOTSUPP;
+			act_pedit = true;
+		} else {
+			netdev_err(dev, "%s: Unsupported action\n", __func__);
+			return -EOPNOTSUPP;
+		}
+	}
+
+	if ((act_pedit || act_vlan) && !act_redir) {
+		netdev_err(dev, "%s: pedit/vlan rewrite invalid without egress redirect\n",
+			   __func__);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int cxgb4_tc_flower_replace(struct net_device *dev,
+			    struct tc_cls_flower_offload *cls)
+{
+	struct adapter *adap = netdev2adap(dev);
+	struct ch_tc_flower_entry *ch_flower;
+	struct ch_filter_specification *fs;
+	struct filter_ctx ctx;
+	int fidx;
+	int ret;
+
+	if (cxgb4_validate_flow_actions(dev, cls))
+		return -EOPNOTSUPP;
+
+	if (cxgb4_validate_flow_match(dev, cls))
+		return -EOPNOTSUPP;
+
+	ch_flower = allocate_flower_entry();
+	if (!ch_flower) {
+		netdev_err(dev, "%s: ch_flower alloc failed.\n", __func__);
+		return -ENOMEM;
+	}
+
+	fs = &ch_flower->fs;
+	fs->hitcnts = 1;
+	cxgb4_process_flow_match(dev, cls, fs);
+	cxgb4_process_flow_actions(dev, cls, fs);
+
+	fs->hash = is_filter_exact_match(adap, fs);
+	if (fs->hash) {
+		fidx = 0;
+	} else {
+		fidx = cxgb4_get_free_ftid(dev, fs->type ? PF_INET6 : PF_INET);
+		if (fidx < 0) {
+			netdev_err(dev, "%s: No fidx for offload.\n", __func__);
+			ret = -ENOMEM;
+			goto free_entry;
+		}
+	}
+
+	init_completion(&ctx.completion);
+	ret = __cxgb4_set_filter(dev, fidx, fs, &ctx);
+	if (ret) {
+		netdev_err(dev, "%s: filter creation err %d\n",
+			   __func__, ret);
+		goto free_entry;
+	}
+
+	/* Wait for reply */
+	ret = wait_for_completion_timeout(&ctx.completion, 10 * HZ);
+	if (!ret) {
+		ret = -ETIMEDOUT;
+		goto free_entry;
+	}
+
+	ret = ctx.result;
+	/* Check if hw returned error for filter creation */
+	if (ret) {
+		netdev_err(dev, "%s: filter creation err %d\n",
+			   __func__, ret);
+		goto free_entry;
+	}
+
+	ch_flower->tc_flower_cookie = cls->cookie;
+	ch_flower->filter_id = ctx.tid;
+	ret = rhashtable_insert_fast(&adap->flower_tbl, &ch_flower->node,
+				     adap->flower_ht_params);
+	if (ret)
+		goto del_filter;
+
+	return 0;
+
+del_filter:
+	cxgb4_del_filter(dev, ch_flower->filter_id, &ch_flower->fs);
+
+free_entry:
+	kfree(ch_flower);
+	return ret;
+}
+
+int cxgb4_tc_flower_destroy(struct net_device *dev,
+			    struct tc_cls_flower_offload *cls)
+{
+	struct adapter *adap = netdev2adap(dev);
+	struct ch_tc_flower_entry *ch_flower;
+	int ret;
+
+	ch_flower = ch_flower_lookup(adap, cls->cookie);
+	if (!ch_flower)
+		return -ENOENT;
+
+	ret = cxgb4_del_filter(dev, ch_flower->filter_id, &ch_flower->fs);
+	if (ret)
+		goto err;
+
+	ret = rhashtable_remove_fast(&adap->flower_tbl, &ch_flower->node,
+				     adap->flower_ht_params);
+	if (ret) {
+		netdev_err(dev, "Flow remove from rhashtable failed");
+		goto err;
+	}
+	kfree_rcu(ch_flower, rcu);
+
+err:
+	return ret;
+}
+
+static void ch_flower_stats_handler(struct work_struct *work)
+{
+	struct adapter *adap = container_of(work, struct adapter,
+					    flower_stats_work);
+	struct ch_tc_flower_entry *flower_entry;
+	struct ch_tc_flower_stats *ofld_stats;
+	struct rhashtable_iter iter;
+	u64 packets;
+	u64 bytes;
+	int ret;
+
+	rhashtable_walk_enter(&adap->flower_tbl, &iter);
+	do {
+		flower_entry = ERR_PTR(rhashtable_walk_start(&iter));
+		if (IS_ERR(flower_entry))
+			goto walk_stop;
+
+		while ((flower_entry = rhashtable_walk_next(&iter)) &&
+		       !IS_ERR(flower_entry)) {
+			ret = cxgb4_get_filter_counters(adap->port[0],
+							flower_entry->filter_id,
+							&packets, &bytes,
+							flower_entry->fs.hash);
+			if (!ret) {
+				spin_lock(&flower_entry->lock);
+				ofld_stats = &flower_entry->stats;
+
+				if (ofld_stats->prev_packet_count != packets) {
+					ofld_stats->prev_packet_count = packets;
+					ofld_stats->last_used = jiffies;
+				}
+				spin_unlock(&flower_entry->lock);
+			}
+		}
+walk_stop:
+		rhashtable_walk_stop(&iter);
+	} while (flower_entry == ERR_PTR(-EAGAIN));
+	rhashtable_walk_exit(&iter);
+	mod_timer(&adap->flower_stats_timer, jiffies + STATS_CHECK_PERIOD);
+}
+
+static void ch_flower_stats_cb(struct timer_list *t)
+{
+	struct adapter *adap = from_timer(adap, t, flower_stats_timer);
+
+	schedule_work(&adap->flower_stats_work);
+}
+
+int cxgb4_tc_flower_stats(struct net_device *dev,
+			  struct tc_cls_flower_offload *cls)
+{
+	struct adapter *adap = netdev2adap(dev);
+	struct ch_tc_flower_stats *ofld_stats;
+	struct ch_tc_flower_entry *ch_flower;
+	u64 packets;
+	u64 bytes;
+	int ret;
+
+	ch_flower = ch_flower_lookup(adap, cls->cookie);
+	if (!ch_flower) {
+		ret = -ENOENT;
+		goto err;
+	}
+
+	ret = cxgb4_get_filter_counters(dev, ch_flower->filter_id,
+					&packets, &bytes,
+					ch_flower->fs.hash);
+	if (ret < 0)
+		goto err;
+
+	spin_lock_bh(&ch_flower->lock);
+	ofld_stats = &ch_flower->stats;
+	if (ofld_stats->packet_count != packets) {
+		if (ofld_stats->prev_packet_count != packets)
+			ofld_stats->last_used = jiffies;
+		tcf_exts_stats_update(cls->exts, bytes - ofld_stats->byte_count,
+				      packets - ofld_stats->packet_count,
+				      ofld_stats->last_used);
+
+		ofld_stats->packet_count = packets;
+		ofld_stats->byte_count = bytes;
+		ofld_stats->prev_packet_count = packets;
+	}
+	spin_unlock_bh(&ch_flower->lock);
+	return 0;
+
+err:
+	return ret;
+}
+
+static const struct rhashtable_params cxgb4_tc_flower_ht_params = {
+	.nelem_hint = 384,
+	.head_offset = offsetof(struct ch_tc_flower_entry, node),
+	.key_offset = offsetof(struct ch_tc_flower_entry, tc_flower_cookie),
+	.key_len = sizeof(((struct ch_tc_flower_entry *)0)->tc_flower_cookie),
+	.max_size = 524288,
+	.min_size = 512,
+	.automatic_shrinking = true
+};
+
+int cxgb4_init_tc_flower(struct adapter *adap)
+{
+	int ret;
+
+	adap->flower_ht_params = cxgb4_tc_flower_ht_params;
+	ret = rhashtable_init(&adap->flower_tbl, &adap->flower_ht_params);
+	if (ret)
+		return ret;
+
+	INIT_WORK(&adap->flower_stats_work, ch_flower_stats_handler);
+	timer_setup(&adap->flower_stats_timer, ch_flower_stats_cb, 0);
+	mod_timer(&adap->flower_stats_timer, jiffies + STATS_CHECK_PERIOD);
+	return 0;
+}
+
+void cxgb4_cleanup_tc_flower(struct adapter *adap)
+{
+	if (adap->flower_stats_timer.function)
+		del_timer_sync(&adap->flower_stats_timer);
+	cancel_work_sync(&adap->flower_stats_work);
+	rhashtable_destroy(&adap->flower_tbl);
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h
new file mode 100644
index 000000000000..050c8a50ae41
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h
@@ -0,0 +1,120 @@
+/*
+ * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2017 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CXGB4_TC_FLOWER_H
+#define __CXGB4_TC_FLOWER_H
+
+#include <net/pkt_cls.h>
+
+struct ch_tc_flower_stats {
+	u64 prev_packet_count;
+	u64 packet_count;
+	u64 byte_count;
+	u64 last_used;
+};
+
+struct ch_tc_flower_entry {
+	struct ch_filter_specification fs;
+	struct ch_tc_flower_stats stats;
+	unsigned long tc_flower_cookie;
+	struct rhash_head node;
+	struct rcu_head rcu;
+	spinlock_t lock; /* lock for stats */
+	u32 filter_id;
+};
+
+enum {
+	ETH_DMAC_31_0,	/* dmac bits 0.. 31 */
+	ETH_DMAC_47_32,	/* dmac bits 32..47 */
+	ETH_SMAC_15_0,	/* smac bits 0.. 15 */
+	ETH_SMAC_47_16,	/* smac bits 16..47 */
+
+	IP4_SRC,	/* 32-bit IPv4 src  */
+	IP4_DST,	/* 32-bit IPv4 dst  */
+
+	IP6_SRC_31_0,	/* src bits 0..  31 */
+	IP6_SRC_63_32,	/* src bits 63.. 32 */
+	IP6_SRC_95_64,	/* src bits 95.. 64 */
+	IP6_SRC_127_96,	/* src bits 127..96 */
+
+	IP6_DST_31_0,	/* dst bits 0..  31 */
+	IP6_DST_63_32,	/* dst bits 63.. 32 */
+	IP6_DST_95_64,	/* dst bits 95.. 64 */
+	IP6_DST_127_96,	/* dst bits 127..96 */
+
+	TCP_SPORT,	/* 16-bit TCP sport */
+	TCP_DPORT,	/* 16-bit TCP dport */
+
+	UDP_SPORT,	/* 16-bit UDP sport */
+	UDP_DPORT,	/* 16-bit UDP dport */
+};
+
+struct ch_tc_pedit_fields {
+	u8 field;
+	u8 size;
+	u32 offset;
+};
+
+#define PEDIT_FIELDS(type, field, size, fs_field, offset) \
+	{ type## field, size, \
+		offsetof(struct ch_filter_specification, fs_field) + (offset) }
+
+#define PEDIT_ETH_DMAC_MASK		0xffff
+#define PEDIT_TCP_UDP_SPORT_MASK	0xffff
+#define PEDIT_ETH_DMAC_31_0		0x0
+#define PEDIT_ETH_DMAC_47_32_SMAC_15_0	0x4
+#define PEDIT_ETH_SMAC_47_16		0x8
+#define PEDIT_IP4_SRC			0xC
+#define PEDIT_IP4_DST			0x10
+#define PEDIT_IP6_SRC_31_0		0x8
+#define PEDIT_IP6_SRC_63_32		0xC
+#define PEDIT_IP6_SRC_95_64		0x10
+#define PEDIT_IP6_SRC_127_96		0x14
+#define PEDIT_IP6_DST_31_0		0x18
+#define PEDIT_IP6_DST_63_32		0x1C
+#define PEDIT_IP6_DST_95_64		0x20
+#define PEDIT_IP6_DST_127_96		0x24
+#define PEDIT_TCP_SPORT_DPORT		0x0
+#define PEDIT_UDP_SPORT_DPORT		0x0
+
+int cxgb4_tc_flower_replace(struct net_device *dev,
+			    struct tc_cls_flower_offload *cls);
+int cxgb4_tc_flower_destroy(struct net_device *dev,
+			    struct tc_cls_flower_offload *cls);
+int cxgb4_tc_flower_stats(struct net_device *dev,
+			  struct tc_cls_flower_offload *cls);
+
+int cxgb4_init_tc_flower(struct adapter *adap);
+void cxgb4_cleanup_tc_flower(struct adapter *adap);
+#endif /* __CXGB4_TC_FLOWER_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
index 48970ba08bdc..cd0cd13a964d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
@@ -380,7 +380,7 @@ int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls)
 			return -EINVAL;
 	}
 
-	ret = cxgb4_del_filter(dev, filter_id);
+	ret = cxgb4_del_filter(dev, filter_id, NULL);
 	if (ret)
 		goto out;
 
@@ -399,7 +399,7 @@ int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls)
 				if (!test_bit(j, link->tid_map))
 					continue;
 
-				ret = __cxgb4_del_filter(dev, j, NULL);
+				ret = __cxgb4_del_filter(dev, j, NULL, NULL);
 				if (ret)
 					goto out;
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index 84541fce94c5..08e709ab6dd4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -212,14 +212,19 @@ struct filter_ctx {
 
 struct ch_filter_specification;
 
+int cxgb4_get_free_ftid(struct net_device *dev, int family);
 int __cxgb4_set_filter(struct net_device *dev, int filter_id,
 		       struct ch_filter_specification *fs,
 		       struct filter_ctx *ctx);
 int __cxgb4_del_filter(struct net_device *dev, int filter_id,
+		       struct ch_filter_specification *fs,
 		       struct filter_ctx *ctx);
 int cxgb4_set_filter(struct net_device *dev, int filter_id,
 		     struct ch_filter_specification *fs);
-int cxgb4_del_filter(struct net_device *dev, int filter_id);
+int cxgb4_del_filter(struct net_device *dev, int filter_id,
+		     struct ch_filter_specification *fs);
+int cxgb4_get_filter_counters(struct net_device *dev, unsigned int fidx,
+			      u64 *hitcnt, u64 *bytecnt, bool hash);
 
 static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue)
 {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c
index f7ef8871dd0b..1817a0307d26 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c
@@ -422,7 +422,7 @@ struct l2t_entry *cxgb4_l2t_get(struct l2t_data *d, struct neighbour *neigh,
 	u8 lport;
 	u16 vlan;
 	struct l2t_entry *e;
-	int addr_len = neigh->tbl->key_len;
+	unsigned int addr_len = neigh->tbl->key_len;
 	u32 *addr = (u32 *)neigh->primary_key;
 	int ifidx = neigh->dev->ifindex;
 	int hash = addr_hash(d, addr, addr_len, ifidx);
@@ -536,7 +536,7 @@ void t4_l2t_update(struct adapter *adap, struct neighbour *neigh)
 	struct l2t_entry *e;
 	struct sk_buff_head *arpq = NULL;
 	struct l2t_data *d = adap->l2t;
-	int addr_len = neigh->tbl->key_len;
+	unsigned int addr_len = neigh->tbl->key_len;
 	u32 *addr = (u32 *) neigh->primary_key;
 	int ifidx = neigh->dev->ifindex;
 	int hash = addr_hash(d, addr, addr_len, ifidx);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 43f52a8fe708..922f2f937789 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -1537,7 +1537,13 @@ int t4_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
  */
 static inline int is_ofld_imm(const struct sk_buff *skb)
 {
-	return skb->len <= MAX_IMM_TX_PKT_LEN;
+	struct work_request_hdr *req = (struct work_request_hdr *)skb->data;
+	unsigned long opcode = FW_WR_OP_G(ntohl(req->wr_hi));
+
+	if (opcode == FW_CRYPTO_LOOKASIDE_WR)
+		return skb->len <= SGE_MAX_WR_LEN;
+	else
+		return skb->len <= MAX_IMM_TX_PKT_LEN;
 }
 
 /**
@@ -2583,11 +2589,11 @@ irq_handler_t t4_intr_handler(struct adapter *adap)
 	return t4_intr_intx;
 }
 
-static void sge_rx_timer_cb(unsigned long data)
+static void sge_rx_timer_cb(struct timer_list *t)
 {
 	unsigned long m;
 	unsigned int i;
-	struct adapter *adap = (struct adapter *)data;
+	struct adapter *adap = from_timer(adap, t, sge.rx_timer);
 	struct sge *s = &adap->sge;
 
 	for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
@@ -2620,11 +2626,11 @@ done:
 	mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD);
 }
 
-static void sge_tx_timer_cb(unsigned long data)
+static void sge_tx_timer_cb(struct timer_list *t)
 {
 	unsigned long m;
 	unsigned int i, budget;
-	struct adapter *adap = (struct adapter *)data;
+	struct adapter *adap = from_timer(adap, t, sge.tx_timer);
 	struct sge *s = &adap->sge;
 
 	for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
@@ -3458,8 +3464,8 @@ int t4_sge_init(struct adapter *adap)
 	/* Set up timers used for recuring callbacks to process RX and TX
 	 * administrative tasks.
 	 */
-	setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adap);
-	setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adap);
+	timer_setup(&s->rx_timer, sge_rx_timer_cb, 0);
+	timer_setup(&s->tx_timer, sge_tx_timer_cb, 0);
 
 	spin_lock_init(&s->intrq_lock);
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/smt.c b/drivers/net/ethernet/chelsio/cxgb4/smt.c
new file mode 100644
index 000000000000..7b2207a2a130
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/smt.c
@@ -0,0 +1,247 @@
+/*
+ * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2017 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "cxgb4.h"
+#include "smt.h"
+#include "t4_msg.h"
+#include "t4fw_api.h"
+#include "t4_regs.h"
+#include "t4_values.h"
+
+struct smt_data *t4_init_smt(void)
+{
+	unsigned int smt_size;
+	struct smt_data *s;
+	int i;
+
+	smt_size = SMT_SIZE;
+
+	s = kvzalloc(sizeof(*s) + smt_size * sizeof(struct smt_entry),
+		     GFP_KERNEL);
+	if (!s)
+		return NULL;
+	s->smt_size = smt_size;
+	rwlock_init(&s->lock);
+	for (i = 0; i < s->smt_size; ++i) {
+		s->smtab[i].idx = i;
+		s->smtab[i].state = SMT_STATE_UNUSED;
+		memset(&s->smtab[i].src_mac, 0, ETH_ALEN);
+		spin_lock_init(&s->smtab[i].lock);
+		atomic_set(&s->smtab[i].refcnt, 0);
+	}
+	return s;
+}
+
+static struct smt_entry *find_or_alloc_smte(struct smt_data *s, u8 *smac)
+{
+	struct smt_entry *first_free = NULL;
+	struct smt_entry *e, *end;
+
+	for (e = &s->smtab[0], end = &s->smtab[s->smt_size]; e != end; ++e) {
+		if (atomic_read(&e->refcnt) == 0) {
+			if (!first_free)
+				first_free = e;
+		} else {
+			if (e->state == SMT_STATE_SWITCHING) {
+				/* This entry is actually in use. See if we can
+				 * re-use it ?
+				 */
+				if (memcmp(e->src_mac, smac, ETH_ALEN) == 0)
+					goto found_reuse;
+			}
+		}
+	}
+
+	if (first_free) {
+		e = first_free;
+		goto found;
+	}
+	return NULL;
+
+found:
+	e->state = SMT_STATE_UNUSED;
+
+found_reuse:
+	return e;
+}
+
+static void t4_smte_free(struct smt_entry *e)
+{
+	spin_lock_bh(&e->lock);
+	if (atomic_read(&e->refcnt) == 0) {  /* hasn't been recycled */
+		e->state = SMT_STATE_UNUSED;
+	}
+	spin_unlock_bh(&e->lock);
+}
+
+/**
+ * @e: smt entry to release
+ *
+ * Releases ref count and frees up an smt entry from SMT table
+ */
+void cxgb4_smt_release(struct smt_entry *e)
+{
+	if (atomic_dec_and_test(&e->refcnt))
+		t4_smte_free(e);
+}
+EXPORT_SYMBOL(cxgb4_smt_release);
+
+void do_smt_write_rpl(struct adapter *adap, const struct cpl_smt_write_rpl *rpl)
+{
+	unsigned int smtidx = TID_TID_G(GET_TID(rpl));
+	struct smt_data *s = adap->smt;
+
+	if (unlikely(rpl->status != CPL_ERR_NONE)) {
+		struct smt_entry *e = &s->smtab[smtidx];
+
+		dev_err(adap->pdev_dev,
+			"Unexpected SMT_WRITE_RPL status %u for entry %u\n",
+			rpl->status, smtidx);
+		spin_lock(&e->lock);
+		e->state = SMT_STATE_ERROR;
+		spin_unlock(&e->lock);
+		return;
+	}
+}
+
+static int write_smt_entry(struct adapter *adapter, struct smt_entry *e)
+{
+	struct cpl_t6_smt_write_req *t6req;
+	struct smt_data *s = adapter->smt;
+	struct cpl_smt_write_req *req;
+	struct sk_buff *skb;
+	int size;
+	u8 row;
+
+	if (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5) {
+		size = sizeof(*req);
+		skb = alloc_skb(size, GFP_ATOMIC);
+		if (!skb)
+			return -ENOMEM;
+		/* Source MAC Table (SMT) contains 256 SMAC entries
+		 * organized in 128 rows of 2 entries each.
+		 */
+		req = (struct cpl_smt_write_req *)__skb_put(skb, size);
+		INIT_TP_WR(req, 0);
+
+		/* Each row contains an SMAC pair.
+		 * LSB selects the SMAC entry within a row
+		 */
+		row = (e->idx >> 1);
+		if (e->idx & 1) {
+			req->pfvf1 = 0x0;
+			memcpy(req->src_mac1, e->src_mac, ETH_ALEN);
+
+			/* fill pfvf0/src_mac0 with entry
+			 * at prev index from smt-tab.
+			 */
+			req->pfvf0 = 0x0;
+			memcpy(req->src_mac0, s->smtab[e->idx - 1].src_mac,
+			       ETH_ALEN);
+		} else {
+			req->pfvf0 = 0x0;
+			memcpy(req->src_mac0, e->src_mac, ETH_ALEN);
+
+			/* fill pfvf1/src_mac1 with entry
+			 * at next index from smt-tab
+			 */
+			req->pfvf1 = 0x0;
+			memcpy(req->src_mac1, s->smtab[e->idx + 1].src_mac,
+			       ETH_ALEN);
+		}
+	} else {
+		size = sizeof(*t6req);
+		skb = alloc_skb(size, GFP_ATOMIC);
+		if (!skb)
+			return -ENOMEM;
+		/* Source MAC Table (SMT) contains 256 SMAC entries */
+		t6req = (struct cpl_t6_smt_write_req *)__skb_put(skb, size);
+		INIT_TP_WR(t6req, 0);
+		req = (struct cpl_smt_write_req *)t6req;
+
+		/* fill pfvf0/src_mac0 from smt-tab */
+		req->pfvf0 = 0x0;
+		memcpy(req->src_mac0, s->smtab[e->idx].src_mac, ETH_ALEN);
+		row = e->idx;
+	}
+
+	OPCODE_TID(req) =
+		htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, e->idx |
+				    TID_QID_V(adapter->sge.fw_evtq.abs_id)));
+	req->params = htonl(SMTW_NORPL_V(0) |
+			    SMTW_IDX_V(row) |
+			    SMTW_OVLAN_IDX_V(0));
+	t4_mgmt_tx(adapter, skb);
+	return 0;
+}
+
+static struct smt_entry *t4_smt_alloc_switching(struct adapter *adap, u16 pfvf,
+						u8 *smac)
+{
+	struct smt_data *s = adap->smt;
+	struct smt_entry *e;
+
+	write_lock_bh(&s->lock);
+	e = find_or_alloc_smte(s, smac);
+	if (e) {
+		spin_lock(&e->lock);
+		if (!atomic_read(&e->refcnt)) {
+			atomic_set(&e->refcnt, 1);
+			e->state = SMT_STATE_SWITCHING;
+			e->pfvf = pfvf;
+			memcpy(e->src_mac, smac, ETH_ALEN);
+			write_smt_entry(adap, e);
+		} else {
+			atomic_inc(&e->refcnt);
+		}
+		spin_unlock(&e->lock);
+	}
+	write_unlock_bh(&s->lock);
+	return e;
+}
+
+/**
+ * @dev: net_device pointer
+ * @smac: MAC address to add to SMT
+ * Returns pointer to the SMT entry created
+ *
+ * Allocates an SMT entry to be used by switching rule of a filter.
+ */
+struct smt_entry *cxgb4_smt_alloc_switching(struct net_device *dev, u8 *smac)
+{
+	struct adapter *adap = netdev2adap(dev);
+
+	return t4_smt_alloc_switching(adap, 0x0, smac);
+}
+EXPORT_SYMBOL(cxgb4_smt_alloc_switching);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/smt.h b/drivers/net/ethernet/chelsio/cxgb4/smt.h
new file mode 100644
index 000000000000..d6c2cc271398
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/smt.h
@@ -0,0 +1,76 @@
+/*
+ * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2017 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CXGB4_SMT_H
+#define __CXGB4_SMT_H
+
+#include <linux/spinlock.h>
+#include <linux/if_ether.h>
+#include <linux/atomic.h>
+
+struct adapter;
+struct cpl_smt_write_rpl;
+
+/* SMT related handling. Heavily adapted based on l2t ops in l2t.h/l2t.c
+ */
+enum {
+	SMT_STATE_SWITCHING,
+	SMT_STATE_UNUSED,
+	SMT_STATE_ERROR
+};
+
+enum {
+	SMT_SIZE = 256
+};
+
+struct smt_entry {
+	u16 state;
+	u16 idx;
+	u16 pfvf;
+	u8 src_mac[ETH_ALEN];
+	atomic_t refcnt;
+	spinlock_t lock;	/* protect smt entry add,removal */
+};
+
+struct smt_data {
+	unsigned int smt_size;
+	rwlock_t lock;
+	struct smt_entry smtab[0];
+};
+
+struct smt_data *t4_init_smt(void);
+struct smt_entry *cxgb4_smt_alloc_switching(struct net_device *dev, u8 *smac);
+void cxgb4_smt_release(struct smt_entry *e);
+void do_smt_write_rpl(struct adapter *p, const struct cpl_smt_write_rpl *rpl);
+#endif /* __CXGB4_SMT_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index b65ce26ff72f..f63210f15579 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -2639,6 +2639,35 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
 #define CHELSIO_VPD_UNIQUE_ID 0x82
 
 /**
+ * t4_eeprom_ptov - translate a physical EEPROM address to virtual
+ * @phys_addr: the physical EEPROM address
+ * @fn: the PCI function number
+ * @sz: size of function-specific area
+ *
+ * Translate a physical EEPROM address to virtual.  The first 1K is
+ * accessed through virtual addresses starting at 31K, the rest is
+ * accessed through virtual addresses starting at 0.
+ *
+ * The mapping is as follows:
+ * [0..1K) -> [31K..32K)
+ * [1K..1K+A) -> [31K-A..31K)
+ * [1K+A..ES) -> [0..ES-A-1K)
+ *
+ * where A = @fn * @sz, and ES = EEPROM size.
+ */
+int t4_eeprom_ptov(unsigned int phys_addr, unsigned int fn, unsigned int sz)
+{
+	fn *= sz;
+	if (phys_addr < 1024)
+		return phys_addr + (31 << 10);
+	if (phys_addr < 1024 + fn)
+		return 31744 - fn + phys_addr - 1024;
+	if (phys_addr < EEPROMSIZE)
+		return phys_addr - 1024 - fn;
+	return -EINVAL;
+}
+
+/**
  *	t4_seeprom_wp - enable/disable EEPROM write protection
  *	@adapter: the adapter
  *	@enable: whether to enable or disable write protection
@@ -5052,23 +5081,26 @@ static unsigned int t4_use_ldst(struct adapter *adap)
 }
 
 /**
- *	t4_fw_tp_pio_rw - Access TP PIO through LDST
- *	@adap: the adapter
- *	@vals: where the indirect register values are stored/written
- *	@nregs: how many indirect registers to read/write
- *	@start_idx: index of first indirect register to read/write
- *	@rw: Read (1) or Write (0)
+ * t4_tp_fw_ldst_rw - Access TP indirect register through LDST
+ * @adap: the adapter
+ * @cmd: TP fw ldst address space type
+ * @vals: where the indirect register values are stored/written
+ * @nregs: how many indirect registers to read/write
+ * @start_idx: index of first indirect register to read/write
+ * @rw: Read (1) or Write (0)
+ * @sleep_ok: if true we may sleep while awaiting command completion
  *
- *	Access TP PIO registers through LDST
+ * Access TP indirect registers through LDST
  */
-static void t4_fw_tp_pio_rw(struct adapter *adap, u32 *vals, unsigned int nregs,
-			    unsigned int start_index, unsigned int rw)
+static int t4_tp_fw_ldst_rw(struct adapter *adap, int cmd, u32 *vals,
+			    unsigned int nregs, unsigned int start_index,
+			    unsigned int rw, bool sleep_ok)
 {
-	int ret, i;
-	int cmd = FW_LDST_ADDRSPC_TP_PIO;
+	int ret = 0;
+	unsigned int i;
 	struct fw_ldst_cmd c;
 
-	for (i = 0 ; i < nregs; i++) {
+	for (i = 0; i < nregs; i++) {
 		memset(&c, 0, sizeof(c));
 		c.op_to_addrspace = cpu_to_be32(FW_CMD_OP_V(FW_LDST_CMD) |
 						FW_CMD_REQUEST_F |
@@ -5079,26 +5111,147 @@ static void t4_fw_tp_pio_rw(struct adapter *adap, u32 *vals, unsigned int nregs,
 
 		c.u.addrval.addr = cpu_to_be32(start_index + i);
 		c.u.addrval.val  = rw ? 0 : cpu_to_be32(vals[i]);
-		ret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c);
-		if (!ret && rw)
+		ret = t4_wr_mbox_meat(adap, adap->mbox, &c, sizeof(c), &c,
+				      sleep_ok);
+		if (ret)
+			return ret;
+
+		if (rw)
 			vals[i] = be32_to_cpu(c.u.addrval.val);
 	}
+	return 0;
+}
+
+/**
+ * t4_tp_indirect_rw - Read/Write TP indirect register through LDST or backdoor
+ * @adap: the adapter
+ * @reg_addr: Address Register
+ * @reg_data: Data register
+ * @buff: where the indirect register values are stored/written
+ * @nregs: how many indirect registers to read/write
+ * @start_index: index of first indirect register to read/write
+ * @rw: READ(1) or WRITE(0)
+ * @sleep_ok: if true we may sleep while awaiting command completion
+ *
+ * Read/Write TP indirect registers through LDST if possible.
+ * Else, use backdoor access
+ **/
+static void t4_tp_indirect_rw(struct adapter *adap, u32 reg_addr, u32 reg_data,
+			      u32 *buff, u32 nregs, u32 start_index, int rw,
+			      bool sleep_ok)
+{
+	int rc = -EINVAL;
+	int cmd;
+
+	switch (reg_addr) {
+	case TP_PIO_ADDR_A:
+		cmd = FW_LDST_ADDRSPC_TP_PIO;
+		break;
+	case TP_TM_PIO_ADDR_A:
+		cmd = FW_LDST_ADDRSPC_TP_TM_PIO;
+		break;
+	case TP_MIB_INDEX_A:
+		cmd = FW_LDST_ADDRSPC_TP_MIB;
+		break;
+	default:
+		goto indirect_access;
+	}
+
+	if (t4_use_ldst(adap))
+		rc = t4_tp_fw_ldst_rw(adap, cmd, buff, nregs, start_index, rw,
+				      sleep_ok);
+
+indirect_access:
+
+	if (rc) {
+		if (rw)
+			t4_read_indirect(adap, reg_addr, reg_data, buff, nregs,
+					 start_index);
+		else
+			t4_write_indirect(adap, reg_addr, reg_data, buff, nregs,
+					  start_index);
+	}
+}
+
+/**
+ * t4_tp_pio_read - Read TP PIO registers
+ * @adap: the adapter
+ * @buff: where the indirect register values are written
+ * @nregs: how many indirect registers to read
+ * @start_index: index of first indirect register to read
+ * @sleep_ok: if true we may sleep while awaiting command completion
+ *
+ * Read TP PIO Registers
+ **/
+void t4_tp_pio_read(struct adapter *adap, u32 *buff, u32 nregs,
+		    u32 start_index, bool sleep_ok)
+{
+	t4_tp_indirect_rw(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A, buff, nregs,
+			  start_index, 1, sleep_ok);
+}
+
+/**
+ * t4_tp_pio_write - Write TP PIO registers
+ * @adap: the adapter
+ * @buff: where the indirect register values are stored
+ * @nregs: how many indirect registers to write
+ * @start_index: index of first indirect register to write
+ * @sleep_ok: if true we may sleep while awaiting command completion
+ *
+ * Write TP PIO Registers
+ **/
+static void t4_tp_pio_write(struct adapter *adap, u32 *buff, u32 nregs,
+			    u32 start_index, bool sleep_ok)
+{
+	t4_tp_indirect_rw(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A, buff, nregs,
+			  start_index, 0, sleep_ok);
+}
+
+/**
+ * t4_tp_tm_pio_read - Read TP TM PIO registers
+ * @adap: the adapter
+ * @buff: where the indirect register values are written
+ * @nregs: how many indirect registers to read
+ * @start_index: index of first indirect register to read
+ * @sleep_ok: if true we may sleep while awaiting command completion
+ *
+ * Read TP TM PIO Registers
+ **/
+void t4_tp_tm_pio_read(struct adapter *adap, u32 *buff, u32 nregs,
+		       u32 start_index, bool sleep_ok)
+{
+	t4_tp_indirect_rw(adap, TP_TM_PIO_ADDR_A, TP_TM_PIO_DATA_A, buff,
+			  nregs, start_index, 1, sleep_ok);
+}
+
+/**
+ * t4_tp_mib_read - Read TP MIB registers
+ * @adap: the adapter
+ * @buff: where the indirect register values are written
+ * @nregs: how many indirect registers to read
+ * @start_index: index of first indirect register to read
+ * @sleep_ok: if true we may sleep while awaiting command completion
+ *
+ * Read TP MIB Registers
+ **/
+void t4_tp_mib_read(struct adapter *adap, u32 *buff, u32 nregs, u32 start_index,
+		    bool sleep_ok)
+{
+	t4_tp_indirect_rw(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, buff, nregs,
+			  start_index, 1, sleep_ok);
 }
 
 /**
  *	t4_read_rss_key - read the global RSS key
  *	@adap: the adapter
  *	@key: 10-entry array holding the 320-bit RSS key
+ *      @sleep_ok: if true we may sleep while awaiting command completion
  *
  *	Reads the global 320-bit RSS key.
  */
-void t4_read_rss_key(struct adapter *adap, u32 *key)
+void t4_read_rss_key(struct adapter *adap, u32 *key, bool sleep_ok)
 {
-	if (t4_use_ldst(adap))
-		t4_fw_tp_pio_rw(adap, key, 10, TP_RSS_SECRET_KEY0_A, 1);
-	else
-		t4_read_indirect(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A, key, 10,
-				 TP_RSS_SECRET_KEY0_A);
+	t4_tp_pio_read(adap, key, 10, TP_RSS_SECRET_KEY0_A, sleep_ok);
 }
 
 /**
@@ -5106,12 +5259,14 @@ void t4_read_rss_key(struct adapter *adap, u32 *key)
  *	@adap: the adapter
  *	@key: 10-entry array holding the 320-bit RSS key
  *	@idx: which RSS key to write
+ *      @sleep_ok: if true we may sleep while awaiting command completion
  *
  *	Writes one of the RSS keys with the given 320-bit value.  If @idx is
  *	0..15 the corresponding entry in the RSS key table is written,
  *	otherwise the global RSS key is written.
  */
-void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx)
+void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx,
+		      bool sleep_ok)
 {
 	u8 rss_key_addr_cnt = 16;
 	u32 vrt = t4_read_reg(adap, TP_RSS_CONFIG_VRT_A);
@@ -5124,11 +5279,7 @@ void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx)
 	    (vrt & KEYEXTEND_F) && (KEYMODE_G(vrt) == 3))
 		rss_key_addr_cnt = 32;
 
-	if (t4_use_ldst(adap))
-		t4_fw_tp_pio_rw(adap, (void *)key, 10, TP_RSS_SECRET_KEY0_A, 0);
-	else
-		t4_write_indirect(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A, key, 10,
-				  TP_RSS_SECRET_KEY0_A);
+	t4_tp_pio_write(adap, (void *)key, 10, TP_RSS_SECRET_KEY0_A, sleep_ok);
 
 	if (idx >= 0 && idx < rss_key_addr_cnt) {
 		if (rss_key_addr_cnt > 16)
@@ -5146,19 +5297,15 @@ void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx)
  *	@adapter: the adapter
  *	@index: the entry in the PF RSS table to read
  *	@valp: where to store the returned value
+ *      @sleep_ok: if true we may sleep while awaiting command completion
  *
  *	Reads the PF RSS Configuration Table at the specified index and returns
  *	the value found there.
  */
 void t4_read_rss_pf_config(struct adapter *adapter, unsigned int index,
-			   u32 *valp)
+			   u32 *valp, bool sleep_ok)
 {
-	if (t4_use_ldst(adapter))
-		t4_fw_tp_pio_rw(adapter, valp, 1,
-				TP_RSS_PF0_CONFIG_A + index, 1);
-	else
-		t4_read_indirect(adapter, TP_PIO_ADDR_A, TP_PIO_DATA_A,
-				 valp, 1, TP_RSS_PF0_CONFIG_A + index);
+	t4_tp_pio_read(adapter, valp, 1, TP_RSS_PF0_CONFIG_A + index, sleep_ok);
 }
 
 /**
@@ -5167,12 +5314,13 @@ void t4_read_rss_pf_config(struct adapter *adapter, unsigned int index,
  *	@index: the entry in the VF RSS table to read
  *	@vfl: where to store the returned VFL
  *	@vfh: where to store the returned VFH
+ *      @sleep_ok: if true we may sleep while awaiting command completion
  *
  *	Reads the VF RSS Configuration Table at the specified index and returns
  *	the (VFL, VFH) values found there.
  */
 void t4_read_rss_vf_config(struct adapter *adapter, unsigned int index,
-			   u32 *vfl, u32 *vfh)
+			   u32 *vfl, u32 *vfh, bool sleep_ok)
 {
 	u32 vrt, mask, data;
 
@@ -5193,50 +5341,37 @@ void t4_read_rss_vf_config(struct adapter *adapter, unsigned int index,
 
 	/* Grab the VFL/VFH values ...
 	 */
-	if (t4_use_ldst(adapter)) {
-		t4_fw_tp_pio_rw(adapter, vfl, 1, TP_RSS_VFL_CONFIG_A, 1);
-		t4_fw_tp_pio_rw(adapter, vfh, 1, TP_RSS_VFH_CONFIG_A, 1);
-	} else {
-		t4_read_indirect(adapter, TP_PIO_ADDR_A, TP_PIO_DATA_A,
-				 vfl, 1, TP_RSS_VFL_CONFIG_A);
-		t4_read_indirect(adapter, TP_PIO_ADDR_A, TP_PIO_DATA_A,
-				 vfh, 1, TP_RSS_VFH_CONFIG_A);
-	}
+	t4_tp_pio_read(adapter, vfl, 1, TP_RSS_VFL_CONFIG_A, sleep_ok);
+	t4_tp_pio_read(adapter, vfh, 1, TP_RSS_VFH_CONFIG_A, sleep_ok);
 }
 
 /**
  *	t4_read_rss_pf_map - read PF RSS Map
  *	@adapter: the adapter
+ *      @sleep_ok: if true we may sleep while awaiting command completion
  *
  *	Reads the PF RSS Map register and returns its value.
  */
-u32 t4_read_rss_pf_map(struct adapter *adapter)
+u32 t4_read_rss_pf_map(struct adapter *adapter, bool sleep_ok)
 {
 	u32 pfmap;
 
-	if (t4_use_ldst(adapter))
-		t4_fw_tp_pio_rw(adapter, &pfmap, 1, TP_RSS_PF_MAP_A, 1);
-	else
-		t4_read_indirect(adapter, TP_PIO_ADDR_A, TP_PIO_DATA_A,
-				 &pfmap, 1, TP_RSS_PF_MAP_A);
+	t4_tp_pio_read(adapter, &pfmap, 1, TP_RSS_PF_MAP_A, sleep_ok);
 	return pfmap;
 }
 
 /**
  *	t4_read_rss_pf_mask - read PF RSS Mask
  *	@adapter: the adapter
+ *      @sleep_ok: if true we may sleep while awaiting command completion
  *
  *	Reads the PF RSS Mask register and returns its value.
  */
-u32 t4_read_rss_pf_mask(struct adapter *adapter)
+u32 t4_read_rss_pf_mask(struct adapter *adapter, bool sleep_ok)
 {
 	u32 pfmask;
 
-	if (t4_use_ldst(adapter))
-		t4_fw_tp_pio_rw(adapter, &pfmask, 1, TP_RSS_PF_MSK_A, 1);
-	else
-		t4_read_indirect(adapter, TP_PIO_ADDR_A, TP_PIO_DATA_A,
-				 &pfmask, 1, TP_RSS_PF_MSK_A);
+	t4_tp_pio_read(adapter, &pfmask, 1, TP_RSS_PF_MSK_A, sleep_ok);
 	return pfmask;
 }
 
@@ -5245,12 +5380,13 @@ u32 t4_read_rss_pf_mask(struct adapter *adapter)
  *	@adap: the adapter
  *	@v4: holds the TCP/IP counter values
  *	@v6: holds the TCP/IPv6 counter values
+ *      @sleep_ok: if true we may sleep while awaiting command completion
  *
  *	Returns the values of TP's TCP/IP and TCP/IPv6 MIB counters.
  *	Either @v4 or @v6 may be %NULL to skip the corresponding stats.
  */
 void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4,
-			 struct tp_tcp_stats *v6)
+			 struct tp_tcp_stats *v6, bool sleep_ok)
 {
 	u32 val[TP_MIB_TCP_RXT_SEG_LO_A - TP_MIB_TCP_OUT_RST_A + 1];
 
@@ -5259,16 +5395,16 @@ void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4,
 #define STAT64(x)   (((u64)STAT(x##_HI) << 32) | STAT(x##_LO))
 
 	if (v4) {
-		t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, val,
-				 ARRAY_SIZE(val), TP_MIB_TCP_OUT_RST_A);
+		t4_tp_mib_read(adap, val, ARRAY_SIZE(val),
+			       TP_MIB_TCP_OUT_RST_A, sleep_ok);
 		v4->tcp_out_rsts = STAT(OUT_RST);
 		v4->tcp_in_segs  = STAT64(IN_SEG);
 		v4->tcp_out_segs = STAT64(OUT_SEG);
 		v4->tcp_retrans_segs = STAT64(RXT_SEG);
 	}
 	if (v6) {
-		t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, val,
-				 ARRAY_SIZE(val), TP_MIB_TCP_V6OUT_RST_A);
+		t4_tp_mib_read(adap, val, ARRAY_SIZE(val),
+			       TP_MIB_TCP_V6OUT_RST_A, sleep_ok);
 		v6->tcp_out_rsts = STAT(OUT_RST);
 		v6->tcp_in_segs  = STAT64(IN_SEG);
 		v6->tcp_out_segs = STAT64(OUT_SEG);
@@ -5283,63 +5419,66 @@ void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4,
  *	t4_tp_get_err_stats - read TP's error MIB counters
  *	@adap: the adapter
  *	@st: holds the counter values
+ *      @sleep_ok: if true we may sleep while awaiting command completion
  *
  *	Returns the values of TP's error counters.
  */
-void t4_tp_get_err_stats(struct adapter *adap, struct tp_err_stats *st)
+void t4_tp_get_err_stats(struct adapter *adap, struct tp_err_stats *st,
+			 bool sleep_ok)
 {
 	int nchan = adap->params.arch.nchan;
 
-	t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
-			 st->mac_in_errs, nchan, TP_MIB_MAC_IN_ERR_0_A);
-	t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
-			 st->hdr_in_errs, nchan, TP_MIB_HDR_IN_ERR_0_A);
-	t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
-			 st->tcp_in_errs, nchan, TP_MIB_TCP_IN_ERR_0_A);
-	t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
-			 st->tnl_cong_drops, nchan, TP_MIB_TNL_CNG_DROP_0_A);
-	t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
-			 st->ofld_chan_drops, nchan, TP_MIB_OFD_CHN_DROP_0_A);
-	t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
-			 st->tnl_tx_drops, nchan, TP_MIB_TNL_DROP_0_A);
-	t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
-			 st->ofld_vlan_drops, nchan, TP_MIB_OFD_VLN_DROP_0_A);
-	t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
-			 st->tcp6_in_errs, nchan, TP_MIB_TCP_V6IN_ERR_0_A);
-
-	t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
-			 &st->ofld_no_neigh, 2, TP_MIB_OFD_ARP_DROP_A);
+	t4_tp_mib_read(adap, st->mac_in_errs, nchan, TP_MIB_MAC_IN_ERR_0_A,
+		       sleep_ok);
+	t4_tp_mib_read(adap, st->hdr_in_errs, nchan, TP_MIB_HDR_IN_ERR_0_A,
+		       sleep_ok);
+	t4_tp_mib_read(adap, st->tcp_in_errs, nchan, TP_MIB_TCP_IN_ERR_0_A,
+		       sleep_ok);
+	t4_tp_mib_read(adap, st->tnl_cong_drops, nchan,
+		       TP_MIB_TNL_CNG_DROP_0_A, sleep_ok);
+	t4_tp_mib_read(adap, st->ofld_chan_drops, nchan,
+		       TP_MIB_OFD_CHN_DROP_0_A, sleep_ok);
+	t4_tp_mib_read(adap, st->tnl_tx_drops, nchan, TP_MIB_TNL_DROP_0_A,
+		       sleep_ok);
+	t4_tp_mib_read(adap, st->ofld_vlan_drops, nchan,
+		       TP_MIB_OFD_VLN_DROP_0_A, sleep_ok);
+	t4_tp_mib_read(adap, st->tcp6_in_errs, nchan,
+		       TP_MIB_TCP_V6IN_ERR_0_A, sleep_ok);
+	t4_tp_mib_read(adap, &st->ofld_no_neigh, 2, TP_MIB_OFD_ARP_DROP_A,
+		       sleep_ok);
 }
 
 /**
  *	t4_tp_get_cpl_stats - read TP's CPL MIB counters
  *	@adap: the adapter
  *	@st: holds the counter values
+ *      @sleep_ok: if true we may sleep while awaiting command completion
  *
  *	Returns the values of TP's CPL counters.
  */
-void t4_tp_get_cpl_stats(struct adapter *adap, struct tp_cpl_stats *st)
+void t4_tp_get_cpl_stats(struct adapter *adap, struct tp_cpl_stats *st,
+			 bool sleep_ok)
 {
 	int nchan = adap->params.arch.nchan;
 
-	t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, st->req,
-			 nchan, TP_MIB_CPL_IN_REQ_0_A);
-	t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, st->rsp,
-			 nchan, TP_MIB_CPL_OUT_RSP_0_A);
+	t4_tp_mib_read(adap, st->req, nchan, TP_MIB_CPL_IN_REQ_0_A, sleep_ok);
 
+	t4_tp_mib_read(adap, st->rsp, nchan, TP_MIB_CPL_OUT_RSP_0_A, sleep_ok);
 }
 
 /**
  *	t4_tp_get_rdma_stats - read TP's RDMA MIB counters
  *	@adap: the adapter
  *	@st: holds the counter values
+ *      @sleep_ok: if true we may sleep while awaiting command completion
  *
  *	Returns the values of TP's RDMA counters.
  */
-void t4_tp_get_rdma_stats(struct adapter *adap, struct tp_rdma_stats *st)
+void t4_tp_get_rdma_stats(struct adapter *adap, struct tp_rdma_stats *st,
+			  bool sleep_ok)
 {
-	t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, &st->rqe_dfr_pkt,
-			 2, TP_MIB_RQE_DFR_PKT_A);
+	t4_tp_mib_read(adap, &st->rqe_dfr_pkt, 2, TP_MIB_RQE_DFR_PKT_A,
+		       sleep_ok);
 }
 
 /**
@@ -5347,20 +5486,24 @@ void t4_tp_get_rdma_stats(struct adapter *adap, struct tp_rdma_stats *st)
  *	@adap: the adapter
  *	@idx: the port index
  *	@st: holds the counter values
+ *      @sleep_ok: if true we may sleep while awaiting command completion
  *
  *	Returns the values of TP's FCoE counters for the selected port.
  */
 void t4_get_fcoe_stats(struct adapter *adap, unsigned int idx,
-		       struct tp_fcoe_stats *st)
+		       struct tp_fcoe_stats *st, bool sleep_ok)
 {
 	u32 val[2];
 
-	t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, &st->frames_ddp,
-			 1, TP_MIB_FCOE_DDP_0_A + idx);
-	t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, &st->frames_drop,
-			 1, TP_MIB_FCOE_DROP_0_A + idx);
-	t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, val,
-			 2, TP_MIB_FCOE_BYTE_0_HI_A + 2 * idx);
+	t4_tp_mib_read(adap, &st->frames_ddp, 1, TP_MIB_FCOE_DDP_0_A + idx,
+		       sleep_ok);
+
+	t4_tp_mib_read(adap, &st->frames_drop, 1,
+		       TP_MIB_FCOE_DROP_0_A + idx, sleep_ok);
+
+	t4_tp_mib_read(adap, val, 2, TP_MIB_FCOE_BYTE_0_HI_A + 2 * idx,
+		       sleep_ok);
+
 	st->octets_ddp = ((u64)val[0] << 32) | val[1];
 }
 
@@ -5368,15 +5511,16 @@ void t4_get_fcoe_stats(struct adapter *adap, unsigned int idx,
  *	t4_get_usm_stats - read TP's non-TCP DDP MIB counters
  *	@adap: the adapter
  *	@st: holds the counter values
+ *      @sleep_ok: if true we may sleep while awaiting command completion
  *
  *	Returns the values of TP's counters for non-TCP directly-placed packets.
  */
-void t4_get_usm_stats(struct adapter *adap, struct tp_usm_stats *st)
+void t4_get_usm_stats(struct adapter *adap, struct tp_usm_stats *st,
+		      bool sleep_ok)
 {
 	u32 val[4];
 
-	t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, val, 4,
-			 TP_MIB_USM_PKTS_A);
+	t4_tp_mib_read(adap, val, 4, TP_MIB_USM_PKTS_A, sleep_ok);
 	st->frames = val[0];
 	st->drops = val[1];
 	st->octets = ((u64)val[2] << 32) | val[3];
@@ -8205,7 +8349,7 @@ struct flash_desc {
 	u32 size_mb;
 };
 
-static int get_flash_params(struct adapter *adap)
+static int t4_get_flash_params(struct adapter *adap)
 {
 	/* Table for non-Numonix supported flash parts.  Numonix parts are left
 	 * to the preexisting code.  All flash parts have 64KB sectors.
@@ -8214,40 +8358,137 @@ static int get_flash_params(struct adapter *adap)
 		{ 0x150201, 4 << 20 },       /* Spansion 4MB S25FL032P */
 	};
 
+	unsigned int part, manufacturer;
+	unsigned int density, size;
+	u32 flashid = 0;
 	int ret;
-	u32 info;
+
+	/* Issue a Read ID Command to the Flash part.  We decode supported
+	 * Flash parts and their sizes from this.  There's a newer Query
+	 * Command which can retrieve detailed geometry information but many
+	 * Flash parts don't support it.
+	 */
 
 	ret = sf1_write(adap, 1, 1, 0, SF_RD_ID);
 	if (!ret)
-		ret = sf1_read(adap, 3, 0, 1, &info);
+		ret = sf1_read(adap, 3, 0, 1, &flashid);
 	t4_write_reg(adap, SF_OP_A, 0);                    /* unlock SF */
 	if (ret)
 		return ret;
 
-	for (ret = 0; ret < ARRAY_SIZE(supported_flash); ++ret)
-		if (supported_flash[ret].vendor_and_model_id == info) {
-			adap->params.sf_size = supported_flash[ret].size_mb;
+	/* Check to see if it's one of our non-standard supported Flash parts.
+	 */
+	for (part = 0; part < ARRAY_SIZE(supported_flash); part++)
+		if (supported_flash[part].vendor_and_model_id == flashid) {
+			adap->params.sf_size = supported_flash[part].size_mb;
 			adap->params.sf_nsec =
 				adap->params.sf_size / SF_SEC_SIZE;
-			return 0;
+			goto found;
 		}
 
-	if ((info & 0xff) != 0x20)             /* not a Numonix flash */
-		return -EINVAL;
-	info >>= 16;                           /* log2 of size */
-	if (info >= 0x14 && info < 0x18)
-		adap->params.sf_nsec = 1 << (info - 16);
-	else if (info == 0x18)
-		adap->params.sf_nsec = 64;
-	else
+	/* Decode Flash part size.  The code below looks repetative with
+	 * common encodings, but that's not guaranteed in the JEDEC
+	 * specification for the Read JADEC ID command.  The only thing that
+	 * we're guaranteed by the JADEC specification is where the
+	 * Manufacturer ID is in the returned result.  After that each
+	 * Manufacturer ~could~ encode things completely differently.
+	 * Note, all Flash parts must have 64KB sectors.
+	 */
+	manufacturer = flashid & 0xff;
+	switch (manufacturer) {
+	case 0x20: { /* Micron/Numonix */
+		/* This Density -> Size decoding table is taken from Micron
+		 * Data Sheets.
+		 */
+		density = (flashid >> 16) & 0xff;
+		switch (density) {
+		case 0x14: /* 1MB */
+			size = 1 << 20;
+			break;
+		case 0x15: /* 2MB */
+			size = 1 << 21;
+			break;
+		case 0x16: /* 4MB */
+			size = 1 << 22;
+			break;
+		case 0x17: /* 8MB */
+			size = 1 << 23;
+			break;
+		case 0x18: /* 16MB */
+			size = 1 << 24;
+			break;
+		case 0x19: /* 32MB */
+			size = 1 << 25;
+			break;
+		case 0x20: /* 64MB */
+			size = 1 << 26;
+			break;
+		case 0x21: /* 128MB */
+			size = 1 << 27;
+			break;
+		case 0x22: /* 256MB */
+			size = 1 << 28;
+			break;
+
+		default:
+			dev_err(adap->pdev_dev, "Micron Flash Part has bad size, ID = %#x, Density code = %#x\n",
+				flashid, density);
+			return -EINVAL;
+		}
+		break;
+	}
+	case 0xc2: { /* Macronix */
+		/* This Density -> Size decoding table is taken from Macronix
+		 * Data Sheets.
+		 */
+		density = (flashid >> 16) & 0xff;
+		switch (density) {
+		case 0x17: /* 8MB */
+			size = 1 << 23;
+			break;
+		case 0x18: /* 16MB */
+			size = 1 << 24;
+			break;
+		default:
+			dev_err(adap->pdev_dev, "Macronix Flash Part has bad size, ID = %#x, Density code = %#x\n",
+				flashid, density);
+			return -EINVAL;
+		}
+		break;
+	}
+	case 0xef: { /* Winbond */
+		/* This Density -> Size decoding table is taken from Winbond
+		 * Data Sheets.
+		 */
+		density = (flashid >> 16) & 0xff;
+		switch (density) {
+		case 0x17: /* 8MB */
+			size = 1 << 23;
+			break;
+		case 0x18: /* 16MB */
+			size = 1 << 24;
+			break;
+		default:
+			dev_err(adap->pdev_dev, "Winbond Flash Part has bad size, ID = %#x, Density code = %#x\n",
+				flashid, density);
+			return -EINVAL;
+		}
+		break;
+	}
+	default:
+		dev_err(adap->pdev_dev, "Unsupported Flash Part, ID = %#x\n",
+			flashid);
 		return -EINVAL;
-	adap->params.sf_size = 1 << info;
-	adap->params.sf_fw_start =
-		t4_read_reg(adap, CIM_BOOT_CFG_A) & BOOTADDR_M;
+	}
+
+	/* Store decoded Flash size and fall through into vetting code. */
+	adap->params.sf_size = size;
+	adap->params.sf_nsec = size / SF_SEC_SIZE;
 
+found:
 	if (adap->params.sf_size < FLASH_MIN_SIZE)
-		dev_warn(adap->pdev_dev, "WARNING!!! FLASH size %#x < %#x!!!\n",
-			 adap->params.sf_size, FLASH_MIN_SIZE);
+		dev_warn(adap->pdev_dev, "WARNING: Flash Part ID %#x, size %#x < %#x\n",
+			 flashid, adap->params.sf_size, FLASH_MIN_SIZE);
 	return 0;
 }
 
@@ -8285,7 +8526,7 @@ int t4_prep_adapter(struct adapter *adapter)
 	get_pci_mode(adapter, &adapter->params.pci);
 	pl_rev = REV_G(t4_read_reg(adapter, PL_REV_A));
 
-	ret = get_flash_params(adapter);
+	ret = t4_get_flash_params(adapter);
 	if (ret < 0) {
 		dev_err(adapter->pdev_dev, "error %d identifying flash\n", ret);
 		return ret;
@@ -8567,10 +8808,11 @@ int t4_init_sge_params(struct adapter *adapter)
 /**
  *      t4_init_tp_params - initialize adap->params.tp
  *      @adap: the adapter
+ *      @sleep_ok: if true we may sleep while awaiting command completion
  *
  *      Initialize various fields of the adapter's TP Parameters structure.
  */
-int t4_init_tp_params(struct adapter *adap)
+int t4_init_tp_params(struct adapter *adap, bool sleep_ok)
 {
 	int chan;
 	u32 v;
@@ -8586,19 +8828,11 @@ int t4_init_tp_params(struct adapter *adap)
 	/* Cache the adapter's Compressed Filter Mode and global Incress
 	 * Configuration.
 	 */
-	if (t4_use_ldst(adap)) {
-		t4_fw_tp_pio_rw(adap, &adap->params.tp.vlan_pri_map, 1,
-				TP_VLAN_PRI_MAP_A, 1);
-		t4_fw_tp_pio_rw(adap, &adap->params.tp.ingress_config, 1,
-				TP_INGRESS_CONFIG_A, 1);
-	} else {
-		t4_read_indirect(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A,
-				 &adap->params.tp.vlan_pri_map, 1,
-				 TP_VLAN_PRI_MAP_A);
-		t4_read_indirect(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A,
-				 &adap->params.tp.ingress_config, 1,
-				 TP_INGRESS_CONFIG_A);
-	}
+	t4_tp_pio_read(adap, &adap->params.tp.vlan_pri_map, 1,
+		       TP_VLAN_PRI_MAP_A, sleep_ok);
+	t4_tp_pio_read(adap, &adap->params.tp.ingress_config, 1,
+		       TP_INGRESS_CONFIG_A, sleep_ok);
+
 	/* For T6, cache the adapter's compressed error vector
 	 * and passing outer header info for encapsulated packets.
 	 */
@@ -8611,11 +8845,21 @@ int t4_init_tp_params(struct adapter *adap)
 	 * shift positions of several elements of the Compressed Filter Tuple
 	 * for this adapter which we need frequently ...
 	 */
-	adap->params.tp.vlan_shift = t4_filter_field_shift(adap, VLAN_F);
-	adap->params.tp.vnic_shift = t4_filter_field_shift(adap, VNIC_ID_F);
+	adap->params.tp.fcoe_shift = t4_filter_field_shift(adap, FCOE_F);
 	adap->params.tp.port_shift = t4_filter_field_shift(adap, PORT_F);
+	adap->params.tp.vnic_shift = t4_filter_field_shift(adap, VNIC_ID_F);
+	adap->params.tp.vlan_shift = t4_filter_field_shift(adap, VLAN_F);
+	adap->params.tp.tos_shift = t4_filter_field_shift(adap, TOS_F);
 	adap->params.tp.protocol_shift = t4_filter_field_shift(adap,
 							       PROTOCOL_F);
+	adap->params.tp.ethertype_shift = t4_filter_field_shift(adap,
+								ETHERTYPE_F);
+	adap->params.tp.macmatch_shift = t4_filter_field_shift(adap,
+							       MACMATCH_F);
+	adap->params.tp.matchtype_shift = t4_filter_field_shift(adap,
+								MPSHITTYPE_F);
+	adap->params.tp.frag_shift = t4_filter_field_shift(adap,
+							   FRAGMENTATION_F);
 
 	/* If TP_INGRESS_CONFIG.VNID == 0, then TP_VLAN_PRI_MAP.VNIC_ID
 	 * represents the presence of an Outer VLAN instead of a VNIC ID.
@@ -8623,6 +8867,10 @@ int t4_init_tp_params(struct adapter *adap)
 	if ((adap->params.tp.ingress_config & VNIC_F) == 0)
 		adap->params.tp.vnic_shift = -1;
 
+	v = t4_read_reg(adap, LE_3_DB_HASH_MASK_GEN_IPV4_T6_A);
+	adap->params.tp.hash_filter_mask = v;
+	v = t4_read_reg(adap, LE_4_DB_HASH_MASK_GEN_IPV4_T6_A);
+	adap->params.tp.hash_filter_mask |= ((u64)v << 32);
 	return 0;
 }
 
@@ -9342,6 +9590,125 @@ int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf,
 	return t4_wr_mbox(adapter, adapter->mbox, &cmd, sizeof(cmd), &cmd);
 }
 
+/**
+ * t4_read_pace_tbl - read the pace table
+ * @adap: the adapter
+ * @pace_vals: holds the returned values
+ *
+ * Returns the values of TP's pace table in microseconds.
+ */
+void t4_read_pace_tbl(struct adapter *adap, unsigned int pace_vals[NTX_SCHED])
+{
+	unsigned int i, v;
+
+	for (i = 0; i < NTX_SCHED; i++) {
+		t4_write_reg(adap, TP_PACE_TABLE_A, 0xffff0000 + i);
+		v = t4_read_reg(adap, TP_PACE_TABLE_A);
+		pace_vals[i] = dack_ticks_to_usec(adap, v);
+	}
+}
+
+/**
+ * t4_get_tx_sched - get the configuration of a Tx HW traffic scheduler
+ * @adap: the adapter
+ * @sched: the scheduler index
+ * @kbps: the byte rate in Kbps
+ * @ipg: the interpacket delay in tenths of nanoseconds
+ * @sleep_ok: if true we may sleep while awaiting command completion
+ *
+ * Return the current configuration of a HW Tx scheduler.
+ */
+void t4_get_tx_sched(struct adapter *adap, unsigned int sched,
+		     unsigned int *kbps, unsigned int *ipg, bool sleep_ok)
+{
+	unsigned int v, addr, bpt, cpt;
+
+	if (kbps) {
+		addr = TP_TX_MOD_Q1_Q0_RATE_LIMIT_A - sched / 2;
+		t4_tp_tm_pio_read(adap, &v, 1, addr, sleep_ok);
+		if (sched & 1)
+			v >>= 16;
+		bpt = (v >> 8) & 0xff;
+		cpt = v & 0xff;
+		if (!cpt) {
+			*kbps = 0;	/* scheduler disabled */
+		} else {
+			v = (adap->params.vpd.cclk * 1000) / cpt; /* ticks/s */
+			*kbps = (v * bpt) / 125;
+		}
+	}
+	if (ipg) {
+		addr = TP_TX_MOD_Q1_Q0_TIMER_SEPARATOR_A - sched / 2;
+		t4_tp_tm_pio_read(adap, &v, 1, addr, sleep_ok);
+		if (sched & 1)
+			v >>= 16;
+		v &= 0xffff;
+		*ipg = (10000 * v) / core_ticks_per_usec(adap);
+	}
+}
+
+/* t4_sge_ctxt_rd - read an SGE context through FW
+ * @adap: the adapter
+ * @mbox: mailbox to use for the FW command
+ * @cid: the context id
+ * @ctype: the context type
+ * @data: where to store the context data
+ *
+ * Issues a FW command through the given mailbox to read an SGE context.
+ */
+int t4_sge_ctxt_rd(struct adapter *adap, unsigned int mbox, unsigned int cid,
+		   enum ctxt_type ctype, u32 *data)
+{
+	struct fw_ldst_cmd c;
+	int ret;
+
+	if (ctype == CTXT_FLM)
+		ret = FW_LDST_ADDRSPC_SGE_FLMC;
+	else
+		ret = FW_LDST_ADDRSPC_SGE_CONMC;
+
+	memset(&c, 0, sizeof(c));
+	c.op_to_addrspace = cpu_to_be32(FW_CMD_OP_V(FW_LDST_CMD) |
+					FW_CMD_REQUEST_F | FW_CMD_READ_F |
+					FW_LDST_CMD_ADDRSPACE_V(ret));
+	c.cycles_to_len16 = cpu_to_be32(FW_LEN16(c));
+	c.u.idctxt.physid = cpu_to_be32(cid);
+
+	ret = t4_wr_mbox(adap, mbox, &c, sizeof(c), &c);
+	if (ret == 0) {
+		data[0] = be32_to_cpu(c.u.idctxt.ctxt_data0);
+		data[1] = be32_to_cpu(c.u.idctxt.ctxt_data1);
+		data[2] = be32_to_cpu(c.u.idctxt.ctxt_data2);
+		data[3] = be32_to_cpu(c.u.idctxt.ctxt_data3);
+		data[4] = be32_to_cpu(c.u.idctxt.ctxt_data4);
+		data[5] = be32_to_cpu(c.u.idctxt.ctxt_data5);
+	}
+	return ret;
+}
+
+/**
+ * t4_sge_ctxt_rd_bd - read an SGE context bypassing FW
+ * @adap: the adapter
+ * @cid: the context id
+ * @ctype: the context type
+ * @data: where to store the context data
+ *
+ * Reads an SGE context directly, bypassing FW.  This is only for
+ * debugging when FW is unavailable.
+ */
+int t4_sge_ctxt_rd_bd(struct adapter *adap, unsigned int cid,
+		      enum ctxt_type ctype, u32 *data)
+{
+	int i, ret;
+
+	t4_write_reg(adap, SGE_CTXT_CMD_A, CTXTQID_V(cid) | CTXTTYPE_V(ctype));
+	ret = t4_wait_op_done(adap, SGE_CTXT_CMD_A, BUSY_F, 0, 3, 1);
+	if (!ret)
+		for (i = SGE_CTXT_DATA0_A; i <= SGE_CTXT_DATA5_A; i += 4)
+			*data++ = t4_read_reg(adap, i);
+	return ret;
+}
+
 int t4_sched_params(struct adapter *adapter, int type, int level, int mode,
 		    int rateunit, int ratemode, int channel, int class,
 		    int minrate, int maxrate, int weight, int pktsize)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
index 7f59ca458431..a964ed184356 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
@@ -47,6 +47,7 @@ enum {
 	TCB_SIZE       = 128,   /* TCB size */
 	NMTUS          = 16,    /* size of MTU table */
 	NCCTRL_WIN     = 32,    /* # of congestion control windows */
+	NTX_SCHED      = 8,     /* # of HW Tx scheduling queues */
 	PM_NSTATS      = 5,     /* # of PM stats */
 	T6_PM_NSTATS   = 7,     /* # of PM stats in T6 */
 	MBOX_LEN       = 64,    /* mailbox size in bytes */
@@ -67,6 +68,12 @@ enum {
 	ULPRX_LA_SIZE  = 512,   /* # of 256-bit words in ULP_RX LA */
 };
 
+/* SGE context types */
+enum ctxt_type {
+	CTXT_FLM = 2,
+	CTXT_CNM,
+};
+
 enum {
 	SF_PAGE_SIZE = 256,           /* serial flash page size */
 	SF_SEC_SIZE = 64 * 1024,      /* serial flash sector size */
@@ -78,6 +85,7 @@ enum { MBOX_OWNER_NONE, MBOX_OWNER_FW, MBOX_OWNER_DRV };    /* mailbox owners */
 
 enum {
 	SGE_MAX_WR_LEN = 512,     /* max WR size in bytes */
+	SGE_CTXT_SIZE = 24,       /* size of SGE context */
 	SGE_NTIMERS = 6,          /* # of interrupt holdoff timer values */
 	SGE_NCOUNTERS = 4,        /* # of interrupt packet counter values */
 	SGE_MAX_IQ_SIZE = 65520,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
index b0ff78da8aa2..7e12f241145b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
@@ -50,6 +50,7 @@ enum {
 	CPL_RX_DATA_ACK       = 0xD,
 	CPL_TX_PKT            = 0xE,
 	CPL_L2T_WRITE_REQ     = 0x12,
+	CPL_SMT_WRITE_REQ     = 0x14,
 	CPL_TID_RELEASE       = 0x1A,
 	CPL_TX_DATA_ISO	      = 0x1F,
 
@@ -60,6 +61,7 @@ enum {
 	CPL_PEER_CLOSE        = 0x26,
 	CPL_ABORT_REQ_RSS     = 0x2B,
 	CPL_ABORT_RPL_RSS     = 0x2D,
+	CPL_SMT_WRITE_RPL     = 0x2E,
 
 	CPL_RX_PHYS_ADDR      = 0x30,
 	CPL_CLOSE_CON_RPL     = 0x32,
@@ -284,6 +286,7 @@ struct work_request_hdr {
 
 #define RX_CHANNEL_S    26
 #define RX_CHANNEL_V(x) ((x) << RX_CHANNEL_S)
+#define RX_CHANNEL_F	RX_CHANNEL_V(1U)
 
 #define WND_SCALE_EN_S    28
 #define WND_SCALE_EN_V(x) ((x) << WND_SCALE_EN_S)
@@ -313,6 +316,10 @@ struct cpl_pass_open_req {
 #define DELACK_V(x) ((x) << DELACK_S)
 #define DELACK_F    DELACK_V(1U)
 
+#define NON_OFFLOAD_S		7
+#define NON_OFFLOAD_V(x)	((x) << NON_OFFLOAD_S)
+#define NON_OFFLOAD_F		NON_OFFLOAD_V(1U)
+
 #define DSCP_S    22
 #define DSCP_M    0x3F
 #define DSCP_V(x) ((x) << DSCP_S)
@@ -681,8 +688,8 @@ struct cpl_set_tcb_field {
 };
 
 /* cpl_set_tcb_field.word_cookie fields */
-#define TCB_WORD_S    0
-#define TCB_WORD(x)   ((x) << TCB_WORD_S)
+#define TCB_WORD_S	0
+#define TCB_WORD_V(x)	((x) << TCB_WORD_S)
 
 #define TCB_COOKIE_S    5
 #define TCB_COOKIE_M    0x7
@@ -1266,6 +1273,44 @@ struct cpl_l2t_write_rpl {
 	u8 rsvd[3];
 };
 
+struct cpl_smt_write_req {
+	WR_HDR;
+	union opcode_tid ot;
+	__be32 params;
+	__be16 pfvf1;
+	u8 src_mac1[6];
+	__be16 pfvf0;
+	u8 src_mac0[6];
+};
+
+struct cpl_t6_smt_write_req {
+	WR_HDR;
+	union opcode_tid ot;
+	__be32 params;
+	__be64 tag;
+	__be16 pfvf0;
+	u8 src_mac0[6];
+	__be32 local_ip;
+	__be32 rsvd;
+};
+
+struct cpl_smt_write_rpl {
+	union opcode_tid ot;
+	u8 status;
+	u8 rsvd[3];
+};
+
+/* cpl_smt_{read,write}_req.params fields */
+#define SMTW_OVLAN_IDX_S	16
+#define SMTW_OVLAN_IDX_V(x)	((x) << SMTW_OVLAN_IDX_S)
+
+#define SMTW_IDX_S	20
+#define SMTW_IDX_V(x)	((x) << SMTW_IDX_S)
+
+#define SMTW_NORPL_S	31
+#define SMTW_NORPL_V(x)	((x) << SMTW_NORPL_S)
+#define SMTW_NORPL_F	SMTW_NORPL_V(1U)
+
 struct cpl_rdma_terminate {
 	union opcode_tid ot;
 	__be16 rsvd;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
index aa28299aef5f..60cf9e02de5d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
@@ -176,6 +176,13 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN
 	CH_PCI_ID_TABLE_FENTRY(0x50a2), /* Custom T540-KR4 */
 	CH_PCI_ID_TABLE_FENTRY(0x50a3), /* Custom T580-KR4 */
 	CH_PCI_ID_TABLE_FENTRY(0x50a4), /* Custom 2x T540-CR */
+	CH_PCI_ID_TABLE_FENTRY(0x50a5), /* Custom T522-BT */
+	CH_PCI_ID_TABLE_FENTRY(0x50a6), /* Custom T522-BT-SO */
+	CH_PCI_ID_TABLE_FENTRY(0x50a7), /* Custom T580-CR */
+	CH_PCI_ID_TABLE_FENTRY(0x50a8), /* Custom T580-KR */
+	CH_PCI_ID_TABLE_FENTRY(0x50a9), /* Custom T580-KR */
+	CH_PCI_ID_TABLE_FENTRY(0x50aa), /* Custom T580-CR */
+	CH_PCI_ID_TABLE_FENTRY(0x50ab), /* Custom T520-CR */
 
 	/* T6 adapters:
 	 */
@@ -197,6 +204,8 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN
 	CH_PCI_ID_TABLE_FENTRY(0x6082), /* Custom T6225-CR SFP28 */
 	CH_PCI_ID_TABLE_FENTRY(0x6083), /* Custom T62100-CR QSFP28 */
 	CH_PCI_ID_TABLE_FENTRY(0x6084), /* Custom T64100-CR QSFP28 */
+	CH_PCI_ID_TABLE_FENTRY(0x6085), /* Custom T6240-SO */
+	CH_PCI_ID_TABLE_FENTRY(0x6086), /* Custom T6225-SO-CR */
 CH_PCI_DEVICE_ID_TABLE_DEFINE_END;
 
 #endif /* __T4_PCI_ID_TBL_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
index dac90837842b..a7cfece72828 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
@@ -65,6 +65,9 @@
 
 #define PCIE_FW_REG(reg_addr, idx) ((reg_addr) + (idx) * 4)
 
+#define NUM_LE_DB_DBGI_REQ_DATA_INSTANCES 17
+#define NUM_LE_DB_DBGI_RSP_DATA_INSTANCES 17
+
 #define SGE_PF_KDOORBELL_A 0x0
 
 #define QID_S    15
@@ -150,6 +153,23 @@
 #define T6_DBVFIFO_SIZE_M    0x1fffU
 #define T6_DBVFIFO_SIZE_G(x) (((x) >> T6_DBVFIFO_SIZE_S) & T6_DBVFIFO_SIZE_M)
 
+#define SGE_CTXT_CMD_A 0x11fc
+
+#define BUSY_S    31
+#define BUSY_V(x) ((x) << BUSY_S)
+#define BUSY_F    BUSY_V(1U)
+
+#define CTXTTYPE_S    24
+#define CTXTTYPE_M    0x3U
+#define CTXTTYPE_V(x) ((x) << CTXTTYPE_S)
+
+#define CTXTQID_S    0
+#define CTXTQID_M    0x1ffffU
+#define CTXTQID_V(x) ((x) << CTXTQID_S)
+
+#define SGE_CTXT_DATA0_A 0x1200
+#define SGE_CTXT_DATA5_A 0x1214
+
 #define GLOBALENABLE_S    0
 #define GLOBALENABLE_V(x) ((x) << GLOBALENABLE_S)
 #define GLOBALENABLE_F    GLOBALENABLE_V(1U)
@@ -319,6 +339,16 @@
 
 #define SGE_IMSG_CTXT_BADDR_A 0x1088
 #define SGE_FLM_CACHE_BADDR_A 0x108c
+#define SGE_FLM_CFG_A 0x1090
+
+#define NOHDR_S    18
+#define NOHDR_V(x) ((x) << NOHDR_S)
+#define NOHDR_F    NOHDR_V(1U)
+
+#define HDRSTARTFLQ_S    11
+#define HDRSTARTFLQ_M    0x7U
+#define HDRSTARTFLQ_G(x) (((x) >> HDRSTARTFLQ_S) & HDRSTARTFLQ_M)
+
 #define SGE_INGRESS_RX_THRESHOLD_A 0x10a0
 
 #define THRESHOLD_0_S    24
@@ -1415,6 +1445,7 @@
 #define ROWINDEX_V(x) ((x) << ROWINDEX_S)
 
 #define TP_CCTRL_TABLE_A	0x7ddc
+#define TP_PACE_TABLE_A 0x7dd8
 #define TP_MTU_TABLE_A		0x7de4
 
 #define MTUINDEX_S    24
@@ -1447,6 +1478,17 @@
 #define LKPTBLQUEUE0_M    0x3ffU
 #define LKPTBLQUEUE0_G(x) (((x) >> LKPTBLQUEUE0_S) & LKPTBLQUEUE0_M)
 
+#define TP_TM_PIO_ADDR_A 0x7e18
+#define TP_TM_PIO_DATA_A 0x7e1c
+#define TP_MOD_CONFIG_A 0x7e24
+
+#define TIMERMODE_S    8
+#define TIMERMODE_M    0xffU
+#define TIMERMODE_G(x) (((x) >> TIMERMODE_S) & TIMERMODE_M)
+
+#define TP_TX_MOD_Q1_Q0_TIMER_SEPARATOR_A 0x3
+#define TP_TX_MOD_Q1_Q0_RATE_LIMIT_A 0x8
+
 #define TP_PIO_ADDR_A	0x7e40
 #define TP_PIO_DATA_A	0x7e44
 #define TP_MIB_INDEX_A	0x7e50
@@ -1627,6 +1669,10 @@
 #define IESPI_PAR_ERROR_V(x) ((x) << IESPI_PAR_ERROR_S)
 #define IESPI_PAR_ERROR_F    IESPI_PAR_ERROR_V(1U)
 
+#define ULP_TX_LA_RDPTR_0_A 0x8ec0
+#define ULP_TX_LA_RDDATA_0_A 0x8ec4
+#define ULP_TX_LA_WRPTR_0_A 0x8ec8
+
 #define PMRX_E_PCMD_PAR_ERROR_S    0
 #define PMRX_E_PCMD_PAR_ERROR_V(x) ((x) << PMRX_E_PCMD_PAR_ERROR_S)
 #define PMRX_E_PCMD_PAR_ERROR_F    PMRX_E_PCMD_PAR_ERROR_V(1U)
@@ -2257,6 +2303,35 @@
 #define CHNENABLE_V(x) ((x) << CHNENABLE_S)
 #define CHNENABLE_F    CHNENABLE_V(1U)
 
+#define LE_DB_DBGI_CONFIG_A 0x19cf0
+
+#define DBGICMDBUSY_S    3
+#define DBGICMDBUSY_V(x) ((x) << DBGICMDBUSY_S)
+#define DBGICMDBUSY_F    DBGICMDBUSY_V(1U)
+
+#define DBGICMDSTRT_S    2
+#define DBGICMDSTRT_V(x) ((x) << DBGICMDSTRT_S)
+#define DBGICMDSTRT_F    DBGICMDSTRT_V(1U)
+
+#define DBGICMDMODE_S    0
+#define DBGICMDMODE_M    0x3U
+#define DBGICMDMODE_V(x) ((x) << DBGICMDMODE_S)
+
+#define LE_DB_DBGI_REQ_TCAM_CMD_A 0x19cf4
+
+#define DBGICMD_S    20
+#define DBGICMD_M    0xfU
+#define DBGICMD_V(x) ((x) << DBGICMD_S)
+
+#define DBGITID_S    0
+#define DBGITID_M    0xfffffU
+#define DBGITID_V(x) ((x) << DBGITID_S)
+
+#define LE_DB_DBGI_REQ_DATA_A 0x19d00
+#define LE_DB_DBGI_RSP_STATUS_A 0x19d94
+
+#define LE_DB_DBGI_RSP_DATA_A 0x19da0
+
 #define PRTENABLE_S    29
 #define PRTENABLE_V(x) ((x) << PRTENABLE_S)
 #define PRTENABLE_F    PRTENABLE_V(1U)
@@ -2433,6 +2508,18 @@
 #define MPS_CLS_TCAM_DATA0_A 0xf000
 #define MPS_CLS_TCAM_DATA1_A 0xf004
 
+#define CTLREQID_S    30
+#define CTLREQID_V(x) ((x) << CTLREQID_S)
+
+#define MPS_VF_RPLCT_MAP0_A 0x1111c
+#define MPS_VF_RPLCT_MAP1_A 0x11120
+#define MPS_VF_RPLCT_MAP2_A 0x11124
+#define MPS_VF_RPLCT_MAP3_A 0x11128
+#define MPS_VF_RPLCT_MAP4_A 0x11300
+#define MPS_VF_RPLCT_MAP5_A 0x11304
+#define MPS_VF_RPLCT_MAP6_A 0x11308
+#define MPS_VF_RPLCT_MAP7_A 0x1130c
+
 #define VIDL_S    16
 #define VIDL_M    0xffffU
 #define VIDL_G(x) (((x) >> VIDL_S) & VIDL_M)
@@ -2457,6 +2544,10 @@
 #define DATAVIDH1_M    0x7fU
 #define DATAVIDH1_G(x) (((x) >> DATAVIDH1_S) & DATAVIDH1_M)
 
+#define MPS_CLS_TCAM_RDATA0_REQ_ID1_A 0xf020
+#define MPS_CLS_TCAM_RDATA1_REQ_ID1_A 0xf024
+#define MPS_CLS_TCAM_RDATA2_REQ_ID1_A 0xf028
+
 #define USED_S    16
 #define USED_M    0x7ffU
 #define USED_G(x) (((x) >> USED_S) & USED_M)
@@ -2850,10 +2941,20 @@
 #define T6_LIPMISS_F    T6_LIPMISS_V(1U)
 
 #define LE_DB_CONFIG_A 0x19c04
+#define LE_DB_ROUTING_TABLE_INDEX_A 0x19c10
+#define LE_DB_ACTIVE_TABLE_START_INDEX_A 0x19c10
+#define LE_DB_FILTER_TABLE_INDEX_A 0x19c14
 #define LE_DB_SERVER_INDEX_A 0x19c18
 #define LE_DB_SRVR_START_INDEX_A 0x19c18
+#define LE_DB_CLIP_TABLE_INDEX_A 0x19c1c
 #define LE_DB_ACT_CNT_IPV4_A 0x19c20
 #define LE_DB_ACT_CNT_IPV6_A 0x19c24
+#define LE_DB_HASH_CONFIG_A 0x19c28
+
+#define HASHTIDSIZE_S    16
+#define HASHTIDSIZE_M    0x3fU
+#define HASHTIDSIZE_G(x) (((x) >> HASHTIDSIZE_S) & HASHTIDSIZE_M)
+
 #define LE_DB_HASH_TID_BASE_A 0x19c30
 #define LE_DB_HASH_TBL_BASE_ADDR_A 0x19c30
 #define LE_DB_INT_CAUSE_A 0x19c3c
@@ -2900,6 +3001,23 @@
 #define SSRAMINTPERR_V(x) ((x) << SSRAMINTPERR_S)
 #define SSRAMINTPERR_F    SSRAMINTPERR_V(1U)
 
+#define LE_DB_RSP_CODE_0_A	0x19c74
+
+#define TCAM_ACTV_HIT_S		0
+#define TCAM_ACTV_HIT_M		0x1fU
+#define TCAM_ACTV_HIT_V(x)	((x) << TCAM_ACTV_HIT_S)
+#define TCAM_ACTV_HIT_G(x)	(((x) >> TCAM_ACTV_HIT_S) & TCAM_ACTV_HIT_M)
+
+#define LE_DB_RSP_CODE_1_A     0x19c78
+
+#define HASH_ACTV_HIT_S		25
+#define HASH_ACTV_HIT_M		0x1fU
+#define HASH_ACTV_HIT_V(x)	((x) << HASH_ACTV_HIT_S)
+#define HASH_ACTV_HIT_G(x)	(((x) >> HASH_ACTV_HIT_S) & HASH_ACTV_HIT_M)
+
+#define LE_3_DB_HASH_MASK_GEN_IPV4_T6_A	0x19eac
+#define LE_4_DB_HASH_MASK_GEN_IPV4_T6_A	0x19eb0
+
 #define NCSI_INT_CAUSE_A 0x1a0d8
 
 #define CIM_DM_PRTY_ERR_S    8
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h
new file mode 100644
index 000000000000..3297ce025e8b
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h
@@ -0,0 +1,69 @@
+/*
+ * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2017 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __T4_TCB_H
+#define __T4_TCB_H
+
+#define TCB_SMAC_SEL_W		0
+#define TCB_SMAC_SEL_S		24
+#define TCB_SMAC_SEL_M		0xffULL
+#define TCB_SMAC_SEL_V(x)	((x) << TCB_SMAC_SEL_S)
+
+#define TCB_T_FLAGS_W		1
+
+#define TF_CCTRL_ECE_S		60
+#define TF_CCTRL_CWR_S		61
+#define TF_CCTRL_RFR_S		62
+
+#define TCB_RSS_INFO_W		3
+#define TCB_RSS_INFO_S		0
+#define TCB_RSS_INFO_M		0x3ffULL
+#define TCB_RSS_INFO_V(x)	((x) << TCB_RSS_INFO_S)
+
+#define TCB_TIMESTAMP_W		5
+#define TCB_TIMESTAMP_S		0
+#define TCB_TIMESTAMP_M		0xffffffffULL
+#define TCB_TIMESTAMP_V(x)	((x) << TCB_TIMESTAMP_S)
+
+#define TCB_RTT_TS_RECENT_AGE_W		6
+#define TCB_RTT_TS_RECENT_AGE_S		0
+#define TCB_RTT_TS_RECENT_AGE_M		0xffffffffULL
+#define TCB_RTT_TS_RECENT_AGE_V(x)	((x) << TCB_RTT_TS_RECENT_AGE_S)
+
+#define TCB_SND_UNA_RAW_W	10
+#define TCB_RX_FRAG2_PTR_RAW_W	27
+#define TCB_RX_FRAG3_LEN_RAW_W	29
+#define TCB_RX_FRAG3_START_IDX_OFFSET_RAW_W	30
+#define TCB_PDU_HDR_LEN_W	31
+#endif /* __T4_TCB_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index ca2756dcefc5..57eb4ad3485d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -105,7 +105,8 @@ enum fw_wr_opcodes {
 	FW_ISCSI_TX_DATA_WR	       = 0x45,
 	FW_PTP_TX_PKT_WR               = 0x46,
 	FW_CRYPTO_LOOKASIDE_WR         = 0X6d,
-	FW_LASTC2E_WR                  = 0x70
+	FW_LASTC2E_WR                  = 0x70,
+	FW_FILTER2_WR		       = 0x77
 };
 
 struct fw_wr_hdr {
@@ -201,6 +202,51 @@ struct fw_filter_wr {
 	__u8   sma[6];
 };
 
+struct fw_filter2_wr {
+	__be32 op_pkd;
+	__be32 len16_pkd;
+	__be64 r3;
+	__be32 tid_to_iq;
+	__be32 del_filter_to_l2tix;
+	__be16 ethtype;
+	__be16 ethtypem;
+	__u8   frag_to_ovlan_vldm;
+	__u8   smac_sel;
+	__be16 rx_chan_rx_rpl_iq;
+	__be32 maci_to_matchtypem;
+	__u8   ptcl;
+	__u8   ptclm;
+	__u8   ttyp;
+	__u8   ttypm;
+	__be16 ivlan;
+	__be16 ivlanm;
+	__be16 ovlan;
+	__be16 ovlanm;
+	__u8   lip[16];
+	__u8   lipm[16];
+	__u8   fip[16];
+	__u8   fipm[16];
+	__be16 lp;
+	__be16 lpm;
+	__be16 fp;
+	__be16 fpm;
+	__be16 r7;
+	__u8   sma[6];
+	__be16 r8;
+	__u8   filter_type_swapmac;
+	__u8   natmode_to_ulp_type;
+	__be16 newlport;
+	__be16 newfport;
+	__u8   newlip[16];
+	__u8   newfip[16];
+	__be32 natseqcheck;
+	__be32 r9;
+	__be64 r10;
+	__be64 r11;
+	__be64 r12;
+	__be64 r13;
+};
+
 #define FW_FILTER_WR_TID_S      12
 #define FW_FILTER_WR_TID_M      0xfffff
 #define FW_FILTER_WR_TID_V(x)   ((x) << FW_FILTER_WR_TID_S)
@@ -385,6 +431,32 @@ struct fw_filter_wr {
 #define FW_FILTER_WR_RX_RPL_IQ_G(x)     \
 	(((x) >> FW_FILTER_WR_RX_RPL_IQ_S) & FW_FILTER_WR_RX_RPL_IQ_M)
 
+#define FW_FILTER2_WR_FILTER_TYPE_S	1
+#define FW_FILTER2_WR_FILTER_TYPE_M	0x1
+#define FW_FILTER2_WR_FILTER_TYPE_V(x)	((x) << FW_FILTER2_WR_FILTER_TYPE_S)
+#define FW_FILTER2_WR_FILTER_TYPE_G(x)  \
+	(((x) >> FW_FILTER2_WR_FILTER_TYPE_S) & FW_FILTER2_WR_FILTER_TYPE_M)
+#define FW_FILTER2_WR_FILTER_TYPE_F	FW_FILTER2_WR_FILTER_TYPE_V(1U)
+
+#define FW_FILTER2_WR_NATMODE_S		5
+#define FW_FILTER2_WR_NATMODE_M		0x7
+#define FW_FILTER2_WR_NATMODE_V(x)	((x) << FW_FILTER2_WR_NATMODE_S)
+#define FW_FILTER2_WR_NATMODE_G(x)      \
+	(((x) >> FW_FILTER2_WR_NATMODE_S) & FW_FILTER2_WR_NATMODE_M)
+
+#define FW_FILTER2_WR_NATFLAGCHECK_S	4
+#define FW_FILTER2_WR_NATFLAGCHECK_M	0x1
+#define FW_FILTER2_WR_NATFLAGCHECK_V(x)	((x) << FW_FILTER2_WR_NATFLAGCHECK_S)
+#define FW_FILTER2_WR_NATFLAGCHECK_G(x) \
+	(((x) >> FW_FILTER2_WR_NATFLAGCHECK_S) & FW_FILTER2_WR_NATFLAGCHECK_M)
+#define FW_FILTER2_WR_NATFLAGCHECK_F	FW_FILTER2_WR_NATFLAGCHECK_V(1U)
+
+#define FW_FILTER2_WR_ULP_TYPE_S	0
+#define FW_FILTER2_WR_ULP_TYPE_M	0xf
+#define FW_FILTER2_WR_ULP_TYPE_V(x)	((x) << FW_FILTER2_WR_ULP_TYPE_S)
+#define FW_FILTER2_WR_ULP_TYPE_G(x)     \
+	(((x) >> FW_FILTER2_WR_ULP_TYPE_S) & FW_FILTER2_WR_ULP_TYPE_M)
+
 #define FW_FILTER_WR_MACI_S     23
 #define FW_FILTER_WR_MACI_M     0x1ff
 #define FW_FILTER_WR_MACI_V(x)  ((x) << FW_FILTER_WR_MACI_S)
@@ -1020,6 +1092,7 @@ enum fw_caps_config_switch {
 enum fw_caps_config_nic {
 	FW_CAPS_CONFIG_NIC		= 0x00000001,
 	FW_CAPS_CONFIG_NIC_VM		= 0x00000002,
+	FW_CAPS_CONFIG_NIC_HASHFILTER	= 0x00000020,
 };
 
 enum fw_caps_config_ofld {
@@ -1127,6 +1200,7 @@ enum fw_params_param_dev {
 	FW_PARAMS_PARAM_DEV_SCFGREV = 0x1A,
 	FW_PARAMS_PARAM_DEV_VPDREV = 0x1B,
 	FW_PARAMS_PARAM_DEV_RI_FR_NSMR_TPTE_WR	= 0x1C,
+	FW_PARAMS_PARAM_DEV_FILTER2_WR  = 0x1D,
 	FW_PARAMS_PARAM_DEV_MPSBGMAP	= 0x1E,
 };
 
@@ -1171,9 +1245,12 @@ enum fw_params_param_pfvf {
 	FW_PARAMS_PARAM_PFVF_EQ_END	= 0x2C,
 	FW_PARAMS_PARAM_PFVF_ACTIVE_FILTER_START = 0x2D,
 	FW_PARAMS_PARAM_PFVF_ACTIVE_FILTER_END = 0x2E,
+	FW_PARAMS_PARAM_PFVF_ETHOFLD_START = 0x2F,
 	FW_PARAMS_PARAM_PFVF_ETHOFLD_END = 0x30,
 	FW_PARAMS_PARAM_PFVF_CPLFW4MSG_ENCAP = 0x31,
-	FW_PARAMS_PARAM_PFVF_NCRYPTO_LOOKASIDE = 0x32,
+	FW_PARAMS_PARAM_PFVF_HPFILTER_START = 0x32,
+	FW_PARAMS_PARAM_PFVF_HPFILTER_END = 0x33,
+	FW_PARAMS_PARAM_PFVF_NCRYPTO_LOOKASIDE = 0x39,
 	FW_PARAMS_PARAM_PFVF_PORT_CAPS32 = 0x3A,
 };
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 8996ebbd222e..b48361cfdc78 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -1401,6 +1401,63 @@ static int cxgb4vf_get_link_ksettings(struct net_device *dev,
 	return 0;
 }
 
+/* Translate the Firmware FEC value into the ethtool value. */
+static inline unsigned int fwcap_to_eth_fec(unsigned int fw_fec)
+{
+	unsigned int eth_fec = 0;
+
+	if (fw_fec & FW_PORT_CAP32_FEC_RS)
+		eth_fec |= ETHTOOL_FEC_RS;
+	if (fw_fec & FW_PORT_CAP32_FEC_BASER_RS)
+		eth_fec |= ETHTOOL_FEC_BASER;
+
+	/* if nothing is set, then FEC is off */
+	if (!eth_fec)
+		eth_fec = ETHTOOL_FEC_OFF;
+
+	return eth_fec;
+}
+
+/* Translate Common Code FEC value into ethtool value. */
+static inline unsigned int cc_to_eth_fec(unsigned int cc_fec)
+{
+	unsigned int eth_fec = 0;
+
+	if (cc_fec & FEC_AUTO)
+		eth_fec |= ETHTOOL_FEC_AUTO;
+	if (cc_fec & FEC_RS)
+		eth_fec |= ETHTOOL_FEC_RS;
+	if (cc_fec & FEC_BASER_RS)
+		eth_fec |= ETHTOOL_FEC_BASER;
+
+	/* if nothing is set, then FEC is off */
+	if (!eth_fec)
+		eth_fec = ETHTOOL_FEC_OFF;
+
+	return eth_fec;
+}
+
+static int cxgb4vf_get_fecparam(struct net_device *dev,
+				struct ethtool_fecparam *fec)
+{
+	const struct port_info *pi = netdev_priv(dev);
+	const struct link_config *lc = &pi->link_cfg;
+
+	/* Translate the Firmware FEC Support into the ethtool value.  We
+	 * always support IEEE 802.3 "automatic" selection of Link FEC type if
+	 * any FEC is supported.
+	 */
+	fec->fec = fwcap_to_eth_fec(lc->pcaps);
+	if (fec->fec != ETHTOOL_FEC_OFF)
+		fec->fec |= ETHTOOL_FEC_AUTO;
+
+	/* Translate the current internal FEC parameters into the
+	 * ethtool values.
+	 */
+	fec->active_fec = cc_to_eth_fec(lc->fec);
+	return 0;
+}
+
 /*
  * Return our driver information.
  */
@@ -1774,6 +1831,7 @@ static void cxgb4vf_get_wol(struct net_device *dev,
 
 static const struct ethtool_ops cxgb4vf_ethtool_ops = {
 	.get_link_ksettings	= cxgb4vf_get_link_ksettings,
+	.get_fecparam		= cxgb4vf_get_fecparam,
 	.get_drvinfo		= cxgb4vf_get_drvinfo,
 	.get_msglevel		= cxgb4vf_get_msglevel,
 	.set_msglevel		= cxgb4vf_set_msglevel,
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index 05498e7f2840..14d7e673c656 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -2058,9 +2058,9 @@ irq_handler_t t4vf_intr_handler(struct adapter *adapter)
  *	when out of memory a queue can become empty.  We schedule NAPI to do
  *	the actual refill.
  */
-static void sge_rx_timer_cb(unsigned long data)
+static void sge_rx_timer_cb(struct timer_list *t)
 {
-	struct adapter *adapter = (struct adapter *)data;
+	struct adapter *adapter = from_timer(adapter, t, sge.rx_timer);
 	struct sge *s = &adapter->sge;
 	unsigned int i;
 
@@ -2117,9 +2117,9 @@ static void sge_rx_timer_cb(unsigned long data)
  *	when no new packets are being submitted.  This is essential for pktgen,
  *	at least.
  */
-static void sge_tx_timer_cb(unsigned long data)
+static void sge_tx_timer_cb(struct timer_list *t)
 {
-	struct adapter *adapter = (struct adapter *)data;
+	struct adapter *adapter = from_timer(adapter, t, sge.tx_timer);
 	struct sge *s = &adapter->sge;
 	unsigned int i, budget;
 
@@ -2676,8 +2676,8 @@ int t4vf_sge_init(struct adapter *adapter)
 	/*
 	 * Set up tasklet timers.
 	 */
-	setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adapter);
-	setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adapter);
+	timer_setup(&s->rx_timer, sge_rx_timer_cb, 0);
+	timer_setup(&s->tx_timer, sge_tx_timer_cb, 0);
 
 	/*
 	 * Initialize Forwarded Interrupt Queue lock.
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
index a8d94963b4d0..67aec59a14e6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
@@ -1812,7 +1812,7 @@ int t4vf_eth_eq_free(struct adapter *adapter, unsigned int eqid)
  *
  *	Returns a string representation of the Link Down Reason Code.
  */
-const char *t4vf_link_down_rc_str(unsigned char link_down_rc)
+static const char *t4vf_link_down_rc_str(unsigned char link_down_rc)
 {
 	static const char * const reason[] = {
 		"Link Down",
@@ -1838,8 +1838,8 @@ const char *t4vf_link_down_rc_str(unsigned char link_down_rc)
  *
  *	Processes a GET_PORT_INFO FW reply message.
  */
-void t4vf_handle_get_port_info(struct port_info *pi,
-			       const struct fw_port_cmd *cmd)
+static void t4vf_handle_get_port_info(struct port_info *pi,
+				      const struct fw_port_cmd *cmd)
 {
 	int action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16));
 	struct adapter *adapter = pi->adapter;
diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h
index ba032ac9ae86..6a9527004cb1 100644
--- a/drivers/net/ethernet/cisco/enic/enic.h
+++ b/drivers/net/ethernet/cisco/enic/enic.h
@@ -33,7 +33,7 @@
 
 #define DRV_NAME		"enic"
 #define DRV_DESCRIPTION		"Cisco VIC Ethernet NIC Driver"
-#define DRV_VERSION		"2.3.0.42"
+#define DRV_VERSION		"2.3.0.45"
 #define DRV_COPYRIGHT		"Copyright 2008-2013 Cisco Systems, Inc"
 
 #define ENIC_BARS_MAX		6
diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.h b/drivers/net/ethernet/cisco/enic/enic_clsf.h
index d3c4a1cb0610..0ae83e091a62 100644
--- a/drivers/net/ethernet/cisco/enic/enic_clsf.h
+++ b/drivers/net/ethernet/cisco/enic/enic_clsf.h
@@ -20,9 +20,8 @@ void enic_flow_may_expire(unsigned long data);
 
 static inline void enic_rfs_timer_start(struct enic *enic)
 {
-	init_timer(&enic->rfs_h.rfs_may_expire);
-	enic->rfs_h.rfs_may_expire.function = enic_flow_may_expire;
-	enic->rfs_h.rfs_may_expire.data = (unsigned long)enic;
+	setup_timer(&enic->rfs_h.rfs_may_expire, enic_flow_may_expire,
+		    (unsigned long)enic);
 	mod_timer(&enic->rfs_h.rfs_may_expire, jiffies + HZ/4);
 }
 
diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
index fd3980cc1e34..462d0ce51240 100644
--- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c
+++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
@@ -176,6 +176,81 @@ static void enic_get_strings(struct net_device *netdev, u32 stringset,
 	}
 }
 
+static void enic_get_ringparam(struct net_device *netdev,
+			       struct ethtool_ringparam *ring)
+{
+	struct enic *enic = netdev_priv(netdev);
+	struct vnic_enet_config *c = &enic->config;
+
+	ring->rx_max_pending = ENIC_MAX_RQ_DESCS;
+	ring->rx_pending = c->rq_desc_count;
+	ring->tx_max_pending = ENIC_MAX_WQ_DESCS;
+	ring->tx_pending = c->wq_desc_count;
+}
+
+static int enic_set_ringparam(struct net_device *netdev,
+			      struct ethtool_ringparam *ring)
+{
+	struct enic *enic = netdev_priv(netdev);
+	struct vnic_enet_config *c = &enic->config;
+	int running = netif_running(netdev);
+	unsigned int rx_pending;
+	unsigned int tx_pending;
+	int err = 0;
+
+	if (ring->rx_mini_max_pending || ring->rx_mini_pending) {
+		netdev_info(netdev,
+			    "modifying mini ring params is not supported");
+		return -EINVAL;
+	}
+	if (ring->rx_jumbo_max_pending || ring->rx_jumbo_pending) {
+		netdev_info(netdev,
+			    "modifying jumbo ring params is not supported");
+		return -EINVAL;
+	}
+	rx_pending = c->rq_desc_count;
+	tx_pending = c->wq_desc_count;
+	if (ring->rx_pending > ENIC_MAX_RQ_DESCS ||
+	    ring->rx_pending < ENIC_MIN_RQ_DESCS) {
+		netdev_info(netdev, "rx pending (%u) not in range [%u,%u]",
+			    ring->rx_pending, ENIC_MIN_RQ_DESCS,
+			    ENIC_MAX_RQ_DESCS);
+		return -EINVAL;
+	}
+	if (ring->tx_pending > ENIC_MAX_WQ_DESCS ||
+	    ring->tx_pending < ENIC_MIN_WQ_DESCS) {
+		netdev_info(netdev, "tx pending (%u) not in range [%u,%u]",
+			    ring->tx_pending, ENIC_MIN_WQ_DESCS,
+			    ENIC_MAX_WQ_DESCS);
+		return -EINVAL;
+	}
+	if (running)
+		dev_close(netdev);
+	c->rq_desc_count =
+		ring->rx_pending & 0xffffffe0; /* must be aligned to groups of 32 */
+	c->wq_desc_count =
+		ring->tx_pending & 0xffffffe0; /* must be aligned to groups of 32 */
+	enic_free_vnic_resources(enic);
+	err = enic_alloc_vnic_resources(enic);
+	if (err) {
+		netdev_err(netdev,
+			   "Failed to alloc vNIC resources, aborting\n");
+		enic_free_vnic_resources(enic);
+		goto err_out;
+	}
+	enic_init_vnic_resources(enic);
+	if (running) {
+		err = dev_open(netdev);
+		if (err)
+			goto err_out;
+	}
+	return 0;
+err_out:
+	c->rq_desc_count = rx_pending;
+	c->wq_desc_count = tx_pending;
+	return err;
+}
+
 static int enic_get_sset_count(struct net_device *netdev, int sset)
 {
 	switch (sset) {
@@ -509,6 +584,8 @@ static const struct ethtool_ops enic_ethtool_ops = {
 	.set_msglevel = enic_set_msglevel,
 	.get_link = ethtool_op_get_link,
 	.get_strings = enic_get_strings,
+	.get_ringparam = enic_get_ringparam,
+	.set_ringparam = enic_set_ringparam,
 	.get_sset_count = enic_get_sset_count,
 	.get_ethtool_stats = enic_get_ethtool_stats,
 	.get_coalesce = enic_get_coalesce,
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index d24ee1ad3be1..4a11baffe02d 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -2846,9 +2846,8 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	/* Setup notification timer, HW reset task, and wq locks
 	 */
 
-	init_timer(&enic->notify_timer);
-	enic->notify_timer.function = enic_notify_timer;
-	enic->notify_timer.data = (unsigned long)enic;
+	setup_timer(&enic->notify_timer, enic_notify_timer,
+		    (unsigned long)enic);
 
 	enic_set_rx_coal_setting(enic);
 	INIT_WORK(&enic->reset, enic_reset);
diff --git a/drivers/net/ethernet/cisco/enic/vnic_rq.c b/drivers/net/ethernet/cisco/enic/vnic_rq.c
index 36bc2c71fba9..f8aa326d1d58 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_rq.c
+++ b/drivers/net/ethernet/cisco/enic/vnic_rq.c
@@ -139,20 +139,8 @@ void vnic_rq_init(struct vnic_rq *rq, unsigned int cq_index,
 	unsigned int error_interrupt_enable,
 	unsigned int error_interrupt_offset)
 {
-	u32 fetch_index = 0;
-
-	/* Use current fetch_index as the ring starting point */
-	fetch_index = ioread32(&rq->ctrl->fetch_index);
-
-	if (fetch_index == 0xFFFFFFFF) { /* check for hardware gone  */
-		/* Hardware surprise removal: reset fetch_index */
-		fetch_index = 0;
-	}
-
-	vnic_rq_init_start(rq, cq_index,
-		fetch_index, fetch_index,
-		error_interrupt_enable,
-		error_interrupt_offset);
+	vnic_rq_init_start(rq, cq_index, 0, 0, error_interrupt_enable,
+			   error_interrupt_offset);
 }
 
 unsigned int vnic_rq_error_status(struct vnic_rq *rq)
diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c
index c87b8cc42963..13430f75496c 100644
--- a/drivers/net/ethernet/dec/tulip/de2104x.c
+++ b/drivers/net/ethernet/dec/tulip/de2104x.c
@@ -333,8 +333,8 @@ static void de_set_rx_mode (struct net_device *dev);
 static void de_tx (struct de_private *de);
 static void de_clean_rings (struct de_private *de);
 static void de_media_interrupt (struct de_private *de, u32 status);
-static void de21040_media_timer (unsigned long data);
-static void de21041_media_timer (unsigned long data);
+static void de21040_media_timer (struct timer_list *t);
+static void de21041_media_timer (struct timer_list *t);
 static unsigned int de_ok_to_advertise (struct de_private *de, u32 new_media);
 
 
@@ -959,9 +959,9 @@ static void de_next_media (struct de_private *de, const u32 *media,
 	}
 }
 
-static void de21040_media_timer (unsigned long data)
+static void de21040_media_timer (struct timer_list *t)
 {
-	struct de_private *de = (struct de_private *) data;
+	struct de_private *de = from_timer(de, t, media_timer);
 	struct net_device *dev = de->dev;
 	u32 status = dr32(SIAStatus);
 	unsigned int carrier;
@@ -1040,9 +1040,9 @@ static unsigned int de_ok_to_advertise (struct de_private *de, u32 new_media)
 	return 1;
 }
 
-static void de21041_media_timer (unsigned long data)
+static void de21041_media_timer (struct timer_list *t)
 {
-	struct de_private *de = (struct de_private *) data;
+	struct de_private *de = from_timer(de, t, media_timer);
 	struct net_device *dev = de->dev;
 	u32 status = dr32(SIAStatus);
 	unsigned int carrier;
@@ -1999,12 +1999,9 @@ static int de_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	de->msg_enable = (debug < 0 ? DE_DEF_MSG_ENABLE : debug);
 	de->board_idx = board_idx;
 	spin_lock_init (&de->lock);
-	init_timer(&de->media_timer);
-	if (de->de21040)
-		de->media_timer.function = de21040_media_timer;
-	else
-		de->media_timer.function = de21041_media_timer;
-	de->media_timer.data = (unsigned long) de;
+	timer_setup(&de->media_timer,
+		    de->de21040 ? de21040_media_timer : de21041_media_timer,
+		    0);
 
 	netif_carrier_off(dev);
 
diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c
index 0affee9c8aa2..a31b4df3e7ff 100644
--- a/drivers/net/ethernet/dec/tulip/de4x5.c
+++ b/drivers/net/ethernet/dec/tulip/de4x5.c
@@ -912,7 +912,7 @@ static int     de4x5_init(struct net_device *dev);
 static int     de4x5_sw_reset(struct net_device *dev);
 static int     de4x5_rx(struct net_device *dev);
 static int     de4x5_tx(struct net_device *dev);
-static void    de4x5_ast(struct net_device *dev);
+static void    de4x5_ast(struct timer_list *t);
 static int     de4x5_txur(struct net_device *dev);
 static int     de4x5_rx_ovfc(struct net_device *dev);
 
@@ -1147,9 +1147,7 @@ de4x5_hw_init(struct net_device *dev, u_long iobase, struct device *gendev)
 	lp->timeout = -1;
 	lp->gendev = gendev;
 	spin_lock_init(&lp->lock);
-	init_timer(&lp->timer);
-	lp->timer.function = (void (*)(unsigned long))de4x5_ast;
-	lp->timer.data = (unsigned long)dev;
+	timer_setup(&lp->timer, de4x5_ast, 0);
 	de4x5_parse_params(dev);
 
 	/*
@@ -1742,9 +1740,10 @@ de4x5_tx(struct net_device *dev)
 }
 
 static void
-de4x5_ast(struct net_device *dev)
+de4x5_ast(struct timer_list *t)
 {
-	struct de4x5_private *lp = netdev_priv(dev);
+	struct de4x5_private *lp = from_timer(lp, t, timer);
+	struct net_device *dev = dev_get_drvdata(lp->gendev);
 	int next_tick = DE4X5_AUTOSENSE_MS;
 	int dt;
 
@@ -2370,7 +2369,7 @@ autoconf_media(struct net_device *dev)
 	lp->media = INIT;
 	lp->tcount = 0;
 
-	de4x5_ast(dev);
+	de4x5_ast(&lp->timer);
 
 	return lp->media;
 }
diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c
index 07e10a45beaa..17ef7a28873d 100644
--- a/drivers/net/ethernet/dec/tulip/dmfe.c
+++ b/drivers/net/ethernet/dec/tulip/dmfe.c
@@ -331,7 +331,7 @@ static void dmfe_phy_write_1bit(void __iomem *, u32);
 static u16 dmfe_phy_read_1bit(void __iomem *);
 static u8 dmfe_sense_speed(struct dmfe_board_info *);
 static void dmfe_process_mode(struct dmfe_board_info *);
-static void dmfe_timer(unsigned long);
+static void dmfe_timer(struct timer_list *);
 static inline u32 cal_CRC(unsigned char *, unsigned int, u8);
 static void dmfe_rx_packet(struct net_device *, struct dmfe_board_info *);
 static void dmfe_free_tx_pkt(struct net_device *, struct dmfe_board_info *);
@@ -596,10 +596,8 @@ static int dmfe_open(struct net_device *dev)
 	netif_wake_queue(dev);
 
 	/* set and active a timer process */
-	init_timer(&db->timer);
+	timer_setup(&db->timer, dmfe_timer, 0);
 	db->timer.expires = DMFE_TIMER_WUT + HZ * 2;
-	db->timer.data = (unsigned long)dev;
-	db->timer.function = dmfe_timer;
 	add_timer(&db->timer);
 
 	return 0;
@@ -1130,10 +1128,10 @@ static const struct ethtool_ops netdev_ethtool_ops = {
  *	Dynamic media sense, allocate Rx buffer...
  */
 
-static void dmfe_timer(unsigned long data)
+static void dmfe_timer(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *)data;
-	struct dmfe_board_info *db = netdev_priv(dev);
+	struct dmfe_board_info *db = from_timer(db, t, timer);
+	struct net_device *dev = pci_get_drvdata(db->pdev);
 	void __iomem *ioaddr = db->ioaddr;
 	u32 tmp_cr8;
 	unsigned char tmp_cr12;
diff --git a/drivers/net/ethernet/dec/tulip/interrupt.c b/drivers/net/ethernet/dec/tulip/interrupt.c
index 8df80880ecaa..c1ca0765d56d 100644
--- a/drivers/net/ethernet/dec/tulip/interrupt.c
+++ b/drivers/net/ethernet/dec/tulip/interrupt.c
@@ -102,10 +102,10 @@ int tulip_refill_rx(struct net_device *dev)
 
 #ifdef CONFIG_TULIP_NAPI
 
-void oom_timer(unsigned long data)
+void oom_timer(struct timer_list *t)
 {
-        struct net_device *dev = (struct net_device *)data;
-	struct tulip_private *tp = netdev_priv(dev);
+	struct tulip_private *tp = from_timer(tp, t, oom_timer);
+
 	napi_schedule(&tp->napi);
 }
 
diff --git a/drivers/net/ethernet/dec/tulip/pnic.c b/drivers/net/ethernet/dec/tulip/pnic.c
index 7bcccf5cac7a..3fb39e32e1b4 100644
--- a/drivers/net/ethernet/dec/tulip/pnic.c
+++ b/drivers/net/ethernet/dec/tulip/pnic.c
@@ -84,10 +84,10 @@ void pnic_lnk_change(struct net_device *dev, int csr5)
 	}
 }
 
-void pnic_timer(unsigned long data)
+void pnic_timer(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *)data;
-	struct tulip_private *tp = netdev_priv(dev);
+	struct tulip_private *tp = from_timer(tp, t, timer);
+	struct net_device *dev = tp->dev;
 	void __iomem *ioaddr = tp->base_addr;
 	int next_tick = 60*HZ;
 
diff --git a/drivers/net/ethernet/dec/tulip/pnic2.c b/drivers/net/ethernet/dec/tulip/pnic2.c
index 5895fc43f6e0..412adaa7fdf8 100644
--- a/drivers/net/ethernet/dec/tulip/pnic2.c
+++ b/drivers/net/ethernet/dec/tulip/pnic2.c
@@ -76,10 +76,10 @@
 #include <linux/delay.h>
 
 
-void pnic2_timer(unsigned long data)
+void pnic2_timer(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *)data;
-	struct tulip_private *tp = netdev_priv(dev);
+	struct tulip_private *tp = from_timer(tp, t, timer);
+	struct net_device *dev = tp->dev;
 	void __iomem *ioaddr = tp->base_addr;
 	int next_tick = 60*HZ;
 
diff --git a/drivers/net/ethernet/dec/tulip/timer.c b/drivers/net/ethernet/dec/tulip/timer.c
index 523d9dde50a2..642e9dfc5451 100644
--- a/drivers/net/ethernet/dec/tulip/timer.c
+++ b/drivers/net/ethernet/dec/tulip/timer.c
@@ -137,10 +137,10 @@ void tulip_media_task(struct work_struct *work)
 }
 
 
-void mxic_timer(unsigned long data)
+void mxic_timer(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *)data;
-	struct tulip_private *tp = netdev_priv(dev);
+	struct tulip_private *tp = from_timer(tp, t, timer);
+	struct net_device *dev = tp->dev;
 	void __iomem *ioaddr = tp->base_addr;
 	int next_tick = 60*HZ;
 
@@ -154,10 +154,10 @@ void mxic_timer(unsigned long data)
 }
 
 
-void comet_timer(unsigned long data)
+void comet_timer(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *)data;
-	struct tulip_private *tp = netdev_priv(dev);
+	struct tulip_private *tp = from_timer(tp, t, timer);
+	struct net_device *dev = tp->dev;
 	int next_tick = 2*HZ;
 
 	if (tulip_debug > 1)
diff --git a/drivers/net/ethernet/dec/tulip/tulip.h b/drivers/net/ethernet/dec/tulip/tulip.h
index 06660dbc44b7..b458140aeaef 100644
--- a/drivers/net/ethernet/dec/tulip/tulip.h
+++ b/drivers/net/ethernet/dec/tulip/tulip.h
@@ -43,7 +43,7 @@ struct tulip_chip_table {
 	int io_size;
 	int valid_intrs;	/* CSR7 interrupt enable settings */
 	int flags;
-	void (*media_timer) (unsigned long);
+	void (*media_timer) (struct timer_list *);
 	work_func_t media_task;
 };
 
@@ -476,7 +476,7 @@ void t21142_lnk_change(struct net_device *dev, int csr5);
 
 /* PNIC2.c */
 void pnic2_lnk_change(struct net_device *dev, int csr5);
-void pnic2_timer(unsigned long data);
+void pnic2_timer(struct timer_list *t);
 void pnic2_start_nway(struct net_device *dev);
 void pnic2_lnk_change(struct net_device *dev, int csr5);
 
@@ -504,19 +504,19 @@ void tulip_find_mii (struct net_device *dev, int board_idx);
 /* pnic.c */
 void pnic_do_nway(struct net_device *dev);
 void pnic_lnk_change(struct net_device *dev, int csr5);
-void pnic_timer(unsigned long data);
+void pnic_timer(struct timer_list *t);
 
 /* timer.c */
 void tulip_media_task(struct work_struct *work);
-void mxic_timer(unsigned long data);
-void comet_timer(unsigned long data);
+void mxic_timer(struct timer_list *t);
+void comet_timer(struct timer_list *t);
 
 /* tulip_core.c */
 extern int tulip_debug;
 extern const char * const medianame[];
 extern const char tulip_media_cap[];
 extern const struct tulip_chip_table tulip_tbl[];
-void oom_timer(unsigned long data);
+void oom_timer(struct timer_list *t);
 extern u8 t21040_csr13[];
 
 static inline void tulip_start_rxtx(struct tulip_private *tp)
diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c
index 851b6d1f5a42..00d02a0967d0 100644
--- a/drivers/net/ethernet/dec/tulip/tulip_core.c
+++ b/drivers/net/ethernet/dec/tulip/tulip_core.c
@@ -123,10 +123,10 @@ int tulip_debug = TULIP_DEBUG;
 int tulip_debug = 1;
 #endif
 
-static void tulip_timer(unsigned long data)
+static void tulip_timer(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *)data;
-	struct tulip_private *tp = netdev_priv(dev);
+	struct tulip_private *tp = from_timer(tp, t, timer);
+	struct net_device *dev = tp->dev;
 
 	if (netif_running(dev))
 		schedule_work(&tp->media_work);
@@ -505,7 +505,7 @@ media_picked:
 	tp->timer.expires = RUN_AT(next_tick);
 	add_timer(&tp->timer);
 #ifdef CONFIG_TULIP_NAPI
-	setup_timer(&tp->oom_timer, oom_timer, (unsigned long)dev);
+	timer_setup(&tp->oom_timer, oom_timer, 0);
 #endif
 }
 
@@ -780,8 +780,7 @@ static void tulip_down (struct net_device *dev)
 
 	spin_unlock_irqrestore (&tp->lock, flags);
 
-	setup_timer(&tp->timer, tulip_tbl[tp->chip_id].media_timer,
-		    (unsigned long)dev);
+	timer_setup(&tp->timer, tulip_tbl[tp->chip_id].media_timer, 0);
 
 	dev->if_port = tp->saved_if_port;
 
@@ -1470,8 +1469,7 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	tp->csr0 = csr0;
 	spin_lock_init(&tp->lock);
 	spin_lock_init(&tp->mii_lock);
-	setup_timer(&tp->timer, tulip_tbl[tp->chip_id].media_timer,
-		    (unsigned long)dev);
+	timer_setup(&tp->timer, tulip_tbl[tp->chip_id].media_timer, 0);
 
 	INIT_WORK(&tp->media_work, tulip_tbl[tp->chip_id].media_task);
 
diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c
index 7fc248efc4ba..488a744084c9 100644
--- a/drivers/net/ethernet/dec/tulip/uli526x.c
+++ b/drivers/net/ethernet/dec/tulip/uli526x.c
@@ -241,7 +241,7 @@ static void phy_write_1bit(struct uli526x_board_info *db, u32);
 static u16 phy_read_1bit(struct uli526x_board_info *db);
 static u8 uli526x_sense_speed(struct uli526x_board_info *);
 static void uli526x_process_mode(struct uli526x_board_info *);
-static void uli526x_timer(unsigned long);
+static void uli526x_timer(struct timer_list *t);
 static void uli526x_rx_packet(struct net_device *, struct uli526x_board_info *);
 static void uli526x_free_tx_pkt(struct net_device *, struct uli526x_board_info *);
 static void uli526x_reuse_skb(struct uli526x_board_info *, struct sk_buff *);
@@ -491,10 +491,8 @@ static int uli526x_open(struct net_device *dev)
 	netif_wake_queue(dev);
 
 	/* set and active a timer process */
-	init_timer(&db->timer);
+	timer_setup(&db->timer, uli526x_timer, 0);
 	db->timer.expires = ULI526X_TIMER_WUT + HZ * 2;
-	db->timer.data = (unsigned long)dev;
-	db->timer.function = uli526x_timer;
 	add_timer(&db->timer);
 
 	return 0;
@@ -1023,10 +1021,10 @@ static const struct ethtool_ops netdev_ethtool_ops = {
  *	Dynamic media sense, allocate Rx buffer...
  */
 
-static void uli526x_timer(unsigned long data)
+static void uli526x_timer(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *) data;
-	struct uli526x_board_info *db = netdev_priv(dev);
+	struct uli526x_board_info *db = from_timer(db, t, timer);
+	struct net_device *dev = pci_get_drvdata(db->pdev);
 	struct uli_phy_ops *phy = &db->phy;
 	void __iomem *ioaddr = db->ioaddr;
  	unsigned long flags;
diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c
index 32d7229544fa..70cb2d689c2c 100644
--- a/drivers/net/ethernet/dec/tulip/winbond-840.c
+++ b/drivers/net/ethernet/dec/tulip/winbond-840.c
@@ -327,7 +327,7 @@ static int  mdio_read(struct net_device *dev, int phy_id, int location);
 static void mdio_write(struct net_device *dev, int phy_id, int location, int value);
 static int  netdev_open(struct net_device *dev);
 static int  update_link(struct net_device *dev);
-static void netdev_timer(unsigned long data);
+static void netdev_timer(struct timer_list *t);
 static void init_rxtx_rings(struct net_device *dev);
 static void free_rxtx_rings(struct netdev_private *np);
 static void init_registers(struct net_device *dev);
@@ -655,10 +655,8 @@ static int netdev_open(struct net_device *dev)
 		netdev_dbg(dev, "Done netdev_open()\n");
 
 	/* Set the timer to check for link beat. */
-	init_timer(&np->timer);
+	timer_setup(&np->timer, netdev_timer, 0);
 	np->timer.expires = jiffies + 1*HZ;
-	np->timer.data = (unsigned long)dev;
-	np->timer.function = netdev_timer;				/* timer handler */
 	add_timer(&np->timer);
 	return 0;
 out_err:
@@ -774,10 +772,10 @@ static inline void update_csr6(struct net_device *dev, int new)
 		np->mii_if.full_duplex = 1;
 }
 
-static void netdev_timer(unsigned long data)
+static void netdev_timer(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *)data;
-	struct netdev_private *np = netdev_priv(dev);
+	struct netdev_private *np = from_timer(np, t, timer);
+	struct net_device *dev = pci_get_drvdata(np->pci_dev);
 	void __iomem *ioaddr = np->base_addr;
 
 	if (debug > 2)
diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c
index 778f974e2928..f0536b16b3c3 100644
--- a/drivers/net/ethernet/dlink/dl2k.c
+++ b/drivers/net/ethernet/dlink/dl2k.c
@@ -68,7 +68,7 @@ static const int max_intrloop = 50;
 static const int multicast_filter_limit = 0x40;
 
 static int rio_open (struct net_device *dev);
-static void rio_timer (unsigned long data);
+static void rio_timer (struct timer_list *t);
 static void rio_tx_timeout (struct net_device *dev);
 static netdev_tx_t start_xmit (struct sk_buff *skb, struct net_device *dev);
 static irqreturn_t rio_interrupt (int irq, void *dev_instance);
@@ -313,7 +313,7 @@ find_miiphy (struct net_device *dev)
 {
 	struct netdev_private *np = netdev_priv(dev);
 	int i, phy_found = 0;
-	np = netdev_priv(dev);
+
 	np->phy_addr = 1;
 
 	for (i = 31; i >= 0; i--) {
@@ -644,7 +644,7 @@ static int rio_open(struct net_device *dev)
 		return i;
 	}
 
-	setup_timer(&np->timer, rio_timer, (unsigned long)dev);
+	timer_setup(&np->timer, rio_timer, 0);
 	np->timer.expires = jiffies + 1 * HZ;
 	add_timer(&np->timer);
 
@@ -655,10 +655,10 @@ static int rio_open(struct net_device *dev)
 }
 
 static void
-rio_timer (unsigned long data)
+rio_timer (struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *)data;
-	struct netdev_private *np = netdev_priv(dev);
+	struct netdev_private *np = from_timer(np, t, timer);
+	struct net_device *dev = pci_get_drvdata(np->pdev);
 	unsigned int entry;
 	int next_tick = 1*HZ;
 	unsigned long flags;
diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c
index 2704bcf023be..1a27176381fb 100644
--- a/drivers/net/ethernet/dlink/sundance.c
+++ b/drivers/net/ethernet/dlink/sundance.c
@@ -431,7 +431,7 @@ static void mdio_write(struct net_device *dev, int phy_id, int location, int val
 static int  mdio_wait_link(struct net_device *dev, int wait);
 static int  netdev_open(struct net_device *dev);
 static void check_duplex(struct net_device *dev);
-static void netdev_timer(unsigned long data);
+static void netdev_timer(struct timer_list *t);
 static void tx_timeout(struct net_device *dev);
 static void init_ring(struct net_device *dev);
 static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev);
@@ -913,10 +913,8 @@ static int netdev_open(struct net_device *dev)
 			   ioread16(ioaddr + MACCtrl1), ioread16(ioaddr + MACCtrl0));
 
 	/* Set the timer to check for link beat. */
-	init_timer(&np->timer);
+	timer_setup(&np->timer, netdev_timer, 0);
 	np->timer.expires = jiffies + 3*HZ;
-	np->timer.data = (unsigned long)dev;
-	np->timer.function = netdev_timer;				/* timer handler */
 	add_timer(&np->timer);
 
 	/* Enable interrupts by setting the interrupt mask. */
@@ -953,10 +951,10 @@ static void check_duplex(struct net_device *dev)
 	}
 }
 
-static void netdev_timer(unsigned long data)
+static void netdev_timer(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *)data;
-	struct netdev_private *np = netdev_priv(dev);
+	struct netdev_private *np = from_timer(np, t, timer);
+	struct net_device *dev = np->mii_if.dev;
 	void __iomem *ioaddr = np->base;
 	int next_tick = 10*HZ;
 
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 9ed8e4b81530..78db8e62a83f 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -21,6 +21,7 @@
 
 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
 
+#include <linux/clk.h>
 #include <linux/dma-mapping.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
@@ -59,6 +60,9 @@
 /* Min number of tx ring entries before stopping queue */
 #define TX_THRESHOLD		(MAX_SKB_FRAGS + 1)
 
+#define FTGMAC_100MHZ		100000000
+#define FTGMAC_25MHZ		25000000
+
 struct ftgmac100 {
 	/* Registers */
 	struct resource *res;
@@ -96,6 +100,7 @@ struct ftgmac100 {
 	struct napi_struct napi;
 	struct work_struct reset_task;
 	struct mii_bus *mii_bus;
+	struct clk *clk;
 
 	/* Link management */
 	int cur_speed;
@@ -1734,6 +1739,22 @@ static void ftgmac100_ncsi_handler(struct ncsi_dev *nd)
 		    nd->link_up ? "up" : "down");
 }
 
+static void ftgmac100_setup_clk(struct ftgmac100 *priv)
+{
+	priv->clk = devm_clk_get(priv->dev, NULL);
+	if (IS_ERR(priv->clk))
+		return;
+
+	clk_prepare_enable(priv->clk);
+
+	/* Aspeed specifies a 100MHz clock is required for up to
+	 * 1000Mbit link speeds. As NCSI is limited to 100Mbit, 25MHz
+	 * is sufficient
+	 */
+	clk_set_rate(priv->clk, priv->use_ncsi ? FTGMAC_25MHZ :
+			FTGMAC_100MHZ);
+}
+
 static int ftgmac100_probe(struct platform_device *pdev)
 {
 	struct resource *res;
@@ -1830,6 +1851,9 @@ static int ftgmac100_probe(struct platform_device *pdev)
 			goto err_setup_mdio;
 	}
 
+	if (priv->is_aspeed)
+		ftgmac100_setup_clk(priv);
+
 	/* Default ring sizes */
 	priv->rx_q_entries = priv->new_rx_q_entries = DEF_RX_QUEUE_ENTRIES;
 	priv->tx_q_entries = priv->new_tx_q_entries = DEF_TX_QUEUE_ENTRIES;
@@ -1883,6 +1907,8 @@ static int ftgmac100_remove(struct platform_device *pdev)
 
 	unregister_netdev(netdev);
 
+	clk_disable_unprepare(priv->clk);
+
 	/* There's a small chance the reset task will have been re-queued,
 	 * during stop, make sure it's gone before we free the structure.
 	 */
diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c
index 66928a922824..aecc76504b69 100644
--- a/drivers/net/ethernet/faraday/ftmac100.c
+++ b/drivers/net/ethernet/faraday/ftmac100.c
@@ -402,6 +402,7 @@ static bool ftmac100_rx_packet(struct ftmac100 *priv, int *processed)
 	struct page *page;
 	dma_addr_t map;
 	int length;
+	bool ret;
 
 	rxdes = ftmac100_rx_locate_first_segment(priv);
 	if (!rxdes)
@@ -416,8 +417,8 @@ static bool ftmac100_rx_packet(struct ftmac100 *priv, int *processed)
 	 * It is impossible to get multi-segment packets
 	 * because we always provide big enough receive buffers.
 	 */
-	if (unlikely(!ftmac100_rxdes_last_segment(rxdes)))
-		BUG();
+	ret = ftmac100_rxdes_last_segment(rxdes);
+	BUG_ON(!ret);
 
 	/* start processing */
 	skb = netdev_alloc_skb_ip_align(netdev, 128);
diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c
index e92859dab7ae..23053919ebf5 100644
--- a/drivers/net/ethernet/fealnx.c
+++ b/drivers/net/ethernet/fealnx.c
@@ -426,8 +426,8 @@ static void mdio_write(struct net_device *dev, int phy_id, int location, int val
 static int netdev_open(struct net_device *dev);
 static void getlinktype(struct net_device *dev);
 static void getlinkstatus(struct net_device *dev);
-static void netdev_timer(unsigned long data);
-static void reset_timer(unsigned long data);
+static void netdev_timer(struct timer_list *t);
+static void reset_timer(struct timer_list *t);
 static void fealnx_tx_timeout(struct net_device *dev);
 static void init_ring(struct net_device *dev);
 static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev);
@@ -909,17 +909,13 @@ static int netdev_open(struct net_device *dev)
 		printk(KERN_DEBUG "%s: Done netdev_open().\n", dev->name);
 
 	/* Set the timer to check for link beat. */
-	init_timer(&np->timer);
+	timer_setup(&np->timer, netdev_timer, 0);
 	np->timer.expires = RUN_AT(3 * HZ);
-	np->timer.data = (unsigned long) dev;
-	np->timer.function = netdev_timer;
 
 	/* timer handler */
 	add_timer(&np->timer);
 
-	init_timer(&np->reset_timer);
-	np->reset_timer.data = (unsigned long) dev;
-	np->reset_timer.function = reset_timer;
+	timer_setup(&np->reset_timer, reset_timer, 0);
 	np->reset_timer_armed = 0;
 	return rc;
 }
@@ -1082,10 +1078,10 @@ static void allocate_rx_buffers(struct net_device *dev)
 }
 
 
-static void netdev_timer(unsigned long data)
+static void netdev_timer(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *) data;
-	struct netdev_private *np = netdev_priv(dev);
+	struct netdev_private *np = from_timer(np, t, timer);
+	struct net_device *dev = np->mii.dev;
 	void __iomem *ioaddr = np->mem;
 	int old_crvalue = np->crvalue;
 	unsigned int old_linkok = np->linkok;
@@ -1171,10 +1167,10 @@ static void enable_rxtx(struct net_device *dev)
 }
 
 
-static void reset_timer(unsigned long data)
+static void reset_timer(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *) data;
-	struct netdev_private *np = netdev_priv(dev);
+	struct netdev_private *np = from_timer(np, t, reset_timer);
+	struct net_device *dev = np->mii.dev;
 	unsigned long flags;
 
 	printk(KERN_WARNING "%s: resetting tx and rx machinery\n", dev->name);
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 42258060f142..7caa8da48421 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -351,7 +351,7 @@ static int dpaa_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
 	u8 num_tc;
 	int i;
 
-	if (type != TC_SETUP_MQPRIO)
+	if (type != TC_SETUP_QDISC_MQPRIO)
 		return -EOPNOTSUPP;
 
 	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
@@ -385,34 +385,19 @@ out:
 
 static struct mac_device *dpaa_mac_dev_get(struct platform_device *pdev)
 {
-	struct platform_device *of_dev;
 	struct dpaa_eth_data *eth_data;
-	struct device *dpaa_dev, *dev;
-	struct device_node *mac_node;
+	struct device *dpaa_dev;
 	struct mac_device *mac_dev;
 
 	dpaa_dev = &pdev->dev;
 	eth_data = dpaa_dev->platform_data;
-	if (!eth_data)
+	if (!eth_data) {
+		dev_err(dpaa_dev, "eth_data missing\n");
 		return ERR_PTR(-ENODEV);
-
-	mac_node = eth_data->mac_node;
-
-	of_dev = of_find_device_by_node(mac_node);
-	if (!of_dev) {
-		dev_err(dpaa_dev, "of_find_device_by_node(%pOF) failed\n",
-			mac_node);
-		of_node_put(mac_node);
-		return ERR_PTR(-EINVAL);
 	}
-	of_node_put(mac_node);
-
-	dev = &of_dev->dev;
-
-	mac_dev = dev_get_drvdata(dev);
+	mac_dev = eth_data->mac_dev;
 	if (!mac_dev) {
-		dev_err(dpaa_dev, "dev_get_drvdata(%s) failed\n",
-			dev_name(dev));
+		dev_err(dpaa_dev, "mac_dev missing\n");
 		return ERR_PTR(-EINVAL);
 	}
 
@@ -1736,6 +1721,7 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv,
 
 	/* Iterate through the SGT entries and add data buffers to the skb */
 	sgt = vaddr + fd_off;
+	skb = NULL;
 	for (i = 0; i < DPAA_SGT_MAX_ENTRIES; i++) {
 		/* Extension bit is not supported */
 		WARN_ON(qm_sg_entry_is_ext(&sgt[i]));
@@ -1753,7 +1739,7 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv,
 		count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
 		dma_unmap_single(dpaa_bp->dev, sg_addr, dpaa_bp->size,
 				 DMA_FROM_DEVICE);
-		if (i == 0) {
+		if (!skb) {
 			sz = dpaa_bp->size +
 				SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 			skb = build_skb(sg_vaddr, sz);
@@ -2435,6 +2421,44 @@ static void dpaa_eth_napi_disable(struct dpaa_priv *priv)
 	}
 }
 
+static void dpaa_adjust_link(struct net_device *net_dev)
+{
+	struct mac_device *mac_dev;
+	struct dpaa_priv *priv;
+
+	priv = netdev_priv(net_dev);
+	mac_dev = priv->mac_dev;
+	mac_dev->adjust_link(mac_dev);
+}
+
+static int dpaa_phy_init(struct net_device *net_dev)
+{
+	struct mac_device *mac_dev;
+	struct phy_device *phy_dev;
+	struct dpaa_priv *priv;
+
+	priv = netdev_priv(net_dev);
+	mac_dev = priv->mac_dev;
+
+	phy_dev = of_phy_connect(net_dev, mac_dev->phy_node,
+				 &dpaa_adjust_link, 0,
+				 mac_dev->phy_if);
+	if (!phy_dev) {
+		netif_err(priv, ifup, net_dev, "init_phy() failed\n");
+		return -ENODEV;
+	}
+
+	/* Remove any features not supported by the controller */
+	phy_dev->supported &= mac_dev->if_support;
+	phy_dev->supported |= (SUPPORTED_Pause | SUPPORTED_Asym_Pause);
+	phy_dev->advertising = phy_dev->supported;
+
+	mac_dev->phy_dev = phy_dev;
+	net_dev->phydev = phy_dev;
+
+	return 0;
+}
+
 static int dpaa_open(struct net_device *net_dev)
 {
 	struct mac_device *mac_dev;
@@ -2445,12 +2469,9 @@ static int dpaa_open(struct net_device *net_dev)
 	mac_dev = priv->mac_dev;
 	dpaa_eth_napi_enable(priv);
 
-	net_dev->phydev = mac_dev->init_phy(net_dev, priv->mac_dev);
-	if (!net_dev->phydev) {
-		netif_err(priv, ifup, net_dev, "init_phy() failed\n");
-		err = -ENODEV;
+	err = dpaa_phy_init(net_dev);
+	if (err)
 		goto phy_init_failed;
-	}
 
 	for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) {
 		err = fman_port_enable(mac_dev->port[i]);
@@ -2649,7 +2670,6 @@ static inline u16 dpaa_get_headroom(struct dpaa_buffer_layout *bl)
 static int dpaa_eth_probe(struct platform_device *pdev)
 {
 	struct dpaa_bp *dpaa_bps[DPAA_BPS_NUM] = {NULL};
-	struct dpaa_percpu_priv *percpu_priv;
 	struct net_device *net_dev = NULL;
 	struct dpaa_fq *dpaa_fq, *tmp;
 	struct dpaa_priv *priv = NULL;
@@ -2658,7 +2678,13 @@ static int dpaa_eth_probe(struct platform_device *pdev)
 	int err = 0, i, channel;
 	struct device *dev;
 
-	dev = &pdev->dev;
+	/* device used for DMA mapping */
+	dev = pdev->dev.parent;
+	err = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(40));
+	if (err) {
+		dev_err(dev, "dma_coerce_mask_and_coherent() failed\n");
+		return err;
+	}
 
 	/* Allocate this early, so we can store relevant information in
 	 * the private area
@@ -2666,7 +2692,7 @@ static int dpaa_eth_probe(struct platform_device *pdev)
 	net_dev = alloc_etherdev_mq(sizeof(*priv), DPAA_ETH_TXQ_NUM);
 	if (!net_dev) {
 		dev_err(dev, "alloc_etherdev_mq() failed\n");
-		goto alloc_etherdev_mq_failed;
+		return -ENOMEM;
 	}
 
 	/* Do this here, so we can be verbose early */
@@ -2682,7 +2708,7 @@ static int dpaa_eth_probe(struct platform_device *pdev)
 	if (IS_ERR(mac_dev)) {
 		dev_err(dev, "dpaa_mac_dev_get() failed\n");
 		err = PTR_ERR(mac_dev);
-		goto mac_probe_failed;
+		goto free_netdev;
 	}
 
 	/* If fsl_fm_max_frm is set to a higher value than the all-common 1500,
@@ -2700,21 +2726,13 @@ static int dpaa_eth_probe(struct platform_device *pdev)
 	priv->buf_layout[RX].priv_data_size = DPAA_RX_PRIV_DATA_SIZE; /* Rx */
 	priv->buf_layout[TX].priv_data_size = DPAA_TX_PRIV_DATA_SIZE; /* Tx */
 
-	/* device used for DMA mapping */
-	set_dma_ops(dev, get_dma_ops(&pdev->dev));
-	err = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(40));
-	if (err) {
-		dev_err(dev, "dma_coerce_mask_and_coherent() failed\n");
-		goto dev_mask_failed;
-	}
-
 	/* bp init */
 	for (i = 0; i < DPAA_BPS_NUM; i++) {
-		int err;
-
 		dpaa_bps[i] = dpaa_bp_alloc(dev);
-		if (IS_ERR(dpaa_bps[i]))
-			return PTR_ERR(dpaa_bps[i]);
+		if (IS_ERR(dpaa_bps[i])) {
+			err = PTR_ERR(dpaa_bps[i]);
+			goto free_dpaa_bps;
+		}
 		/* the raw size of the buffers used for reception */
 		dpaa_bps[i]->raw_size = bpool_buffer_raw_size(i, DPAA_BPS_NUM);
 		/* avoid runtime computations by keeping the usable size here */
@@ -2722,11 +2740,8 @@ static int dpaa_eth_probe(struct platform_device *pdev)
 		dpaa_bps[i]->dev = dev;
 
 		err = dpaa_bp_alloc_pool(dpaa_bps[i]);
-		if (err < 0) {
-			dpaa_bps_free(priv);
-			priv->dpaa_bps[i] = NULL;
-			goto bp_create_failed;
-		}
+		if (err < 0)
+			goto free_dpaa_bps;
 		priv->dpaa_bps[i] = dpaa_bps[i];
 	}
 
@@ -2737,7 +2752,7 @@ static int dpaa_eth_probe(struct platform_device *pdev)
 	err = dpaa_alloc_all_fqs(dev, &priv->dpaa_fq_list, &port_fqs);
 	if (err < 0) {
 		dev_err(dev, "dpaa_alloc_all_fqs() failed\n");
-		goto fq_probe_failed;
+		goto free_dpaa_bps;
 	}
 
 	priv->mac_dev = mac_dev;
@@ -2746,7 +2761,7 @@ static int dpaa_eth_probe(struct platform_device *pdev)
 	if (channel < 0) {
 		dev_err(dev, "dpaa_get_channel() failed\n");
 		err = channel;
-		goto get_channel_failed;
+		goto free_dpaa_bps;
 	}
 
 	priv->channel = (u16)channel;
@@ -2766,20 +2781,20 @@ static int dpaa_eth_probe(struct platform_device *pdev)
 	err = dpaa_eth_cgr_init(priv);
 	if (err < 0) {
 		dev_err(dev, "Error initializing CGR\n");
-		goto tx_cgr_init_failed;
+		goto free_dpaa_bps;
 	}
 
 	err = dpaa_ingress_cgr_init(priv);
 	if (err < 0) {
 		dev_err(dev, "Error initializing ingress CGR\n");
-		goto rx_cgr_init_failed;
+		goto delete_egress_cgr;
 	}
 
 	/* Add the FQs to the interface, and make them active */
 	list_for_each_entry_safe(dpaa_fq, tmp, &priv->dpaa_fq_list, list) {
 		err = dpaa_fq_init(dpaa_fq, false);
 		if (err < 0)
-			goto fq_alloc_failed;
+			goto free_dpaa_fqs;
 	}
 
 	priv->tx_headroom = dpaa_get_headroom(&priv->buf_layout[TX]);
@@ -2789,7 +2804,7 @@ static int dpaa_eth_probe(struct platform_device *pdev)
 	err = dpaa_eth_init_ports(mac_dev, dpaa_bps, DPAA_BPS_NUM, &port_fqs,
 				  &priv->buf_layout[0], dev);
 	if (err)
-		goto init_ports_failed;
+		goto free_dpaa_fqs;
 
 	/* Rx traffic distribution based on keygen hashing defaults to on */
 	priv->keygen_in_use = true;
@@ -2798,11 +2813,7 @@ static int dpaa_eth_probe(struct platform_device *pdev)
 	if (!priv->percpu_priv) {
 		dev_err(dev, "devm_alloc_percpu() failed\n");
 		err = -ENOMEM;
-		goto alloc_percpu_failed;
-	}
-	for_each_possible_cpu(i) {
-		percpu_priv = per_cpu_ptr(priv->percpu_priv, i);
-		memset(percpu_priv, 0, sizeof(*percpu_priv));
+		goto free_dpaa_fqs;
 	}
 
 	priv->num_tc = 1;
@@ -2811,11 +2822,11 @@ static int dpaa_eth_probe(struct platform_device *pdev)
 	/* Initialize NAPI */
 	err = dpaa_napi_add(net_dev);
 	if (err < 0)
-		goto napi_add_failed;
+		goto delete_dpaa_napi;
 
 	err = dpaa_netdev_init(net_dev, &dpaa_ops, tx_timeout);
 	if (err < 0)
-		goto netdev_init_failed;
+		goto delete_dpaa_napi;
 
 	dpaa_eth_sysfs_init(&net_dev->dev);
 
@@ -2824,32 +2835,21 @@ static int dpaa_eth_probe(struct platform_device *pdev)
 
 	return 0;
 
-netdev_init_failed:
-napi_add_failed:
+delete_dpaa_napi:
 	dpaa_napi_del(net_dev);
-alloc_percpu_failed:
-init_ports_failed:
+free_dpaa_fqs:
 	dpaa_fq_free(dev, &priv->dpaa_fq_list);
-fq_alloc_failed:
 	qman_delete_cgr_safe(&priv->ingress_cgr);
 	qman_release_cgrid(priv->ingress_cgr.cgrid);
-rx_cgr_init_failed:
+delete_egress_cgr:
 	qman_delete_cgr_safe(&priv->cgr_data.cgr);
 	qman_release_cgrid(priv->cgr_data.cgr.cgrid);
-tx_cgr_init_failed:
-get_channel_failed:
+free_dpaa_bps:
 	dpaa_bps_free(priv);
-bp_create_failed:
-fq_probe_failed:
-dev_mask_failed:
-mac_probe_failed:
+free_netdev:
 	dev_set_drvdata(dev, NULL);
 	free_netdev(net_dev);
-alloc_etherdev_mq_failed:
-	for (i = 0; i < DPAA_BPS_NUM && dpaa_bps[i]; i++) {
-		if (atomic_read(&dpaa_bps[i]->refs) == 0)
-			devm_kfree(dev, dpaa_bps[i]);
-	}
+
 	return err;
 }
 
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 44720f83af27..5385074b3b7d 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -583,12 +583,11 @@ struct fec_enet_private {
 	u64 ethtool_stats[0];
 };
 
-void fec_ptp_init(struct platform_device *pdev);
+void fec_ptp_init(struct platform_device *pdev, int irq_idx);
 void fec_ptp_stop(struct platform_device *pdev);
 void fec_ptp_start_cyclecounter(struct net_device *ndev);
 int fec_ptp_set(struct net_device *ndev, struct ifreq *ifr);
 int fec_ptp_get(struct net_device *ndev, struct ifreq *ifr);
-uint fec_ptp_check_pps_event(struct fec_enet_private *fep);
 
 /****************************************************************************/
 #endif /* FEC_H */
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 3dc2d771a222..610573855213 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1602,10 +1602,6 @@ fec_enet_interrupt(int irq, void *dev_id)
 		ret = IRQ_HANDLED;
 		complete(&fep->mdio_done);
 	}
-
-	if (fep->ptp_clock)
-		if (fec_ptp_check_pps_event(fep))
-			ret = IRQ_HANDLED;
 	return ret;
 }
 
@@ -3312,6 +3308,19 @@ fec_enet_get_queue_num(struct platform_device *pdev, int *num_tx, int *num_rx)
 
 }
 
+static int fec_enet_get_irq_cnt(struct platform_device *pdev)
+{
+	int irq_cnt = platform_irq_count(pdev);
+
+	if (irq_cnt > FEC_IRQ_NUM)
+		irq_cnt = FEC_IRQ_NUM;	/* last for pps */
+	else if (irq_cnt == 2)
+		irq_cnt = 1;	/* last for pps */
+	else if (irq_cnt <= 0)
+		irq_cnt = 1;	/* At least 1 irq is needed */
+	return irq_cnt;
+}
+
 static int
 fec_probe(struct platform_device *pdev)
 {
@@ -3325,6 +3334,8 @@ fec_probe(struct platform_device *pdev)
 	struct device_node *np = pdev->dev.of_node, *phy_node;
 	int num_tx_qs;
 	int num_rx_qs;
+	char irq_name[8];
+	int irq_cnt;
 
 	fec_enet_get_queue_num(pdev, &num_tx_qs, &num_rx_qs);
 
@@ -3465,18 +3476,20 @@ fec_probe(struct platform_device *pdev)
 	if (ret)
 		goto failed_reset;
 
+	irq_cnt = fec_enet_get_irq_cnt(pdev);
 	if (fep->bufdesc_ex)
-		fec_ptp_init(pdev);
+		fec_ptp_init(pdev, irq_cnt);
 
 	ret = fec_enet_init(ndev);
 	if (ret)
 		goto failed_init;
 
-	for (i = 0; i < FEC_IRQ_NUM; i++) {
-		irq = platform_get_irq(pdev, i);
+	for (i = 0; i < irq_cnt; i++) {
+		sprintf(irq_name, "int%d", i);
+		irq = platform_get_irq_byname(pdev, irq_name);
+		if (irq < 0)
+			irq = platform_get_irq(pdev, i);
 		if (irq < 0) {
-			if (i)
-				break;
 			ret = irq;
 			goto failed_irq;
 		}
diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c
index 6ebad3fac81d..f81439796ac7 100644
--- a/drivers/net/ethernet/freescale/fec_ptp.c
+++ b/drivers/net/ethernet/freescale/fec_ptp.c
@@ -549,6 +549,37 @@ static void fec_time_keep(struct work_struct *work)
 	schedule_delayed_work(&fep->time_keep, HZ);
 }
 
+/* This function checks the pps event and reloads the timer compare counter. */
+static irqreturn_t fec_pps_interrupt(int irq, void *dev_id)
+{
+	struct net_device *ndev = dev_id;
+	struct fec_enet_private *fep = netdev_priv(ndev);
+	u32 val;
+	u8 channel = fep->pps_channel;
+	struct ptp_clock_event event;
+
+	val = readl(fep->hwp + FEC_TCSR(channel));
+	if (val & FEC_T_TF_MASK) {
+		/* Write the next next compare(not the next according the spec)
+		 * value to the register
+		 */
+		writel(fep->next_counter, fep->hwp + FEC_TCCR(channel));
+		do {
+			writel(val, fep->hwp + FEC_TCSR(channel));
+		} while (readl(fep->hwp + FEC_TCSR(channel)) & FEC_T_TF_MASK);
+
+		/* Update the counter; */
+		fep->next_counter = (fep->next_counter + fep->reload_period) &
+				fep->cc.mask;
+
+		event.type = PTP_CLOCK_PPS;
+		ptp_clock_event(fep->ptp_clock, &event);
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
 /**
  * fec_ptp_init
  * @ndev: The FEC network adapter
@@ -558,10 +589,12 @@ static void fec_time_keep(struct work_struct *work)
  * cyclecounter init routine and exits.
  */
 
-void fec_ptp_init(struct platform_device *pdev)
+void fec_ptp_init(struct platform_device *pdev, int irq_idx)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct fec_enet_private *fep = netdev_priv(ndev);
+	int irq;
+	int ret;
 
 	fep->ptp_caps.owner = THIS_MODULE;
 	snprintf(fep->ptp_caps.name, 16, "fec ptp");
@@ -587,6 +620,20 @@ void fec_ptp_init(struct platform_device *pdev)
 
 	INIT_DELAYED_WORK(&fep->time_keep, fec_time_keep);
 
+	irq = platform_get_irq_byname(pdev, "pps");
+	if (irq < 0)
+		irq = platform_get_irq(pdev, irq_idx);
+	/* Failure to get an irq is not fatal,
+	 * only the PTP_CLOCK_PPS clock events should stop
+	 */
+	if (irq >= 0) {
+		ret = devm_request_irq(&pdev->dev, irq, fec_pps_interrupt,
+				       0, pdev->name, ndev);
+		if (ret < 0)
+			dev_warn(&pdev->dev, "request for pps irq failed(%d)\n",
+				 ret);
+	}
+
 	fep->ptp_clock = ptp_clock_register(&fep->ptp_caps, &pdev->dev);
 	if (IS_ERR(fep->ptp_clock)) {
 		fep->ptp_clock = NULL;
@@ -605,36 +652,3 @@ void fec_ptp_stop(struct platform_device *pdev)
 	if (fep->ptp_clock)
 		ptp_clock_unregister(fep->ptp_clock);
 }
-
-/**
- * fec_ptp_check_pps_event
- * @fep: the fec_enet_private structure handle
- *
- * This function check the pps event and reload the timer compare counter.
- */
-uint fec_ptp_check_pps_event(struct fec_enet_private *fep)
-{
-	u32 val;
-	u8 channel = fep->pps_channel;
-	struct ptp_clock_event event;
-
-	val = readl(fep->hwp + FEC_TCSR(channel));
-	if (val & FEC_T_TF_MASK) {
-		/* Write the next next compare(not the next according the spec)
-		 * value to the register
-		 */
-		writel(fep->next_counter, fep->hwp + FEC_TCCR(channel));
-		do {
-			writel(val, fep->hwp + FEC_TCSR(channel));
-		} while (readl(fep->hwp + FEC_TCSR(channel)) & FEC_T_TF_MASK);
-
-		/* Update the counter; */
-		fep->next_counter = (fep->next_counter + fep->reload_period) & fep->cc.mask;
-
-		event.type = PTP_CLOCK_PPS;
-		ptp_clock_event(fep->ptp_clock, &event);
-		return 1;
-	}
-
-	return 0;
-}
diff --git a/drivers/net/ethernet/freescale/fman/Makefile b/drivers/net/ethernet/freescale/fman/Makefile
index f83a3653b63b..b618091db091 100644
--- a/drivers/net/ethernet/freescale/fman/Makefile
+++ b/drivers/net/ethernet/freescale/fman/Makefile
@@ -1,10 +1,10 @@
 # SPDX-License-Identifier: GPL-2.0
 subdir-ccflags-y +=  -I$(srctree)/drivers/net/ethernet/freescale/fman
 
-obj-$(CONFIG_FSL_FMAN) += fsl_fman.o
-obj-$(CONFIG_FSL_FMAN) += fsl_fman_port.o
-obj-$(CONFIG_FSL_FMAN) += fsl_mac.o
+obj-$(CONFIG_FSL_FMAN) += fsl_dpaa_fman.o
+obj-$(CONFIG_FSL_FMAN) += fsl_dpaa_fman_port.o
+obj-$(CONFIG_FSL_FMAN) += fsl_dpaa_mac.o
 
-fsl_fman-objs	:= fman_muram.o fman.o fman_sp.o fman_keygen.o
-fsl_fman_port-objs := fman_port.o
-fsl_mac-objs:= mac.o fman_dtsec.o fman_memac.o fman_tgec.o
+fsl_dpaa_fman-objs	:= fman_muram.o fman.o fman_sp.o fman_keygen.o
+fsl_dpaa_fman_port-objs := fman_port.o
+fsl_dpaa_mac-objs:= mac.o fman_dtsec.o fman_memac.o fman_tgec.o
diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c
index 1789b206be58..6552d68ea6e1 100644
--- a/drivers/net/ethernet/freescale/fman/fman_port.c
+++ b/drivers/net/ethernet/freescale/fman/fman_port.c
@@ -1339,8 +1339,10 @@ int fman_port_config(struct fman_port *port, struct fman_port_params *params)
 	switch (port->port_type) {
 	case FMAN_PORT_TYPE_RX:
 		set_rx_dflt_cfg(port, params);
+		/* fall through */
 	case FMAN_PORT_TYPE_TX:
 		set_tx_dflt_cfg(port, params, &port->dts_params);
+		/* fall through */
 	default:
 		set_dflt_cfg(port, params);
 	}
diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c
index 387eb4a88b72..88c0a0636b44 100644
--- a/drivers/net/ethernet/freescale/fman/mac.c
+++ b/drivers/net/ethernet/freescale/fman/mac.c
@@ -57,9 +57,7 @@ struct mac_priv_s {
 	struct device			*dev;
 	void __iomem			*vaddr;
 	u8				cell_index;
-	phy_interface_t			phy_if;
 	struct fman			*fman;
-	struct device_node		*phy_node;
 	struct device_node		*internal_phy_node;
 	/* List of multicast addresses */
 	struct list_head		mc_addr_list;
@@ -106,7 +104,7 @@ static void set_fman_mac_params(struct mac_device *mac_dev,
 			     resource_size(mac_dev->res));
 	memcpy(&params->addr, mac_dev->addr, sizeof(mac_dev->addr));
 	params->max_speed	= priv->max_speed;
-	params->phy_if		= priv->phy_if;
+	params->phy_if		= mac_dev->phy_if;
 	params->basex_if	= false;
 	params->mac_id		= priv->cell_index;
 	params->fm		= (void *)priv->fman;
@@ -419,15 +417,12 @@ void fman_get_pause_cfg(struct mac_device *mac_dev, bool *rx_pause,
 }
 EXPORT_SYMBOL(fman_get_pause_cfg);
 
-static void adjust_link_void(struct net_device *net_dev)
+static void adjust_link_void(struct mac_device *mac_dev)
 {
 }
 
-static void adjust_link_dtsec(struct net_device *net_dev)
+static void adjust_link_dtsec(struct mac_device *mac_dev)
 {
-	struct device *dev = net_dev->dev.parent;
-	struct dpaa_eth_data *eth_data = dev->platform_data;
-	struct mac_device *mac_dev = eth_data->mac_dev;
 	struct phy_device *phy_dev = mac_dev->phy_dev;
 	struct fman_mac *fman_mac;
 	bool rx_pause, tx_pause;
@@ -444,14 +439,12 @@ static void adjust_link_dtsec(struct net_device *net_dev)
 	fman_get_pause_cfg(mac_dev, &rx_pause, &tx_pause);
 	err = fman_set_mac_active_pause(mac_dev, rx_pause, tx_pause);
 	if (err < 0)
-		netdev_err(net_dev, "fman_set_mac_active_pause() = %d\n", err);
+		dev_err(mac_dev->priv->dev, "fman_set_mac_active_pause() = %d\n",
+			err);
 }
 
-static void adjust_link_memac(struct net_device *net_dev)
+static void adjust_link_memac(struct mac_device *mac_dev)
 {
-	struct device *dev = net_dev->dev.parent;
-	struct dpaa_eth_data *eth_data = dev->platform_data;
-	struct mac_device *mac_dev = eth_data->mac_dev;
 	struct phy_device *phy_dev = mac_dev->phy_dev;
 	struct fman_mac *fman_mac;
 	bool rx_pause, tx_pause;
@@ -463,60 +456,12 @@ static void adjust_link_memac(struct net_device *net_dev)
 	fman_get_pause_cfg(mac_dev, &rx_pause, &tx_pause);
 	err = fman_set_mac_active_pause(mac_dev, rx_pause, tx_pause);
 	if (err < 0)
-		netdev_err(net_dev, "fman_set_mac_active_pause() = %d\n", err);
-}
-
-/* Initializes driver's PHY state, and attaches to the PHY.
- * Returns 0 on success.
- */
-static struct phy_device *init_phy(struct net_device *net_dev,
-				   struct mac_device *mac_dev,
-				   void (*adj_lnk)(struct net_device *))
-{
-	struct phy_device	*phy_dev;
-	struct mac_priv_s	*priv = mac_dev->priv;
-
-	phy_dev = of_phy_connect(net_dev, priv->phy_node, adj_lnk, 0,
-				 priv->phy_if);
-	if (!phy_dev) {
-		netdev_err(net_dev, "Could not connect to PHY\n");
-		return NULL;
-	}
-
-	/* Remove any features not supported by the controller */
-	phy_dev->supported &= mac_dev->if_support;
-	/* Enable the symmetric and asymmetric PAUSE frame advertisements,
-	 * as most of the PHY drivers do not enable them by default.
-	 */
-	phy_dev->supported |= (SUPPORTED_Pause | SUPPORTED_Asym_Pause);
-	phy_dev->advertising = phy_dev->supported;
-
-	mac_dev->phy_dev = phy_dev;
-
-	return phy_dev;
-}
-
-static struct phy_device *dtsec_init_phy(struct net_device *net_dev,
-					 struct mac_device *mac_dev)
-{
-	return init_phy(net_dev, mac_dev, &adjust_link_dtsec);
-}
-
-static struct phy_device *tgec_init_phy(struct net_device *net_dev,
-					struct mac_device *mac_dev)
-{
-	return init_phy(net_dev, mac_dev, adjust_link_void);
-}
-
-static struct phy_device *memac_init_phy(struct net_device *net_dev,
-					 struct mac_device *mac_dev)
-{
-	return init_phy(net_dev, mac_dev, &adjust_link_memac);
+		dev_err(mac_dev->priv->dev, "fman_set_mac_active_pause() = %d\n",
+			err);
 }
 
 static void setup_dtsec(struct mac_device *mac_dev)
 {
-	mac_dev->init_phy		= dtsec_init_phy;
 	mac_dev->init			= dtsec_initialization;
 	mac_dev->set_promisc		= dtsec_set_promiscuous;
 	mac_dev->change_addr		= dtsec_modify_mac_address;
@@ -528,14 +473,13 @@ static void setup_dtsec(struct mac_device *mac_dev)
 	mac_dev->set_multi		= set_multi;
 	mac_dev->start			= start;
 	mac_dev->stop			= stop;
-
+	mac_dev->adjust_link            = adjust_link_dtsec;
 	mac_dev->priv->enable		= dtsec_enable;
 	mac_dev->priv->disable		= dtsec_disable;
 }
 
 static void setup_tgec(struct mac_device *mac_dev)
 {
-	mac_dev->init_phy		= tgec_init_phy;
 	mac_dev->init			= tgec_initialization;
 	mac_dev->set_promisc		= tgec_set_promiscuous;
 	mac_dev->change_addr		= tgec_modify_mac_address;
@@ -547,14 +491,13 @@ static void setup_tgec(struct mac_device *mac_dev)
 	mac_dev->set_multi		= set_multi;
 	mac_dev->start			= start;
 	mac_dev->stop			= stop;
-
+	mac_dev->adjust_link            = adjust_link_void;
 	mac_dev->priv->enable		= tgec_enable;
 	mac_dev->priv->disable		= tgec_disable;
 }
 
 static void setup_memac(struct mac_device *mac_dev)
 {
-	mac_dev->init_phy		= memac_init_phy;
 	mac_dev->init			= memac_initialization;
 	mac_dev->set_promisc		= memac_set_promiscuous;
 	mac_dev->change_addr		= memac_modify_mac_address;
@@ -566,7 +509,7 @@ static void setup_memac(struct mac_device *mac_dev)
 	mac_dev->set_multi		= set_multi;
 	mac_dev->start			= start;
 	mac_dev->stop			= stop;
-
+	mac_dev->adjust_link            = adjust_link_memac;
 	mac_dev->priv->enable		= memac_enable;
 	mac_dev->priv->disable		= memac_disable;
 }
@@ -599,8 +542,7 @@ static const u16 phy2speed[] = {
 };
 
 static struct platform_device *dpaa_eth_add_device(int fman_id,
-						   struct mac_device *mac_dev,
-						   struct device_node *node)
+						   struct mac_device *mac_dev)
 {
 	struct platform_device *pdev;
 	struct dpaa_eth_data data;
@@ -613,17 +555,14 @@ static struct platform_device *dpaa_eth_add_device(int fman_id,
 	data.mac_dev = mac_dev;
 	data.mac_hw_id = priv->cell_index;
 	data.fman_hw_id = fman_id;
-	data.mac_node = node;
 
 	mutex_lock(&eth_lock);
-
 	pdev = platform_device_alloc("dpaa-ethernet", dpaa_eth_dev_cnt);
 	if (!pdev) {
 		ret = -ENOMEM;
 		goto no_mem;
 	}
 
-	pdev->dev.of_node = node;
 	pdev->dev.parent = priv->dev;
 	set_dma_ops(&pdev->dev, get_dma_ops(priv->dev));
 
@@ -676,7 +615,6 @@ static int mac_probe(struct platform_device *_of_dev)
 	mac_dev = devm_kzalloc(dev, sizeof(*mac_dev), GFP_KERNEL);
 	if (!mac_dev) {
 		err = -ENOMEM;
-		dev_err(dev, "devm_kzalloc() = %d\n", err);
 		goto _return;
 	}
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
@@ -706,9 +644,6 @@ static int mac_probe(struct platform_device *_of_dev)
 		goto _return;
 	}
 
-	/* Register mac_dev */
-	dev_set_drvdata(dev, mac_dev);
-
 	INIT_LIST_HEAD(&priv->mc_addr_list);
 
 	/* Get the FM node */
@@ -717,7 +652,7 @@ static int mac_probe(struct platform_device *_of_dev)
 		dev_err(dev, "of_get_parent(%pOF) failed\n",
 			mac_node);
 		err = -EINVAL;
-		goto _return_dev_set_drvdata;
+		goto _return_of_get_parent;
 	}
 
 	of_dev = of_find_device_by_node(dev_node);
@@ -751,7 +686,7 @@ static int mac_probe(struct platform_device *_of_dev)
 	if (err < 0) {
 		dev_err(dev, "of_address_to_resource(%pOF) = %d\n",
 			mac_node, err);
-		goto _return_dev_set_drvdata;
+		goto _return_of_get_parent;
 	}
 
 	mac_dev->res = __devm_request_region(dev,
@@ -761,7 +696,7 @@ static int mac_probe(struct platform_device *_of_dev)
 	if (!mac_dev->res) {
 		dev_err(dev, "__devm_request_mem_region(mac) failed\n");
 		err = -EBUSY;
-		goto _return_dev_set_drvdata;
+		goto _return_of_get_parent;
 	}
 
 	priv->vaddr = devm_ioremap(dev, mac_dev->res->start,
@@ -769,16 +704,12 @@ static int mac_probe(struct platform_device *_of_dev)
 	if (!priv->vaddr) {
 		dev_err(dev, "devm_ioremap() failed\n");
 		err = -EIO;
-		goto _return_dev_set_drvdata;
+		goto _return_of_get_parent;
 	}
 
 	if (!of_device_is_available(mac_node)) {
-		devm_iounmap(dev, priv->vaddr);
-		__devm_release_region(dev, fman_get_mem_region(priv->fman),
-				      res.start, res.end + 1 - res.start);
-		devm_kfree(dev, mac_dev);
-		dev_set_drvdata(dev, NULL);
-		return -ENODEV;
+		err = -ENODEV;
+		goto _return_of_get_parent;
 	}
 
 	/* Get the cell-index */
@@ -786,7 +717,7 @@ static int mac_probe(struct platform_device *_of_dev)
 	if (err) {
 		dev_err(dev, "failed to read cell-index for %pOF\n", mac_node);
 		err = -EINVAL;
-		goto _return_dev_set_drvdata;
+		goto _return_of_get_parent;
 	}
 	priv->cell_index = (u8)val;
 
@@ -795,7 +726,7 @@ static int mac_probe(struct platform_device *_of_dev)
 	if (!mac_addr) {
 		dev_err(dev, "of_get_mac_address(%pOF) failed\n", mac_node);
 		err = -EINVAL;
-		goto _return_dev_set_drvdata;
+		goto _return_of_get_parent;
 	}
 	memcpy(mac_dev->addr, mac_addr, sizeof(mac_dev->addr));
 
@@ -805,14 +736,14 @@ static int mac_probe(struct platform_device *_of_dev)
 		dev_err(dev, "of_count_phandle_with_args(%pOF, fsl,fman-ports) failed\n",
 			mac_node);
 		err = nph;
-		goto _return_dev_set_drvdata;
+		goto _return_of_get_parent;
 	}
 
 	if (nph != ARRAY_SIZE(mac_dev->port)) {
 		dev_err(dev, "Not supported number of fman-ports handles of mac node %pOF from device tree\n",
 			mac_node);
 		err = -EINVAL;
-		goto _return_dev_set_drvdata;
+		goto _return_of_get_parent;
 	}
 
 	for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) {
@@ -851,13 +782,13 @@ static int mac_probe(struct platform_device *_of_dev)
 			 mac_node);
 		phy_if = PHY_INTERFACE_MODE_SGMII;
 	}
-	priv->phy_if = phy_if;
+	mac_dev->phy_if = phy_if;
 
-	priv->speed		= phy2speed[priv->phy_if];
+	priv->speed		= phy2speed[mac_dev->phy_if];
 	priv->max_speed		= priv->speed;
 	mac_dev->if_support	= DTSEC_SUPPORTED;
 	/* We don't support half-duplex in SGMII mode */
-	if (priv->phy_if == PHY_INTERFACE_MODE_SGMII)
+	if (mac_dev->phy_if == PHY_INTERFACE_MODE_SGMII)
 		mac_dev->if_support &= ~(SUPPORTED_10baseT_Half |
 					SUPPORTED_100baseT_Half);
 
@@ -866,30 +797,31 @@ static int mac_probe(struct platform_device *_of_dev)
 		mac_dev->if_support |= SUPPORTED_1000baseT_Full;
 
 	/* The 10G interface only supports one mode */
-	if (priv->phy_if == PHY_INTERFACE_MODE_XGMII)
+	if (mac_dev->phy_if == PHY_INTERFACE_MODE_XGMII)
 		mac_dev->if_support = SUPPORTED_10000baseT_Full;
 
 	/* Get the rest of the PHY information */
-	priv->phy_node = of_parse_phandle(mac_node, "phy-handle", 0);
-	if (!priv->phy_node && of_phy_is_fixed_link(mac_node)) {
+	mac_dev->phy_node = of_parse_phandle(mac_node, "phy-handle", 0);
+	if (!mac_dev->phy_node && of_phy_is_fixed_link(mac_node)) {
 		struct phy_device *phy;
 
 		err = of_phy_register_fixed_link(mac_node);
 		if (err)
-			goto _return_dev_set_drvdata;
+			goto _return_of_get_parent;
 
 		priv->fixed_link = kzalloc(sizeof(*priv->fixed_link),
 					   GFP_KERNEL);
 		if (!priv->fixed_link) {
 			err = -ENOMEM;
-			goto _return_dev_set_drvdata;
+			goto _return_of_get_parent;
 		}
 
-		priv->phy_node = of_node_get(mac_node);
-		phy = of_phy_find_device(priv->phy_node);
+		mac_dev->phy_node = of_node_get(mac_node);
+		phy = of_phy_find_device(mac_dev->phy_node);
 		if (!phy) {
 			err = -EINVAL;
-			goto _return_dev_set_drvdata;
+			of_node_put(mac_dev->phy_node);
+			goto _return_of_get_parent;
 		}
 
 		priv->fixed_link->link = phy->link;
@@ -904,8 +836,8 @@ static int mac_probe(struct platform_device *_of_dev)
 	err = mac_dev->init(mac_dev);
 	if (err < 0) {
 		dev_err(dev, "mac_dev->init() = %d\n", err);
-		of_node_put(priv->phy_node);
-		goto _return_dev_set_drvdata;
+		of_node_put(mac_dev->phy_node);
+		goto _return_of_get_parent;
 	}
 
 	/* pause frame autonegotiation enabled */
@@ -926,7 +858,7 @@ static int mac_probe(struct platform_device *_of_dev)
 		 mac_dev->addr[0], mac_dev->addr[1], mac_dev->addr[2],
 		 mac_dev->addr[3], mac_dev->addr[4], mac_dev->addr[5]);
 
-	priv->eth_dev = dpaa_eth_add_device(fman_id, mac_dev, mac_node);
+	priv->eth_dev = dpaa_eth_add_device(fman_id, mac_dev);
 	if (IS_ERR(priv->eth_dev)) {
 		dev_err(dev, "failed to add Ethernet platform device for MAC %d\n",
 			priv->cell_index);
@@ -937,9 +869,8 @@ static int mac_probe(struct platform_device *_of_dev)
 
 _return_of_node_put:
 	of_node_put(dev_node);
-_return_dev_set_drvdata:
+_return_of_get_parent:
 	kfree(priv->fixed_link);
-	dev_set_drvdata(dev, NULL);
 _return:
 	return err;
 }
diff --git a/drivers/net/ethernet/freescale/fman/mac.h b/drivers/net/ethernet/freescale/fman/mac.h
index d7313f0c5135..eefb3357e304 100644
--- a/drivers/net/ethernet/freescale/fman/mac.h
+++ b/drivers/net/ethernet/freescale/fman/mac.h
@@ -50,6 +50,8 @@ struct mac_device {
 	struct fman_port	*port[2];
 	u32			 if_support;
 	struct phy_device	*phy_dev;
+	phy_interface_t		phy_if;
+	struct device_node	*phy_node;
 
 	bool autoneg_pause;
 	bool rx_pause_req;
@@ -58,11 +60,10 @@ struct mac_device {
 	bool tx_pause_active;
 	bool promisc;
 
-	struct phy_device *(*init_phy)(struct net_device *net_dev,
-				       struct mac_device *mac_dev);
 	int (*init)(struct mac_device *mac_dev);
 	int (*start)(struct mac_device *mac_dev);
 	int (*stop)(struct mac_device *mac_dev);
+	void (*adjust_link)(struct mac_device *mac_dev);
 	int (*set_promisc)(struct fman_mac *mac_dev, bool enable);
 	int (*change_addr)(struct fman_mac *mac_dev, enet_addr_t *enet_addr);
 	int (*set_multi)(struct net_device *net_dev,
@@ -82,7 +83,6 @@ struct mac_device {
 };
 
 struct dpaa_eth_data {
-	struct device_node *mac_node;
 	struct mac_device *mac_dev;
 	int mac_hw_id;
 	int fman_hw_id;
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
index 753259091b22..7892f2f0c6b5 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
@@ -1023,8 +1023,6 @@ static int fs_enet_probe(struct platform_device *ofdev)
 
 	ndev->ethtool_ops = &fs_ethtool_ops;
 
-	init_timer(&fep->phy_timer_list);
-
 	netif_carrier_off(ndev);
 
 	ndev->features |= NETIF_F_SG;
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h
index 168e10ea487f..92e06b37a199 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h
@@ -138,7 +138,6 @@ struct fs_enet_private {
 	cbd_t __iomem *cur_rx;
 	cbd_t __iomem *cur_tx;
 	int tx_free;
-	struct timer_list phy_timer_list;
 	const struct phy_info *phy;
 	u32 msg_enable;
 	struct mii_if_info mii_if;
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index f77ba9fa257b..a96b838cffce 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -3857,8 +3857,9 @@ static int ucc_geth_probe(struct platform_device* ofdev)
 	}
 
 	if (netif_msg_probe(&debug))
-		pr_info("UCC%1d at 0x%8x (irq = %d)\n",
-			ug_info->uf_info.ucc_num + 1, ug_info->uf_info.regs,
+		pr_info("UCC%1d at 0x%8llx (irq = %d)\n",
+			ug_info->uf_info.ucc_num + 1,
+			(u64)ug_info->uf_info.regs,
 			ug_info->uf_info.irq);
 
 	/* Create an ethernet device instance */
diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig
index 91c7bdb9b43c..30000b6aa7b8 100644
--- a/drivers/net/ethernet/hisilicon/Kconfig
+++ b/drivers/net/ethernet/hisilicon/Kconfig
@@ -78,7 +78,7 @@ config HNS_ENET
 
 config HNS3
 	tristate "Hisilicon Network Subsystem Support HNS3 (Framework)"
-    depends on PCI
+	depends on PCI
 	---help---
 	  This selects the framework support for Hisilicon Network Subsystem 3.
 	  This layer facilitates clients like ENET, RoCE and user-space ethernet
@@ -87,7 +87,7 @@ config HNS3
 
 config HNS3_HCLGE
 	tristate "Hisilicon HNS3 HCLGE Acceleration Engine & Compatibility Layer Support"
-    depends on PCI_MSI
+	depends on PCI_MSI
 	depends on HNS3
 	---help---
 	  This selects the HNS3_HCLGE network acceleration engine & its hardware
@@ -96,11 +96,20 @@ config HNS3_HCLGE
 
 config HNS3_ENET
 	tristate "Hisilicon HNS3 Ethernet Device Support"
-    depends on 64BIT && PCI
+	depends on 64BIT && PCI
 	depends on HNS3 && HNS3_HCLGE
 	---help---
 	  This selects the Ethernet Driver for Hisilicon Network Subsystem 3 for hip08
 	  family of SoCs. This module depends upon HNAE3 driver to access the HNAE3
 	  devices and their associated operations.
 
+config HNS3_DCB
+	bool "Hisilicon HNS3 Data Center Bridge Support"
+	default n
+	depends on HNS3 && HNS3_HCLGE && DCB
+	---help---
+	  Say Y here if you want to use Data Center Bridging (DCB) in the HNS3 driver.
+
+	  If unsure, say N.
+
 endif # NET_VENDOR_HISILICON
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index e77192683dba..1ccb6443d2ed 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -2159,9 +2159,9 @@ static void hns_nic_task_schedule(struct hns_nic_priv *priv)
 		(void)schedule_work(&priv->service_task);
 }
 
-static void hns_nic_service_timer(unsigned long data)
+static void hns_nic_service_timer(struct timer_list *t)
 {
-	struct hns_nic_priv *priv = (struct hns_nic_priv *)data;
+	struct hns_nic_priv *priv = from_timer(priv, t, service_timer);
 
 	(void)mod_timer(&priv->service_timer, jiffies + SERVICE_TIMER_HZ);
 
@@ -2451,8 +2451,7 @@ static int hns_nic_dev_probe(struct platform_device *pdev)
 	/* carrier off reporting is important to ethtool even BEFORE open */
 	netif_carrier_off(ndev);
 
-	setup_timer(&priv->service_timer, hns_nic_service_timer,
-		    (unsigned long)priv);
+	timer_setup(&priv->service_timer, hns_nic_service_timer, 0);
 	INIT_WORK(&priv->service_task, hns_nic_service_task);
 
 	set_bit(NIC_STATE_SERVICE_INITED, &priv->state);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 1a01cadfe5f3..67c59e1039f2 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -28,6 +28,7 @@
  */
 
 #include <linux/acpi.h>
+#include <linux/dcbnl.h>
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/module.h>
@@ -109,6 +110,21 @@ enum hnae3_media_type {
 	HNAE3_MEDIA_TYPE_BACKPLANE,
 };
 
+enum hnae3_reset_notify_type {
+	HNAE3_UP_CLIENT,
+	HNAE3_DOWN_CLIENT,
+	HNAE3_INIT_CLIENT,
+	HNAE3_UNINIT_CLIENT,
+};
+
+enum hnae3_reset_type {
+	HNAE3_FUNC_RESET,
+	HNAE3_CORE_RESET,
+	HNAE3_GLOBAL_RESET,
+	HNAE3_IMP_RESET,
+	HNAE3_NONE_RESET,
+};
+
 struct hnae3_vector_info {
 	u8 __iomem *io_addr;
 	int vector;
@@ -131,6 +147,9 @@ struct hnae3_client_ops {
 	int (*init_instance)(struct hnae3_handle *handle);
 	void (*uninit_instance)(struct hnae3_handle *handle, bool reset);
 	void (*link_status_change)(struct hnae3_handle *handle, bool state);
+	int (*setup_tc)(struct hnae3_handle *handle, u8 tc);
+	int (*reset_notify)(struct hnae3_handle *handle,
+			    enum hnae3_reset_notify_type type);
 };
 
 #define HNAE3_CLIENT_NAME_LENGTH 16
@@ -337,6 +356,10 @@ struct hnae3_ae_ops {
 		       u8 *hfunc);
 	int (*set_rss)(struct hnae3_handle *handle, const u32 *indir,
 		       const u8 *key, const u8 hfunc);
+	int (*set_rss_tuple)(struct hnae3_handle *handle,
+			     struct ethtool_rxnfc *cmd);
+	int (*get_rss_tuple)(struct hnae3_handle *handle,
+			     struct ethtool_rxnfc *cmd);
 
 	int (*get_tc_size)(struct hnae3_handle *handle);
 
@@ -361,6 +384,23 @@ struct hnae3_ae_ops {
 			       u16 vlan_id, bool is_kill);
 	int (*set_vf_vlan_filter)(struct hnae3_handle *handle, int vfid,
 				  u16 vlan, u8 qos, __be16 proto);
+	void (*reset_event)(struct hnae3_handle *handle,
+			    enum hnae3_reset_type reset);
+};
+
+struct hnae3_dcb_ops {
+	/* IEEE 802.1Qaz std */
+	int (*ieee_getets)(struct hnae3_handle *, struct ieee_ets *);
+	int (*ieee_setets)(struct hnae3_handle *, struct ieee_ets *);
+	int (*ieee_getpfc)(struct hnae3_handle *, struct ieee_pfc *);
+	int (*ieee_setpfc)(struct hnae3_handle *, struct ieee_pfc *);
+
+	/* DCBX configuration */
+	u8   (*getdcbx)(struct hnae3_handle *);
+	u8   (*setdcbx)(struct hnae3_handle *, u8);
+
+	int (*map_update)(struct hnae3_handle *);
+	int (*setup_tc)(struct hnae3_handle *, u8, u8 *);
 };
 
 struct hnae3_ae_algo {
@@ -394,6 +434,7 @@ struct hnae3_knic_private_info {
 
 	u16 num_tqps;		  /* total number of TQPs in this handle */
 	struct hnae3_queue **tqp;  /* array base of all TQPs in this instance */
+	const struct hnae3_dcb_ops *dcb_ops;
 };
 
 struct hnae3_roce_private_info {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
index 162e8a42acd0..d2b20d01a58c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
@@ -7,5 +7,9 @@ ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3
 obj-$(CONFIG_HNS3_HCLGE) += hclge.o
 hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o
 
+hclge-$(CONFIG_HNS3_DCB) += hclge_dcb.o
+
 obj-$(CONFIG_HNS3_ENET) += hns3.o
 hns3-objs = hns3_enet.o hns3_ethtool.o
+
+hns3-$(CONFIG_HNS3_DCB) += hns3_dcbnl.o
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index 8b511e6e0ce9..ff13d1876d9e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -62,7 +62,7 @@ static void hclge_free_cmd_desc(struct hclge_cmq_ring *ring)
 	ring->desc = NULL;
 }
 
-static int hclge_init_cmd_queue(struct hclge_dev *hdev, int ring_type)
+static int hclge_alloc_cmd_queue(struct hclge_dev *hdev, int ring_type)
 {
 	struct hclge_hw *hw = &hdev->hw;
 	struct hclge_cmq_ring *ring =
@@ -79,12 +79,18 @@ static int hclge_init_cmd_queue(struct hclge_dev *hdev, int ring_type)
 		return ret;
 	}
 
-	ring->next_to_clean = 0;
-	ring->next_to_use = 0;
-
 	return 0;
 }
 
+void hclge_cmd_reuse_desc(struct hclge_desc *desc, bool is_read)
+{
+	desc->flag = cpu_to_le16(HCLGE_CMD_FLAG_NO_INTR | HCLGE_CMD_FLAG_IN);
+	if (is_read)
+		desc->flag |= cpu_to_le16(HCLGE_CMD_FLAG_WR);
+	else
+		desc->flag &= cpu_to_le16(~HCLGE_CMD_FLAG_WR);
+}
+
 void hclge_cmd_setup_basic_desc(struct hclge_desc *desc,
 				enum hclge_opcode_type opcode, bool is_read)
 {
@@ -208,7 +214,7 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num)
 	 * which will be use for hardware to write back
 	 */
 	ntc = hw->cmq.csq.next_to_use;
-	opcode = desc[0].opcode;
+	opcode = le16_to_cpu(desc[0].opcode);
 	while (handle < num) {
 		desc_to_use = &hw->cmq.csq.desc[hw->cmq.csq.next_to_use];
 		*desc_to_use = desc[handle];
@@ -225,7 +231,7 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num)
 	 * If the command is sync, wait for the firmware to write back,
 	 * if multi descriptors to be sent, use the first one to check
 	 */
-	if (HCLGE_SEND_SYNC(desc->flag)) {
+	if (HCLGE_SEND_SYNC(le16_to_cpu(desc->flag))) {
 		do {
 			if (hclge_cmd_csq_done(hw))
 				break;
@@ -244,9 +250,9 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num)
 			pr_debug("Get cmd desc:\n");
 
 			if (likely(!hclge_is_special_opcode(opcode)))
-				desc_ret = desc[handle].retval;
+				desc_ret = le16_to_cpu(desc[handle].retval);
 			else
-				desc_ret = desc[0].retval;
+				desc_ret = le16_to_cpu(desc[0].retval);
 
 			if ((enum hclge_cmd_return_status)desc_ret ==
 			    HCLGE_CMD_EXEC_SUCCESS)
@@ -276,15 +282,15 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num)
 	return retval;
 }
 
-enum hclge_cmd_status hclge_cmd_query_firmware_version(struct hclge_hw *hw,
-						       u32 *version)
+static enum hclge_cmd_status hclge_cmd_query_firmware_version(
+		struct hclge_hw *hw, u32 *version)
 {
-	struct hclge_query_version *resp;
+	struct hclge_query_version_cmd *resp;
 	struct hclge_desc desc;
 	int ret;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_FW_VER, 1);
-	resp = (struct hclge_query_version *)desc.data;
+	resp = (struct hclge_query_version_cmd *)desc.data;
 
 	ret = hclge_cmd_send(hw, &desc, 1);
 	if (!ret)
@@ -293,37 +299,52 @@ enum hclge_cmd_status hclge_cmd_query_firmware_version(struct hclge_hw *hw,
 	return ret;
 }
 
-int hclge_cmd_init(struct hclge_dev *hdev)
+int hclge_cmd_queue_init(struct hclge_dev *hdev)
 {
-	u32 version;
 	int ret;
 
 	/* Setup the queue entries for use cmd queue */
 	hdev->hw.cmq.csq.desc_num = HCLGE_NIC_CMQ_DESC_NUM;
 	hdev->hw.cmq.crq.desc_num = HCLGE_NIC_CMQ_DESC_NUM;
 
-	/* Setup the lock for command queue */
-	spin_lock_init(&hdev->hw.cmq.csq.lock);
-	spin_lock_init(&hdev->hw.cmq.crq.lock);
-
 	/* Setup Tx write back timeout */
 	hdev->hw.cmq.tx_timeout = HCLGE_CMDQ_TX_TIMEOUT;
 
 	/* Setup queue rings */
-	ret = hclge_init_cmd_queue(hdev, HCLGE_TYPE_CSQ);
+	ret = hclge_alloc_cmd_queue(hdev, HCLGE_TYPE_CSQ);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
 			"CSQ ring setup error %d\n", ret);
 		return ret;
 	}
 
-	ret = hclge_init_cmd_queue(hdev, HCLGE_TYPE_CRQ);
+	ret = hclge_alloc_cmd_queue(hdev, HCLGE_TYPE_CRQ);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
 			"CRQ ring setup error %d\n", ret);
 		goto err_csq;
 	}
 
+	return 0;
+err_csq:
+	hclge_free_cmd_desc(&hdev->hw.cmq.csq);
+	return ret;
+}
+
+int hclge_cmd_init(struct hclge_dev *hdev)
+{
+	u32 version;
+	int ret;
+
+	hdev->hw.cmq.csq.next_to_clean = 0;
+	hdev->hw.cmq.csq.next_to_use = 0;
+	hdev->hw.cmq.crq.next_to_clean = 0;
+	hdev->hw.cmq.crq.next_to_use = 0;
+
+	/* Setup the lock for command queue */
+	spin_lock_init(&hdev->hw.cmq.csq.lock);
+	spin_lock_init(&hdev->hw.cmq.crq.lock);
+
 	hclge_cmd_init_regs(&hdev->hw);
 
 	ret = hclge_cmd_query_firmware_version(&hdev->hw, &version);
@@ -337,9 +358,6 @@ int hclge_cmd_init(struct hclge_dev *hdev)
 	dev_info(&hdev->pdev->dev, "The firmware version is %08x\n", version);
 
 	return 0;
-err_csq:
-	hclge_free_cmd_desc(&hdev->hw.cmq.csq);
-	return ret;
 }
 
 static void hclge_destroy_queue(struct hclge_cmq_ring *ring)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 758cf3948131..ce5ed8845042 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -63,6 +63,11 @@ enum hclge_cmd_status {
 	HCLGE_ERR_CSQ_ERROR	= -3,
 };
 
+struct hclge_misc_vector {
+	u8 __iomem *addr;
+	int vector_irq;
+};
+
 struct hclge_cmq {
 	struct hclge_cmq_ring csq;
 	struct hclge_cmq_ring crq;
@@ -221,12 +226,12 @@ enum hclge_opcode_type {
 #define HCLGE_RCB_INIT_QUERY_TIMEOUT	10
 #define HCLGE_RCB_INIT_FLAG_EN_B	0
 #define HCLGE_RCB_INIT_FLAG_FINI_B	8
-struct hclge_config_rcb_init {
+struct hclge_config_rcb_init_cmd {
 	__le16 rcb_init_flag;
 	u8 rsv[22];
 };
 
-struct hclge_tqp_map {
+struct hclge_tqp_map_cmd {
 	__le16 tqp_id;	/* Absolute tqp id for in this pf */
 	u8 tqp_vf;	/* VF id */
 #define HCLGE_TQP_MAP_TYPE_PF		0
@@ -246,15 +251,15 @@ enum hclge_int_type {
 	HCLGE_INT_EVENT,
 };
 
-struct hclge_ctrl_vector_chain {
+struct hclge_ctrl_vector_chain_cmd {
 	u8 int_vector_id;
 	u8 int_cause_num;
 #define HCLGE_INT_TYPE_S	0
-#define HCLGE_INT_TYPE_M	0x3
+#define HCLGE_INT_TYPE_M	GENMASK(1, 0)
 #define HCLGE_TQP_ID_S		2
-#define HCLGE_TQP_ID_M		(0x7ff << HCLGE_TQP_ID_S)
+#define HCLGE_TQP_ID_M		GENMASK(12, 2)
 #define HCLGE_INT_GL_IDX_S	13
-#define HCLGE_INT_GL_IDX_M	(0x3 << HCLGE_INT_GL_IDX_S)
+#define HCLGE_INT_GL_IDX_M	GENMASK(14, 13)
 	__le16 tqp_type_and_id[HCLGE_VECTOR_ELEMENTS_PER_CMD];
 	u8 vfid;
 	u8 rsv;
@@ -263,18 +268,18 @@ struct hclge_ctrl_vector_chain {
 #define HCLGE_TC_NUM		8
 #define HCLGE_TC0_PRI_BUF_EN_B	15 /* Bit 15 indicate enable or not */
 #define HCLGE_BUF_UNIT_S	7  /* Buf size is united by 128 bytes */
-struct hclge_tx_buff_alloc {
+struct hclge_tx_buff_alloc_cmd {
 	__le16 tx_pkt_buff[HCLGE_TC_NUM];
 	u8 tx_buff_rsv[8];
 };
 
-struct hclge_rx_priv_buff {
+struct hclge_rx_priv_buff_cmd {
 	__le16 buf_num[HCLGE_TC_NUM];
 	__le16 shared_buf;
 	u8 rsv[6];
 };
 
-struct hclge_query_version {
+struct hclge_query_version_cmd {
 	__le32 firmware;
 	__le32 firmware_rsv[5];
 };
@@ -311,6 +316,7 @@ struct hclge_tc_thrd {
 struct hclge_priv_buf {
 	struct hclge_waterline wl;	/* Waterline for low and high*/
 	u32 buf_size;	/* TC private buffer size */
+	u32 tx_buf_size;
 	u32 enable;	/* Enable TC private buffer or not */
 };
 
@@ -321,15 +327,20 @@ struct hclge_shared_buf {
 	u32 buf_size;
 };
 
+struct hclge_pkt_buf_alloc {
+	struct hclge_priv_buf priv_buf[HCLGE_MAX_TC_NUM];
+	struct hclge_shared_buf s_buf;
+};
+
 #define HCLGE_RX_COM_WL_EN_B	15
-struct hclge_rx_com_wl_buf {
+struct hclge_rx_com_wl_buf_cmd {
 	__le16 high_wl;
 	__le16 low_wl;
 	u8 rsv[20];
 };
 
 #define HCLGE_RX_PKT_EN_B	15
-struct hclge_rx_pkt_buf {
+struct hclge_rx_pkt_buf_cmd {
 	__le16 high_pkt;
 	__le16 low_pkt;
 	u8 rsv[20];
@@ -342,7 +353,7 @@ struct hclge_rx_pkt_buf {
 #define HCLGE_PF_MAC_NUM_MASK	0x3
 #define HCLGE_PF_STATE_MAIN	BIT(HCLGE_PF_STATE_MAIN_B)
 #define HCLGE_PF_STATE_DONE	BIT(HCLGE_PF_STATE_DONE_B)
-struct hclge_func_status {
+struct hclge_func_status_cmd {
 	__le32  vf_rst_state[4];
 	u8 pf_state;
 	u8 mac_id;
@@ -353,7 +364,7 @@ struct hclge_func_status {
 	u8 rsv[2];
 };
 
-struct hclge_pf_res {
+struct hclge_pf_res_cmd {
 	__le16 tqp_num;
 	__le16 buf_size;
 	__le16 msixcap_localid_ba_nic;
@@ -366,30 +377,30 @@ struct hclge_pf_res {
 };
 
 #define HCLGE_CFG_OFFSET_S	0
-#define HCLGE_CFG_OFFSET_M	0xfffff /* Byte (8-10.3) */
+#define HCLGE_CFG_OFFSET_M	GENMASK(19, 0)
 #define HCLGE_CFG_RD_LEN_S	24
-#define HCLGE_CFG_RD_LEN_M	(0xf << HCLGE_CFG_RD_LEN_S)
+#define HCLGE_CFG_RD_LEN_M	GENMASK(27, 24)
 #define HCLGE_CFG_RD_LEN_BYTES	16
 #define HCLGE_CFG_RD_LEN_UNIT	4
 
 #define HCLGE_CFG_VMDQ_S	0
-#define HCLGE_CFG_VMDQ_M	(0xff << HCLGE_CFG_VMDQ_S)
+#define HCLGE_CFG_VMDQ_M	GENMASK(7, 0)
 #define HCLGE_CFG_TC_NUM_S	8
-#define HCLGE_CFG_TC_NUM_M	(0xff << HCLGE_CFG_TC_NUM_S)
+#define HCLGE_CFG_TC_NUM_M	GENMASK(15, 8)
 #define HCLGE_CFG_TQP_DESC_N_S	16
-#define HCLGE_CFG_TQP_DESC_N_M	(0xffff << HCLGE_CFG_TQP_DESC_N_S)
+#define HCLGE_CFG_TQP_DESC_N_M	GENMASK(31, 16)
 #define HCLGE_CFG_PHY_ADDR_S	0
-#define HCLGE_CFG_PHY_ADDR_M	(0x1f << HCLGE_CFG_PHY_ADDR_S)
+#define HCLGE_CFG_PHY_ADDR_M	GENMASK(7, 0)
 #define HCLGE_CFG_MEDIA_TP_S	8
-#define HCLGE_CFG_MEDIA_TP_M	(0xff << HCLGE_CFG_MEDIA_TP_S)
+#define HCLGE_CFG_MEDIA_TP_M	GENMASK(15, 8)
 #define HCLGE_CFG_RX_BUF_LEN_S	16
-#define HCLGE_CFG_RX_BUF_LEN_M	(0xffff << HCLGE_CFG_RX_BUF_LEN_S)
+#define HCLGE_CFG_RX_BUF_LEN_M	GENMASK(31, 16)
 #define HCLGE_CFG_MAC_ADDR_H_S	0
-#define HCLGE_CFG_MAC_ADDR_H_M	(0xffff << HCLGE_CFG_MAC_ADDR_H_S)
+#define HCLGE_CFG_MAC_ADDR_H_M	GENMASK(15, 0)
 #define HCLGE_CFG_DEFAULT_SPEED_S	16
-#define HCLGE_CFG_DEFAULT_SPEED_M	(0xff << HCLGE_CFG_DEFAULT_SPEED_S)
+#define HCLGE_CFG_DEFAULT_SPEED_M	GENMASK(23, 16)
 
-struct hclge_cfg_param {
+struct hclge_cfg_param_cmd {
 	__le32 offset;
 	__le32 rsv;
 	__le32 param[4];
@@ -399,7 +410,7 @@ struct hclge_cfg_param {
 #define HCLGE_DESC_NUM		0x40
 
 #define HCLGE_ALLOC_VALID_B	0
-struct hclge_vf_num {
+struct hclge_vf_num_cmd {
 	u8 alloc_valid;
 	u8 rsv[23];
 };
@@ -407,13 +418,13 @@ struct hclge_vf_num {
 #define HCLGE_RSS_DEFAULT_OUTPORT_B	4
 #define HCLGE_RSS_HASH_KEY_OFFSET_B	4
 #define HCLGE_RSS_HASH_KEY_NUM		16
-struct hclge_rss_config {
+struct hclge_rss_config_cmd {
 	u8 hash_config;
 	u8 rsv[7];
 	u8 hash_key[HCLGE_RSS_HASH_KEY_NUM];
 };
 
-struct hclge_rss_input_tuple {
+struct hclge_rss_input_tuple_cmd {
 	u8 ipv4_tcp_en;
 	u8 ipv4_udp_en;
 	u8 ipv4_sctp_en;
@@ -427,26 +438,26 @@ struct hclge_rss_input_tuple {
 
 #define HCLGE_RSS_CFG_TBL_SIZE	16
 
-struct hclge_rss_indirection_table {
-	u16 start_table_index;
-	u16 rss_set_bitmap;
+struct hclge_rss_indirection_table_cmd {
+	__le16 start_table_index;
+	__le16 rss_set_bitmap;
 	u8 rsv[4];
 	u8 rss_result[HCLGE_RSS_CFG_TBL_SIZE];
 };
 
 #define HCLGE_RSS_TC_OFFSET_S		0
-#define HCLGE_RSS_TC_OFFSET_M		(0x3ff << HCLGE_RSS_TC_OFFSET_S)
+#define HCLGE_RSS_TC_OFFSET_M		GENMASK(9, 0)
 #define HCLGE_RSS_TC_SIZE_S		12
-#define HCLGE_RSS_TC_SIZE_M		(0x7 << HCLGE_RSS_TC_SIZE_S)
+#define HCLGE_RSS_TC_SIZE_M		GENMASK(14, 12)
 #define HCLGE_RSS_TC_VALID_B		15
-struct hclge_rss_tc_mode {
-	u16 rss_tc_mode[HCLGE_MAX_TC_NUM];
+struct hclge_rss_tc_mode_cmd {
+	__le16 rss_tc_mode[HCLGE_MAX_TC_NUM];
 	u8 rsv[8];
 };
 
 #define HCLGE_LINK_STS_B	0
 #define HCLGE_LINK_STATUS	BIT(HCLGE_LINK_STS_B)
-struct hclge_link_status {
+struct hclge_link_status_cmd {
 	u8 status;
 	u8 rsv[23];
 };
@@ -461,7 +472,7 @@ struct hclge_promisc_param {
 #define HCLGE_PROMISC_EN_UC	0x1
 #define HCLGE_PROMISC_EN_MC	0x2
 #define HCLGE_PROMISC_EN_BC	0x4
-struct hclge_promisc_cfg {
+struct hclge_promisc_cfg_cmd {
 	u8 flag;
 	u8 vf_id;
 	__le16 rsv0;
@@ -489,18 +500,18 @@ enum hclge_promisc_type {
 #define HCLGE_MAC_TX_UNDER_MIN_ERR_B		21
 #define HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B	22
 
-struct hclge_config_mac_mode {
+struct hclge_config_mac_mode_cmd {
 	__le32 txrx_pad_fcs_loop_en;
 	u8 rsv[20];
 };
 
 #define HCLGE_CFG_SPEED_S		0
-#define HCLGE_CFG_SPEED_M		(0x3f << HCLGE_CFG_SPEED_S)
+#define HCLGE_CFG_SPEED_M		GENMASK(5, 0)
 
 #define HCLGE_CFG_DUPLEX_B		7
 #define HCLGE_CFG_DUPLEX_M		BIT(HCLGE_CFG_DUPLEX_B)
 
-struct hclge_config_mac_speed_dup {
+struct hclge_config_mac_speed_dup_cmd {
 	u8 speed_dup;
 
 #define HCLGE_CFG_MAC_SPEED_CHANGE_EN_B	0
@@ -512,17 +523,17 @@ struct hclge_config_mac_speed_dup {
 #define HCLGE_QUERY_AN_B		0
 #define HCLGE_QUERY_DUPLEX_B		2
 
-#define HCLGE_QUERY_SPEED_M		(0x1f << HCLGE_QUERY_SPEED_S)
+#define HCLGE_QUERY_SPEED_M		GENMASK(4, 0)
 #define HCLGE_QUERY_AN_M		BIT(HCLGE_QUERY_AN_B)
 #define HCLGE_QUERY_DUPLEX_M		BIT(HCLGE_QUERY_DUPLEX_B)
 
-struct hclge_query_an_speed_dup {
+struct hclge_query_an_speed_dup_cmd {
 	u8 an_syn_dup_speed;
 	u8 pause;
 	u8 rsv[23];
 };
 
-#define HCLGE_RING_ID_MASK		0x3ff
+#define HCLGE_RING_ID_MASK		GENMASK(9, 0)
 #define HCLGE_TQP_ENABLE_B		0
 
 #define HCLGE_MAC_CFG_AN_EN_B		0
@@ -533,7 +544,7 @@ struct hclge_query_an_speed_dup {
 
 #define HCLGE_MAC_CFG_AN_EN	BIT(HCLGE_MAC_CFG_AN_EN_B)
 
-struct hclge_config_auto_neg {
+struct hclge_config_auto_neg_cmd {
 	__le32  cfg_an_cmd_flag;
 	u8      rsv[20];
 };
@@ -542,7 +553,7 @@ struct hclge_config_auto_neg {
 #define HCLGE_MAC_MAX_MTU		9728
 #define HCLGE_MAC_UPLINK_PORT		0x100
 
-struct hclge_config_max_frm_size {
+struct hclge_config_max_frm_size_cmd {
 	__le16  max_frm_size;
 	u8      rsv[22];
 };
@@ -559,10 +570,10 @@ enum hclge_mac_vlan_tbl_opcode {
 #define HCLGE_MAC_EPORT_SW_EN_B		0xc
 #define HCLGE_MAC_EPORT_TYPE_B		0xb
 #define HCLGE_MAC_EPORT_VFID_S		0x3
-#define HCLGE_MAC_EPORT_VFID_M		(0xff << HCLGE_MAC_EPORT_VFID_S)
+#define HCLGE_MAC_EPORT_VFID_M		GENMASK(10, 3)
 #define HCLGE_MAC_EPORT_PFID_S		0x0
-#define HCLGE_MAC_EPORT_PFID_M		(0x7 << HCLGE_MAC_EPORT_PFID_S)
-struct hclge_mac_vlan_tbl_entry {
+#define HCLGE_MAC_EPORT_PFID_M		GENMASK(2, 0)
+struct hclge_mac_vlan_tbl_entry_cmd {
 	u8	flags;
 	u8      resp_code;
 	__le16  vlan_tag;
@@ -577,15 +588,15 @@ struct hclge_mac_vlan_tbl_entry {
 };
 
 #define HCLGE_CFG_MTA_MAC_SEL_S		0x0
-#define HCLGE_CFG_MTA_MAC_SEL_M		(0x3 << HCLGE_CFG_MTA_MAC_SEL_S)
+#define HCLGE_CFG_MTA_MAC_SEL_M		GENMASK(1, 0)
 #define HCLGE_CFG_MTA_MAC_EN_B		0x7
-struct hclge_mta_filter_mode {
+struct hclge_mta_filter_mode_cmd {
 	u8	dmac_sel_en; /* Use lowest 2 bit as sel_mode, bit 7 as enable */
 	u8      rsv[23];
 };
 
 #define HCLGE_CFG_FUNC_MTA_ACCEPT_B	0x0
-struct hclge_cfg_func_mta_filter {
+struct hclge_cfg_func_mta_filter_cmd {
 	u8	accept; /* Only used lowest 1 bit */
 	u8      function_id;
 	u8      rsv[22];
@@ -593,14 +604,14 @@ struct hclge_cfg_func_mta_filter {
 
 #define HCLGE_CFG_MTA_ITEM_ACCEPT_B	0x0
 #define HCLGE_CFG_MTA_ITEM_IDX_S	0x0
-#define HCLGE_CFG_MTA_ITEM_IDX_M	(0xfff << HCLGE_CFG_MTA_ITEM_IDX_S)
-struct hclge_cfg_func_mta_item {
-	u16	item_idx; /* Only used lowest 12 bit */
+#define HCLGE_CFG_MTA_ITEM_IDX_M	GENMASK(11, 0)
+struct hclge_cfg_func_mta_item_cmd {
+	__le16	item_idx; /* Only used lowest 12 bit */
 	u8      accept;   /* Only used lowest 1 bit */
 	u8      rsv[21];
 };
 
-struct hclge_mac_vlan_add {
+struct hclge_mac_vlan_add_cmd {
 	__le16  flags;
 	__le16  mac_addr_hi16;
 	__le32  mac_addr_lo32;
@@ -613,7 +624,7 @@ struct hclge_mac_vlan_add {
 };
 
 #define HNS3_MAC_VLAN_CFG_FLAG_BIT 0
-struct hclge_mac_vlan_remove {
+struct hclge_mac_vlan_remove_cmd {
 	__le16  flags;
 	__le16  mac_addr_hi16;
 	__le32  mac_addr_lo32;
@@ -625,21 +636,21 @@ struct hclge_mac_vlan_remove {
 	u8      rsv[4];
 };
 
-struct hclge_vlan_filter_ctrl {
+struct hclge_vlan_filter_ctrl_cmd {
 	u8 vlan_type;
 	u8 vlan_fe;
 	u8 rsv[22];
 };
 
-struct hclge_vlan_filter_pf_cfg {
+struct hclge_vlan_filter_pf_cfg_cmd {
 	u8 vlan_offset;
 	u8 vlan_cfg;
 	u8 rsv[2];
 	u8 vlan_offset_bitmap[20];
 };
 
-struct hclge_vlan_filter_vf_cfg {
-	u16 vlan_id;
+struct hclge_vlan_filter_vf_cfg_cmd {
+	__le16 vlan_id;
 	u8  resp_code;
 	u8  rsv;
 	u8  vlan_cfg;
@@ -647,14 +658,14 @@ struct hclge_vlan_filter_vf_cfg {
 	u8  vf_bitmap[16];
 };
 
-struct hclge_cfg_com_tqp_queue {
+struct hclge_cfg_com_tqp_queue_cmd {
 	__le16 tqp_id;
 	__le16 stream_id;
 	u8 enable;
 	u8 rsv[19];
 };
 
-struct hclge_cfg_tx_queue_pointer {
+struct hclge_cfg_tx_queue_pointer_cmd {
 	__le16 tqp_id;
 	__le16 tx_tail;
 	__le16 tx_head;
@@ -664,12 +675,12 @@ struct hclge_cfg_tx_queue_pointer {
 };
 
 #define HCLGE_TSO_MSS_MIN_S	0
-#define HCLGE_TSO_MSS_MIN_M	(0x3FFF << HCLGE_TSO_MSS_MIN_S)
+#define HCLGE_TSO_MSS_MIN_M	GENMASK(13, 0)
 
 #define HCLGE_TSO_MSS_MAX_S	16
-#define HCLGE_TSO_MSS_MAX_M	(0x3FFF << HCLGE_TSO_MSS_MAX_S)
+#define HCLGE_TSO_MSS_MAX_M	GENMASK(29, 16)
 
-struct hclge_cfg_tso_status {
+struct hclge_cfg_tso_status_cmd {
 	__le16 tso_mss_min;
 	__le16 tso_mss_max;
 	u8 rsv[20];
@@ -679,13 +690,20 @@ struct hclge_cfg_tso_status {
 #define HCLGE_TSO_MSS_MAX	9668
 
 #define HCLGE_TQP_RESET_B	0
-struct hclge_reset_tqp_queue {
+struct hclge_reset_tqp_queue_cmd {
 	__le16 tqp_id;
 	u8 reset_req;
 	u8 ready_to_reset;
 	u8 rsv[20];
 };
 
+#define HCLGE_CFG_RESET_MAC_B		3
+#define HCLGE_CFG_RESET_FUNC_B		7
+struct hclge_reset_cmd {
+	u8 mac_func_reset;
+	u8 fun_reset_vfid;
+	u8 rsv[22];
+};
 #define HCLGE_DEFAULT_TX_BUF		0x4000	 /* 16k  bytes */
 #define HCLGE_TOTAL_PKT_BUF		0x108000 /* 1.03125M bytes */
 #define HCLGE_DEFAULT_DV		0xA000	 /* 40k byte */
@@ -733,6 +751,7 @@ struct hclge_hw;
 int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num);
 void hclge_cmd_setup_basic_desc(struct hclge_desc *desc,
 				enum hclge_opcode_type opcode, bool is_read);
+void hclge_cmd_reuse_desc(struct hclge_desc *desc, bool is_read);
 
 int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev,
 			       struct hclge_promisc_param *param);
@@ -743,4 +762,5 @@ enum hclge_cmd_status hclge_cmd_mdio_read(struct hclge_hw *hw,
 					  struct hclge_desc *desc);
 
 void hclge_destroy_cmd_queue(struct hclge_hw *hw);
+int hclge_cmd_queue_init(struct hclge_dev *hdev);
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
new file mode 100644
index 000000000000..5018d6633133
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
@@ -0,0 +1,347 @@
+/*
+ * Copyright (c) 2016-2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "hclge_main.h"
+#include "hclge_tm.h"
+#include "hnae3.h"
+
+#define BW_PERCENT	100
+
+static int hclge_ieee_ets_to_tm_info(struct hclge_dev *hdev,
+				     struct ieee_ets *ets)
+{
+	u8 i;
+
+	for (i = 0; i < HNAE3_MAX_TC; i++) {
+		switch (ets->tc_tsa[i]) {
+		case IEEE_8021QAZ_TSA_STRICT:
+			hdev->tm_info.tc_info[i].tc_sch_mode =
+				HCLGE_SCH_MODE_SP;
+			hdev->tm_info.pg_info[0].tc_dwrr[i] = 0;
+			break;
+		case IEEE_8021QAZ_TSA_ETS:
+			hdev->tm_info.tc_info[i].tc_sch_mode =
+				HCLGE_SCH_MODE_DWRR;
+			hdev->tm_info.pg_info[0].tc_dwrr[i] =
+				ets->tc_tx_bw[i];
+			break;
+		default:
+			/* Hardware only supports SP (strict priority)
+			 * or ETS (enhanced transmission selection)
+			 * algorithms, if we receive some other value
+			 * from dcbnl, then throw an error.
+			 */
+			return -EINVAL;
+		}
+	}
+
+	return hclge_tm_prio_tc_info_update(hdev, ets->prio_tc);
+}
+
+static void hclge_tm_info_to_ieee_ets(struct hclge_dev *hdev,
+				      struct ieee_ets *ets)
+{
+	u32 i;
+
+	memset(ets, 0, sizeof(*ets));
+	ets->willing = 1;
+	ets->ets_cap = hdev->tc_max;
+
+	for (i = 0; i < HNAE3_MAX_TC; i++) {
+		ets->prio_tc[i] = hdev->tm_info.prio_tc[i];
+		ets->tc_tx_bw[i] = hdev->tm_info.pg_info[0].tc_dwrr[i];
+
+		if (hdev->tm_info.tc_info[i].tc_sch_mode ==
+		    HCLGE_SCH_MODE_SP)
+			ets->tc_tsa[i] = IEEE_8021QAZ_TSA_STRICT;
+		else
+			ets->tc_tsa[i] = IEEE_8021QAZ_TSA_ETS;
+	}
+}
+
+/* IEEE std */
+static int hclge_ieee_getets(struct hnae3_handle *h, struct ieee_ets *ets)
+{
+	struct hclge_vport *vport = hclge_get_vport(h);
+	struct hclge_dev *hdev = vport->back;
+
+	hclge_tm_info_to_ieee_ets(hdev, ets);
+
+	return 0;
+}
+
+static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets,
+			      u8 *tc, bool *changed)
+{
+	u32 total_ets_bw = 0;
+	u8 max_tc = 0;
+	u8 i;
+
+	for (i = 0; i < HNAE3_MAX_TC; i++) {
+		if (ets->prio_tc[i] >= hdev->tc_max ||
+		    i >= hdev->tc_max)
+			return -EINVAL;
+
+		if (ets->prio_tc[i] != hdev->tm_info.prio_tc[i])
+			*changed = true;
+
+		if (ets->prio_tc[i] > max_tc)
+			max_tc = ets->prio_tc[i];
+
+		switch (ets->tc_tsa[i]) {
+		case IEEE_8021QAZ_TSA_STRICT:
+			if (hdev->tm_info.tc_info[i].tc_sch_mode !=
+				HCLGE_SCH_MODE_SP)
+				*changed = true;
+			break;
+		case IEEE_8021QAZ_TSA_ETS:
+			if (hdev->tm_info.tc_info[i].tc_sch_mode !=
+				HCLGE_SCH_MODE_DWRR)
+				*changed = true;
+
+			total_ets_bw += ets->tc_tx_bw[i];
+		break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	if (total_ets_bw != BW_PERCENT)
+		return -EINVAL;
+
+	*tc = max_tc + 1;
+	if (*tc != hdev->tm_info.num_tc)
+		*changed = true;
+
+	return 0;
+}
+
+static int hclge_map_update(struct hnae3_handle *h)
+{
+	struct hclge_vport *vport = hclge_get_vport(h);
+	struct hclge_dev *hdev = vport->back;
+	int ret;
+
+	ret = hclge_tm_map_cfg(hdev);
+	if (ret)
+		return ret;
+
+	ret = hclge_tm_schd_mode_hw(hdev);
+	if (ret)
+		return ret;
+
+	ret = hclge_pause_setup_hw(hdev);
+	if (ret)
+		return ret;
+
+	ret = hclge_buffer_alloc(hdev);
+	if (ret)
+		return ret;
+
+	return hclge_rss_init_hw(hdev);
+}
+
+static int hclge_client_setup_tc(struct hclge_dev *hdev)
+{
+	struct hclge_vport *vport = hdev->vport;
+	struct hnae3_client *client;
+	struct hnae3_handle *handle;
+	int ret;
+	u32 i;
+
+	for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
+		handle = &vport[i].nic;
+		client = handle->client;
+
+		if (!client || !client->ops || !client->ops->setup_tc)
+			continue;
+
+		ret = client->ops->setup_tc(handle, hdev->tm_info.num_tc);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
+{
+	struct hclge_vport *vport = hclge_get_vport(h);
+	struct hclge_dev *hdev = vport->back;
+	bool map_changed = false;
+	u8 num_tc = 0;
+	int ret;
+
+	if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) ||
+	    hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE)
+		return -EINVAL;
+
+	ret = hclge_ets_validate(hdev, ets, &num_tc, &map_changed);
+	if (ret)
+		return ret;
+
+	hclge_tm_schd_info_update(hdev, num_tc);
+
+	ret = hclge_ieee_ets_to_tm_info(hdev, ets);
+	if (ret)
+		return ret;
+
+	if (map_changed) {
+		ret = hclge_client_setup_tc(hdev);
+		if (ret)
+			return ret;
+	}
+
+	return hclge_tm_dwrr_cfg(hdev);
+}
+
+static int hclge_ieee_getpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
+{
+	struct hclge_vport *vport = hclge_get_vport(h);
+	struct hclge_dev *hdev = vport->back;
+	u8 i, j, pfc_map, *prio_tc;
+
+	memset(pfc, 0, sizeof(*pfc));
+	pfc->pfc_cap = hdev->pfc_max;
+	prio_tc = hdev->tm_info.prio_tc;
+	pfc_map = hdev->tm_info.hw_pfc_map;
+
+	/* Pfc setting is based on TC */
+	for (i = 0; i < hdev->tm_info.num_tc; i++) {
+		for (j = 0; j < HNAE3_MAX_USER_PRIO; j++) {
+			if ((prio_tc[j] == i) && (pfc_map & BIT(i)))
+				pfc->pfc_en |= BIT(j);
+		}
+	}
+
+	return 0;
+}
+
+static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
+{
+	struct hclge_vport *vport = hclge_get_vport(h);
+	struct hclge_dev *hdev = vport->back;
+	u8 i, j, pfc_map, *prio_tc;
+
+	if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) ||
+	    hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE)
+		return -EINVAL;
+
+	prio_tc = hdev->tm_info.prio_tc;
+	pfc_map = 0;
+
+	for (i = 0; i < hdev->tm_info.num_tc; i++) {
+		for (j = 0; j < HNAE3_MAX_USER_PRIO; j++) {
+			if ((prio_tc[j] == i) && (pfc->pfc_en & BIT(j))) {
+				pfc_map |= BIT(i);
+				break;
+			}
+		}
+	}
+
+	if (pfc_map == hdev->tm_info.hw_pfc_map)
+		return 0;
+
+	hdev->tm_info.hw_pfc_map = pfc_map;
+
+	return hclge_pause_setup_hw(hdev);
+}
+
+/* DCBX configuration */
+static u8 hclge_getdcbx(struct hnae3_handle *h)
+{
+	struct hclge_vport *vport = hclge_get_vport(h);
+	struct hclge_dev *hdev = vport->back;
+
+	if (hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE)
+		return 0;
+
+	return hdev->dcbx_cap;
+}
+
+static u8 hclge_setdcbx(struct hnae3_handle *h, u8 mode)
+{
+	struct hclge_vport *vport = hclge_get_vport(h);
+	struct hclge_dev *hdev = vport->back;
+
+	/* No support for LLD_MANAGED modes or CEE */
+	if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    (mode & DCB_CAP_DCBX_VER_CEE) ||
+	    !(mode & DCB_CAP_DCBX_HOST))
+		return 1;
+
+	hdev->dcbx_cap = mode;
+
+	return 0;
+}
+
+/* Set up TC for hardware offloaded mqprio in channel mode */
+static int hclge_setup_tc(struct hnae3_handle *h, u8 tc, u8 *prio_tc)
+{
+	struct hclge_vport *vport = hclge_get_vport(h);
+	struct hclge_dev *hdev = vport->back;
+	int ret;
+
+	if (hdev->flag & HCLGE_FLAG_DCB_ENABLE)
+		return -EINVAL;
+
+	if (tc > hdev->tc_max) {
+		dev_err(&hdev->pdev->dev,
+			"setup tc failed, tc(%u) > tc_max(%u)\n",
+			tc, hdev->tc_max);
+		return -EINVAL;
+	}
+
+	hclge_tm_schd_info_update(hdev, tc);
+
+	ret = hclge_tm_prio_tc_info_update(hdev, prio_tc);
+	if (ret)
+		return ret;
+
+	ret = hclge_tm_init_hw(hdev);
+	if (ret)
+		return ret;
+
+	hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
+
+	if (tc > 1)
+		hdev->flag |= HCLGE_FLAG_MQPRIO_ENABLE;
+	else
+		hdev->flag &= ~HCLGE_FLAG_MQPRIO_ENABLE;
+
+	return 0;
+}
+
+static const struct hnae3_dcb_ops hns3_dcb_ops = {
+	.ieee_getets	= hclge_ieee_getets,
+	.ieee_setets	= hclge_ieee_setets,
+	.ieee_getpfc	= hclge_ieee_getpfc,
+	.ieee_setpfc	= hclge_ieee_setpfc,
+	.getdcbx	= hclge_getdcbx,
+	.setdcbx	= hclge_setdcbx,
+	.map_update	= hclge_map_update,
+	.setup_tc	= hclge_setup_tc,
+};
+
+void hclge_dcb_ops_set(struct hclge_dev *hdev)
+{
+	struct hclge_vport *vport = hdev->vport;
+	struct hnae3_knic_private_info *kinfo;
+
+	/* Hdev does not support DCB or vport is
+	 * not a pf, then dcb_ops is not set.
+	 */
+	if (!hnae3_dev_dcb_supported(hdev) ||
+	    vport->vport_id != 0)
+		return;
+
+	kinfo = &vport->nic.kinfo;
+	kinfo->dcb_ops = &hns3_dcb_ops;
+	hdev->dcbx_cap = DCB_CAP_DCBX_VER_IEEE | DCB_CAP_DCBX_HOST;
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h
new file mode 100644
index 000000000000..7d808ee96694
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2016~2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __HCLGE_DCB_H__
+#define __HCLGE_DCB_H__
+
+#include "hclge_main.h"
+
+#ifdef CONFIG_HNS3_DCB
+void hclge_dcb_ops_set(struct hclge_dev *hdev);
+#else
+static inline void hclge_dcb_ops_set(struct hclge_dev *hdev) {}
+#endif
+
+#endif /* __HCLGE_DCB_H__ */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index c1cdbfd83bdb..59ed806a52c3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -19,6 +19,7 @@
 #include <linux/platform_device.h>
 
 #include "hclge_cmd.h"
+#include "hclge_dcb.h"
 #include "hclge_main.h"
 #include "hclge_mdio.h"
 #include "hclge_tm.h"
@@ -30,11 +31,11 @@
 #define HCLGE_64BIT_STATS_FIELD_OFF(f) (offsetof(struct hclge_64_bit_stats, f))
 #define HCLGE_32BIT_STATS_FIELD_OFF(f) (offsetof(struct hclge_32_bit_stats, f))
 
-static int hclge_rss_init_hw(struct hclge_dev *hdev);
 static int hclge_set_mta_filter_mode(struct hclge_dev *hdev,
 				     enum hclge_mta_dmac_sel_type mta_mac_sel,
 				     bool enable);
 static int hclge_init_vlan_config(struct hclge_dev *hdev);
+static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev);
 
 static struct hnae3_ae_algo ae_algo;
 
@@ -362,7 +363,7 @@ static int hclge_64_bit_update_stats(struct hclge_dev *hdev)
 #define HCLGE_64_BIT_RTN_DATANUM 4
 	u64 *data = (u64 *)(&hdev->hw_stats.all_64_bit_stats);
 	struct hclge_desc desc[HCLGE_64_BIT_CMD_NUM];
-	u64 *desc_data;
+	__le64 *desc_data;
 	int i, k, n;
 	int ret;
 
@@ -376,14 +377,14 @@ static int hclge_64_bit_update_stats(struct hclge_dev *hdev)
 
 	for (i = 0; i < HCLGE_64_BIT_CMD_NUM; i++) {
 		if (unlikely(i == 0)) {
-			desc_data = (u64 *)(&desc[i].data[0]);
+			desc_data = (__le64 *)(&desc[i].data[0]);
 			n = HCLGE_64_BIT_RTN_DATANUM - 1;
 		} else {
-			desc_data = (u64 *)(&desc[i]);
+			desc_data = (__le64 *)(&desc[i]);
 			n = HCLGE_64_BIT_RTN_DATANUM;
 		}
 		for (k = 0; k < n; k++) {
-			*data++ += cpu_to_le64(*desc_data);
+			*data++ += le64_to_cpu(*desc_data);
 			desc_data++;
 		}
 	}
@@ -411,7 +412,7 @@ static int hclge_32_bit_update_stats(struct hclge_dev *hdev)
 
 	struct hclge_desc desc[HCLGE_32_BIT_CMD_NUM];
 	struct hclge_32_bit_stats *all_32_bit_stats;
-	u32 *desc_data;
+	__le32 *desc_data;
 	int i, k, n;
 	u64 *data;
 	int ret;
@@ -431,21 +432,27 @@ static int hclge_32_bit_update_stats(struct hclge_dev *hdev)
 	hclge_reset_partial_32bit_counter(all_32_bit_stats);
 	for (i = 0; i < HCLGE_32_BIT_CMD_NUM; i++) {
 		if (unlikely(i == 0)) {
+			__le16 *desc_data_16bit;
+
 			all_32_bit_stats->igu_rx_err_pkt +=
-				cpu_to_le32(desc[i].data[0]);
+				le32_to_cpu(desc[i].data[0]);
+
+			desc_data_16bit = (__le16 *)&desc[i].data[1];
 			all_32_bit_stats->igu_rx_no_eof_pkt +=
-				cpu_to_le32(desc[i].data[1] & 0xffff);
+				le16_to_cpu(*desc_data_16bit);
+
+			desc_data_16bit++;
 			all_32_bit_stats->igu_rx_no_sof_pkt +=
-				cpu_to_le32((desc[i].data[1] >> 16) & 0xffff);
+				le16_to_cpu(*desc_data_16bit);
 
-			desc_data = (u32 *)(&desc[i].data[2]);
+			desc_data = &desc[i].data[2];
 			n = HCLGE_32_BIT_RTN_DATANUM - 4;
 		} else {
-			desc_data = (u32 *)(&desc[i]);
+			desc_data = (__le32 *)&desc[i];
 			n = HCLGE_32_BIT_RTN_DATANUM;
 		}
 		for (k = 0; k < n; k++) {
-			*data++ += cpu_to_le32(*desc_data);
+			*data++ += le32_to_cpu(*desc_data);
 			desc_data++;
 		}
 	}
@@ -460,7 +467,7 @@ static int hclge_mac_update_stats(struct hclge_dev *hdev)
 
 	u64 *data = (u64 *)(&hdev->hw_stats.mac_stats);
 	struct hclge_desc desc[HCLGE_MAC_CMD_NUM];
-	u64 *desc_data;
+	__le64 *desc_data;
 	int i, k, n;
 	int ret;
 
@@ -475,14 +482,14 @@ static int hclge_mac_update_stats(struct hclge_dev *hdev)
 
 	for (i = 0; i < HCLGE_MAC_CMD_NUM; i++) {
 		if (unlikely(i == 0)) {
-			desc_data = (u64 *)(&desc[i].data[0]);
+			desc_data = (__le64 *)(&desc[i].data[0]);
 			n = HCLGE_RTN_DATA_NUM - 2;
 		} else {
-			desc_data = (u64 *)(&desc[i]);
+			desc_data = (__le64 *)(&desc[i]);
 			n = HCLGE_RTN_DATA_NUM;
 		}
 		for (k = 0; k < n; k++) {
-			*data++ += cpu_to_le64(*desc_data);
+			*data++ += le64_to_cpu(*desc_data);
 			desc_data++;
 		}
 	}
@@ -508,7 +515,7 @@ static int hclge_tqps_update_stats(struct hnae3_handle *handle)
 					   HCLGE_OPC_QUERY_RX_STATUS,
 					   true);
 
-		desc[0].data[0] = (tqp->index & 0x1ff);
+		desc[0].data[0] = cpu_to_le32((tqp->index & 0x1ff));
 		ret = hclge_cmd_send(&hdev->hw, desc, 1);
 		if (ret) {
 			dev_err(&hdev->pdev->dev,
@@ -517,7 +524,7 @@ static int hclge_tqps_update_stats(struct hnae3_handle *handle)
 			return ret;
 		}
 		tqp->tqp_stats.rcb_rx_ring_pktnum_rcd +=
-			cpu_to_le32(desc[0].data[4]);
+			le32_to_cpu(desc[0].data[4]);
 	}
 
 	for (i = 0; i < kinfo->num_tqps; i++) {
@@ -528,7 +535,7 @@ static int hclge_tqps_update_stats(struct hnae3_handle *handle)
 					   HCLGE_OPC_QUERY_TX_STATUS,
 					   true);
 
-		desc[0].data[0] = (tqp->index & 0x1ff);
+		desc[0].data[0] = cpu_to_le32((tqp->index & 0x1ff));
 		ret = hclge_cmd_send(&hdev->hw, desc, 1);
 		if (ret) {
 			dev_err(&hdev->pdev->dev,
@@ -537,7 +544,7 @@ static int hclge_tqps_update_stats(struct hnae3_handle *handle)
 			return ret;
 		}
 		tqp->tqp_stats.rcb_tx_ring_pktnum_rcd +=
-			cpu_to_le32(desc[0].data[4]);
+			le32_to_cpu(desc[0].data[4]);
 	}
 
 	return 0;
@@ -552,12 +559,12 @@ static u64 *hclge_tqps_get_stats(struct hnae3_handle *handle, u64 *data)
 
 	for (i = 0; i < kinfo->num_tqps; i++) {
 		tqp = container_of(kinfo->tqp[i], struct hclge_tqp, q);
-		*buff++ = cpu_to_le64(tqp->tqp_stats.rcb_tx_ring_pktnum_rcd);
+		*buff++ = tqp->tqp_stats.rcb_tx_ring_pktnum_rcd;
 	}
 
 	for (i = 0; i < kinfo->num_tqps; i++) {
 		tqp = container_of(kinfo->tqp[i], struct hclge_tqp, q);
-		*buff++ = cpu_to_le64(tqp->tqp_stats.rcb_rx_ring_pktnum_rcd);
+		*buff++ = tqp->tqp_stats.rcb_rx_ring_pktnum_rcd;
 	}
 
 	return buff;
@@ -820,7 +827,7 @@ static void hclge_get_stats(struct hnae3_handle *handle, u64 *data)
 }
 
 static int hclge_parse_func_status(struct hclge_dev *hdev,
-				   struct hclge_func_status *status)
+				   struct hclge_func_status_cmd *status)
 {
 	if (!(status->pf_state & HCLGE_PF_STATE_DONE))
 		return -EINVAL;
@@ -831,19 +838,18 @@ static int hclge_parse_func_status(struct hclge_dev *hdev,
 	else
 		hdev->flag &= ~HCLGE_FLAG_MAIN;
 
-	hdev->num_req_vfs = status->vf_num / status->pf_num;
 	return 0;
 }
 
 static int hclge_query_function_status(struct hclge_dev *hdev)
 {
-	struct hclge_func_status *req;
+	struct hclge_func_status_cmd *req;
 	struct hclge_desc desc;
 	int timeout = 0;
 	int ret;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_FUNC_STATUS, true);
-	req = (struct hclge_func_status *)desc.data;
+	req = (struct hclge_func_status_cmd *)desc.data;
 
 	do {
 		ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -868,7 +874,7 @@ static int hclge_query_function_status(struct hclge_dev *hdev)
 
 static int hclge_query_pf_resource(struct hclge_dev *hdev)
 {
-	struct hclge_pf_res *req;
+	struct hclge_pf_res_cmd *req;
 	struct hclge_desc desc;
 	int ret;
 
@@ -880,19 +886,19 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev)
 		return ret;
 	}
 
-	req = (struct hclge_pf_res *)desc.data;
+	req = (struct hclge_pf_res_cmd *)desc.data;
 	hdev->num_tqps = __le16_to_cpu(req->tqp_num);
 	hdev->pkt_buf_size = __le16_to_cpu(req->buf_size) << HCLGE_BUF_UNIT_S;
 
 	if (hnae3_dev_roce_supported(hdev)) {
-		hdev->num_roce_msix =
+		hdev->num_roce_msi =
 		hnae_get_field(__le16_to_cpu(req->pf_intr_vector_number),
 			       HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
 
 		/* PF should have NIC vectors and Roce vectors,
 		 * NIC vectors are queued before Roce vectors.
 		 */
-		hdev->num_msi = hdev->num_roce_msix  + HCLGE_ROCE_VECTOR_OFFSET;
+		hdev->num_msi = hdev->num_roce_msi  + HCLGE_ROCE_VECTOR_OFFSET;
 	} else {
 		hdev->num_msi =
 		hnae_get_field(__le16_to_cpu(req->pf_intr_vector_number),
@@ -938,12 +944,12 @@ static int hclge_parse_speed(int speed_cmd, int *speed)
 
 static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
 {
-	struct hclge_cfg_param *req;
+	struct hclge_cfg_param_cmd *req;
 	u64 mac_addr_tmp_high;
 	u64 mac_addr_tmp;
 	int i;
 
-	req = (struct hclge_cfg_param *)desc[0].data;
+	req = (struct hclge_cfg_param_cmd *)desc[0].data;
 
 	/* get the configuration */
 	cfg->vmdq_vport_num = hnae_get_field(__le32_to_cpu(req->param[0]),
@@ -978,7 +984,7 @@ static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
 	for (i = 0; i < ETH_ALEN; i++)
 		cfg->mac_addr[i] = (mac_addr_tmp >> (8 * i)) & 0xff;
 
-	req = (struct hclge_cfg_param *)desc[1].data;
+	req = (struct hclge_cfg_param_cmd *)desc[1].data;
 	cfg->numa_node_map = __le32_to_cpu(req->param[0]);
 }
 
@@ -989,20 +995,21 @@ static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
 static int hclge_get_cfg(struct hclge_dev *hdev, struct hclge_cfg *hcfg)
 {
 	struct hclge_desc desc[HCLGE_PF_CFG_DESC_NUM];
-	struct hclge_cfg_param *req;
+	struct hclge_cfg_param_cmd *req;
 	int i, ret;
 
 	for (i = 0; i < HCLGE_PF_CFG_DESC_NUM; i++) {
-		req = (struct hclge_cfg_param *)desc[i].data;
+		u32 offset = 0;
+
+		req = (struct hclge_cfg_param_cmd *)desc[i].data;
 		hclge_cmd_setup_basic_desc(&desc[i], HCLGE_OPC_GET_CFG_PARAM,
 					   true);
-		hnae_set_field(req->offset, HCLGE_CFG_OFFSET_M,
+		hnae_set_field(offset, HCLGE_CFG_OFFSET_M,
 			       HCLGE_CFG_OFFSET_S, i * HCLGE_CFG_RD_LEN_BYTES);
 		/* Len should be united by 4 bytes when send to hardware */
-		hnae_set_field(req->offset, HCLGE_CFG_RD_LEN_M,
-			       HCLGE_CFG_RD_LEN_S,
+		hnae_set_field(offset, HCLGE_CFG_RD_LEN_M, HCLGE_CFG_RD_LEN_S,
 			       HCLGE_CFG_RD_LEN_BYTES / HCLGE_CFG_RD_LEN_UNIT);
-		req->offset = cpu_to_le32(req->offset);
+		req->offset = cpu_to_le32(offset);
 	}
 
 	ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_PF_CFG_DESC_NUM);
@@ -1058,7 +1065,7 @@ static int hclge_configure(struct hclge_dev *hdev)
 	hdev->hw.mac.phy_addr = cfg.phy_addr;
 	hdev->num_desc = cfg.tqp_desc_num;
 	hdev->tm_info.num_pg = 1;
-	hdev->tm_info.num_tc = cfg.tc_num;
+	hdev->tc_max = cfg.tc_num;
 	hdev->tm_info.hw_pfc_map = 0;
 
 	ret = hclge_parse_speed(cfg.default_speed, &hdev->hw.mac.speed);
@@ -1067,15 +1074,25 @@ static int hclge_configure(struct hclge_dev *hdev)
 		return ret;
 	}
 
-	if ((hdev->tm_info.num_tc > HNAE3_MAX_TC) ||
-	    (hdev->tm_info.num_tc < 1)) {
+	if ((hdev->tc_max > HNAE3_MAX_TC) ||
+	    (hdev->tc_max < 1)) {
 		dev_warn(&hdev->pdev->dev, "TC num = %d.\n",
-			 hdev->tm_info.num_tc);
-		hdev->tm_info.num_tc = 1;
+			 hdev->tc_max);
+		hdev->tc_max = 1;
+	}
+
+	/* Dev does not support DCB */
+	if (!hnae3_dev_dcb_supported(hdev)) {
+		hdev->tc_max = 1;
+		hdev->pfc_max = 0;
+	} else {
+		hdev->pfc_max = hdev->tc_max;
 	}
 
+	hdev->tm_info.num_tc = hdev->tc_max;
+
 	/* Currently not support uncontiuous tc */
-	for (i = 0; i < cfg.tc_num; i++)
+	for (i = 0; i < hdev->tm_info.num_tc; i++)
 		hnae_set_bit(hdev->hw_tc_map, i, 1);
 
 	if (!hdev->num_vmdq_vport && !hdev->num_req_vfs)
@@ -1089,16 +1106,23 @@ static int hclge_configure(struct hclge_dev *hdev)
 static int hclge_config_tso(struct hclge_dev *hdev, int tso_mss_min,
 			    int tso_mss_max)
 {
-	struct hclge_cfg_tso_status *req;
+	struct hclge_cfg_tso_status_cmd *req;
 	struct hclge_desc desc;
+	u16 tso_mss;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TSO_GENERIC_CONFIG, false);
 
-	req = (struct hclge_cfg_tso_status *)desc.data;
-	hnae_set_field(req->tso_mss_min, HCLGE_TSO_MSS_MIN_M,
+	req = (struct hclge_cfg_tso_status_cmd *)desc.data;
+
+	tso_mss = 0;
+	hnae_set_field(tso_mss, HCLGE_TSO_MSS_MIN_M,
 		       HCLGE_TSO_MSS_MIN_S, tso_mss_min);
-	hnae_set_field(req->tso_mss_max, HCLGE_TSO_MSS_MIN_M,
+	req->tso_mss_min = cpu_to_le16(tso_mss);
+
+	tso_mss = 0;
+	hnae_set_field(tso_mss, HCLGE_TSO_MSS_MIN_M,
 		       HCLGE_TSO_MSS_MIN_S, tso_mss_max);
+	req->tso_mss_max = cpu_to_le16(tso_mss);
 
 	return hclge_cmd_send(&hdev->hw, &desc, 1);
 }
@@ -1134,15 +1158,15 @@ static int hclge_alloc_tqps(struct hclge_dev *hdev)
 static int hclge_map_tqps_to_func(struct hclge_dev *hdev, u16 func_id,
 				  u16 tqp_pid, u16 tqp_vid, bool is_pf)
 {
-	struct hclge_tqp_map *req;
+	struct hclge_tqp_map_cmd *req;
 	struct hclge_desc desc;
 	int ret;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_SET_TQP_MAP, false);
 
-	req = (struct hclge_tqp_map *)desc.data;
+	req = (struct hclge_tqp_map_cmd *)desc.data;
 	req->tqp_id = cpu_to_le16(tqp_pid);
-	req->tqp_vf = cpu_to_le16(func_id);
+	req->tqp_vf = func_id;
 	req->tqp_flag = !is_pf << HCLGE_TQP_MAP_TYPE_B |
 			1 << HCLGE_TQP_MAP_EN_B;
 	req->tqp_vid = cpu_to_le16(tqp_vid);
@@ -1161,11 +1185,7 @@ static int  hclge_assign_tqp(struct hclge_vport *vport,
 			     struct hnae3_queue **tqp, u16 num_tqps)
 {
 	struct hclge_dev *hdev = vport->back;
-	int i, alloced, func_id, ret;
-	bool is_pf;
-
-	func_id = vport->vport_id;
-	is_pf = (vport->vport_id == 0) ? true : false;
+	int i, alloced;
 
 	for (i = 0, alloced = 0; i < hdev->num_tqps &&
 	     alloced < num_tqps; i++) {
@@ -1174,12 +1194,6 @@ static int  hclge_assign_tqp(struct hclge_vport *vport,
 			hdev->htqp[i].q.tqp_index = alloced;
 			tqp[alloced] = &hdev->htqp[i].q;
 			hdev->htqp[i].alloced = true;
-			ret = hclge_map_tqps_to_func(hdev, func_id,
-						     hdev->htqp[i].index,
-						     alloced, is_pf);
-			if (ret)
-				return ret;
-
 			alloced++;
 		}
 	}
@@ -1231,6 +1245,49 @@ static int hclge_knic_setup(struct hclge_vport *vport, u16 num_tqps)
 	return 0;
 }
 
+static int hclge_map_tqp_to_vport(struct hclge_dev *hdev,
+				  struct hclge_vport *vport)
+{
+	struct hnae3_handle *nic = &vport->nic;
+	struct hnae3_knic_private_info *kinfo;
+	u16 i;
+
+	kinfo = &nic->kinfo;
+	for (i = 0; i < kinfo->num_tqps; i++) {
+		struct hclge_tqp *q =
+			container_of(kinfo->tqp[i], struct hclge_tqp, q);
+		bool is_pf;
+		int ret;
+
+		is_pf = !(vport->vport_id);
+		ret = hclge_map_tqps_to_func(hdev, vport->vport_id, q->index,
+					     i, is_pf);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_map_tqp(struct hclge_dev *hdev)
+{
+	struct hclge_vport *vport = hdev->vport;
+	u16 i, num_vport;
+
+	num_vport = hdev->num_vmdq_vport + hdev->num_req_vfs + 1;
+	for (i = 0; i < num_vport; i++)	{
+		int ret;
+
+		ret = hclge_map_tqp_to_vport(hdev, vport);
+		if (ret)
+			return ret;
+
+		vport++;
+	}
+
+	return 0;
+}
+
 static void hclge_unic_setup(struct hclge_vport *vport, u16 num_tqps)
 {
 	/* this would be initialized later */
@@ -1324,23 +1381,27 @@ static int hclge_alloc_vport(struct hclge_dev *hdev)
 	return 0;
 }
 
-static int  hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev, u16 buf_size)
+static int  hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev,
+				    struct hclge_pkt_buf_alloc *buf_alloc)
 {
 /* TX buffer size is unit by 128 byte */
 #define HCLGE_BUF_SIZE_UNIT_SHIFT	7
 #define HCLGE_BUF_SIZE_UPDATE_EN_MSK	BIT(15)
-	struct hclge_tx_buff_alloc *req;
+	struct hclge_tx_buff_alloc_cmd *req;
 	struct hclge_desc desc;
 	int ret;
 	u8 i;
 
-	req = (struct hclge_tx_buff_alloc *)desc.data;
+	req = (struct hclge_tx_buff_alloc_cmd *)desc.data;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TX_BUFF_ALLOC, 0);
-	for (i = 0; i < HCLGE_TC_NUM; i++)
+	for (i = 0; i < HCLGE_TC_NUM; i++) {
+		u32 buf_size = buf_alloc->priv_buf[i].tx_buf_size;
+
 		req->tx_pkt_buff[i] =
 			cpu_to_le16((buf_size >> HCLGE_BUF_SIZE_UNIT_SHIFT) |
 				     HCLGE_BUF_SIZE_UPDATE_EN_MSK);
+	}
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret) {
@@ -1352,9 +1413,10 @@ static int  hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev, u16 buf_size)
 	return 0;
 }
 
-static int hclge_tx_buffer_alloc(struct hclge_dev *hdev, u32 buf_size)
+static int hclge_tx_buffer_alloc(struct hclge_dev *hdev,
+				 struct hclge_pkt_buf_alloc *buf_alloc)
 {
-	int ret = hclge_cmd_alloc_tx_buff(hdev, buf_size);
+	int ret = hclge_cmd_alloc_tx_buff(hdev, buf_alloc);
 
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
@@ -1387,13 +1449,14 @@ static int hclge_get_pfc_enalbe_num(struct hclge_dev *hdev)
 }
 
 /* Get the number of pfc enabled TCs, which have private buffer */
-static int hclge_get_pfc_priv_num(struct hclge_dev *hdev)
+static int hclge_get_pfc_priv_num(struct hclge_dev *hdev,
+				  struct hclge_pkt_buf_alloc *buf_alloc)
 {
 	struct hclge_priv_buf *priv;
 	int i, cnt = 0;
 
 	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
-		priv = &hdev->priv_buf[i];
+		priv = &buf_alloc->priv_buf[i];
 		if ((hdev->tm_info.hw_pfc_map & BIT(i)) &&
 		    priv->enable)
 			cnt++;
@@ -1403,13 +1466,14 @@ static int hclge_get_pfc_priv_num(struct hclge_dev *hdev)
 }
 
 /* Get the number of pfc disabled TCs, which have private buffer */
-static int hclge_get_no_pfc_priv_num(struct hclge_dev *hdev)
+static int hclge_get_no_pfc_priv_num(struct hclge_dev *hdev,
+				     struct hclge_pkt_buf_alloc *buf_alloc)
 {
 	struct hclge_priv_buf *priv;
 	int i, cnt = 0;
 
 	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
-		priv = &hdev->priv_buf[i];
+		priv = &buf_alloc->priv_buf[i];
 		if (hdev->hw_tc_map & BIT(i) &&
 		    !(hdev->tm_info.hw_pfc_map & BIT(i)) &&
 		    priv->enable)
@@ -1419,21 +1483,33 @@ static int hclge_get_no_pfc_priv_num(struct hclge_dev *hdev)
 	return cnt;
 }
 
-static u32 hclge_get_rx_priv_buff_alloced(struct hclge_dev *hdev)
+static u32 hclge_get_rx_priv_buff_alloced(struct hclge_pkt_buf_alloc *buf_alloc)
 {
 	struct hclge_priv_buf *priv;
 	u32 rx_priv = 0;
 	int i;
 
 	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
-		priv = &hdev->priv_buf[i];
+		priv = &buf_alloc->priv_buf[i];
 		if (priv->enable)
 			rx_priv += priv->buf_size;
 	}
 	return rx_priv;
 }
 
-static bool  hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all)
+static u32 hclge_get_tx_buff_alloced(struct hclge_pkt_buf_alloc *buf_alloc)
+{
+	u32 i, total_tx_size = 0;
+
+	for (i = 0; i < HCLGE_MAX_TC_NUM; i++)
+		total_tx_size += buf_alloc->priv_buf[i].tx_buf_size;
+
+	return total_tx_size;
+}
+
+static bool  hclge_is_rx_buf_ok(struct hclge_dev *hdev,
+				struct hclge_pkt_buf_alloc *buf_alloc,
+				u32 rx_all)
 {
 	u32 shared_buf_min, shared_buf_tc, shared_std;
 	int tc_num, pfc_enable_num;
@@ -1454,46 +1530,74 @@ static bool  hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all)
 			hdev->mps;
 	shared_std = max_t(u32, shared_buf_min, shared_buf_tc);
 
-	rx_priv = hclge_get_rx_priv_buff_alloced(hdev);
+	rx_priv = hclge_get_rx_priv_buff_alloced(buf_alloc);
 	if (rx_all <= rx_priv + shared_std)
 		return false;
 
 	shared_buf = rx_all - rx_priv;
-	hdev->s_buf.buf_size = shared_buf;
-	hdev->s_buf.self.high = shared_buf;
-	hdev->s_buf.self.low =  2 * hdev->mps;
+	buf_alloc->s_buf.buf_size = shared_buf;
+	buf_alloc->s_buf.self.high = shared_buf;
+	buf_alloc->s_buf.self.low =  2 * hdev->mps;
 
 	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
 		if ((hdev->hw_tc_map & BIT(i)) &&
 		    (hdev->tm_info.hw_pfc_map & BIT(i))) {
-			hdev->s_buf.tc_thrd[i].low = hdev->mps;
-			hdev->s_buf.tc_thrd[i].high = 2 * hdev->mps;
+			buf_alloc->s_buf.tc_thrd[i].low = hdev->mps;
+			buf_alloc->s_buf.tc_thrd[i].high = 2 * hdev->mps;
 		} else {
-			hdev->s_buf.tc_thrd[i].low = 0;
-			hdev->s_buf.tc_thrd[i].high = hdev->mps;
+			buf_alloc->s_buf.tc_thrd[i].low = 0;
+			buf_alloc->s_buf.tc_thrd[i].high = hdev->mps;
 		}
 	}
 
 	return true;
 }
 
+static int hclge_tx_buffer_calc(struct hclge_dev *hdev,
+				struct hclge_pkt_buf_alloc *buf_alloc)
+{
+	u32 i, total_size;
+
+	total_size = hdev->pkt_buf_size;
+
+	/* alloc tx buffer for all enabled tc */
+	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+		struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i];
+
+		if (total_size < HCLGE_DEFAULT_TX_BUF)
+			return -ENOMEM;
+
+		if (hdev->hw_tc_map & BIT(i))
+			priv->tx_buf_size = HCLGE_DEFAULT_TX_BUF;
+		else
+			priv->tx_buf_size = 0;
+
+		total_size -= priv->tx_buf_size;
+	}
+
+	return 0;
+}
+
 /* hclge_rx_buffer_calc: calculate the rx private buffer size for all TCs
  * @hdev: pointer to struct hclge_dev
- * @tx_size: the allocated tx buffer for all TCs
+ * @buf_alloc: pointer to buffer calculation data
  * @return: 0: calculate sucessful, negative: fail
  */
-int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size)
+static int hclge_rx_buffer_calc(struct hclge_dev *hdev,
+				struct hclge_pkt_buf_alloc *buf_alloc)
 {
-	u32 rx_all = hdev->pkt_buf_size - tx_size;
+	u32 rx_all = hdev->pkt_buf_size;
 	int no_pfc_priv_num, pfc_priv_num;
 	struct hclge_priv_buf *priv;
 	int i;
 
+	rx_all -= hclge_get_tx_buff_alloced(buf_alloc);
+
 	/* When DCB is not supported, rx private
 	 * buffer is not allocated.
 	 */
 	if (!hnae3_dev_dcb_supported(hdev)) {
-		if (!hclge_is_rx_buf_ok(hdev, rx_all))
+		if (!hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
 			return -ENOMEM;
 
 		return 0;
@@ -1501,7 +1605,7 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size)
 
 	/* step 1, try to alloc private buffer for all enabled tc */
 	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
-		priv = &hdev->priv_buf[i];
+		priv = &buf_alloc->priv_buf[i];
 		if (hdev->hw_tc_map & BIT(i)) {
 			priv->enable = 1;
 			if (hdev->tm_info.hw_pfc_map & BIT(i)) {
@@ -1522,14 +1626,14 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size)
 		}
 	}
 
-	if (hclge_is_rx_buf_ok(hdev, rx_all))
+	if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
 		return 0;
 
 	/* step 2, try to decrease the buffer size of
 	 * no pfc TC's private buffer
 	 */
 	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
-		priv = &hdev->priv_buf[i];
+		priv = &buf_alloc->priv_buf[i];
 
 		priv->enable = 0;
 		priv->wl.low = 0;
@@ -1552,18 +1656,18 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size)
 		}
 	}
 
-	if (hclge_is_rx_buf_ok(hdev, rx_all))
+	if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
 		return 0;
 
 	/* step 3, try to reduce the number of pfc disabled TCs,
 	 * which have private buffer
 	 */
 	/* get the total no pfc enable TC number, which have private buffer */
-	no_pfc_priv_num = hclge_get_no_pfc_priv_num(hdev);
+	no_pfc_priv_num = hclge_get_no_pfc_priv_num(hdev, buf_alloc);
 
 	/* let the last to be cleared first */
 	for (i = HCLGE_MAX_TC_NUM - 1; i >= 0; i--) {
-		priv = &hdev->priv_buf[i];
+		priv = &buf_alloc->priv_buf[i];
 
 		if (hdev->hw_tc_map & BIT(i) &&
 		    !(hdev->tm_info.hw_pfc_map & BIT(i))) {
@@ -1575,22 +1679,22 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size)
 			no_pfc_priv_num--;
 		}
 
-		if (hclge_is_rx_buf_ok(hdev, rx_all) ||
+		if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all) ||
 		    no_pfc_priv_num == 0)
 			break;
 	}
 
-	if (hclge_is_rx_buf_ok(hdev, rx_all))
+	if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
 		return 0;
 
 	/* step 4, try to reduce the number of pfc enabled TCs
 	 * which have private buffer.
 	 */
-	pfc_priv_num = hclge_get_pfc_priv_num(hdev);
+	pfc_priv_num = hclge_get_pfc_priv_num(hdev, buf_alloc);
 
 	/* let the last to be cleared first */
 	for (i = HCLGE_MAX_TC_NUM - 1; i >= 0; i--) {
-		priv = &hdev->priv_buf[i];
+		priv = &buf_alloc->priv_buf[i];
 
 		if (hdev->hw_tc_map & BIT(i) &&
 		    hdev->tm_info.hw_pfc_map & BIT(i)) {
@@ -1602,38 +1706,39 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size)
 			pfc_priv_num--;
 		}
 
-		if (hclge_is_rx_buf_ok(hdev, rx_all) ||
+		if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all) ||
 		    pfc_priv_num == 0)
 			break;
 	}
-	if (hclge_is_rx_buf_ok(hdev, rx_all))
+	if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
 		return 0;
 
 	return -ENOMEM;
 }
 
-static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev)
+static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev,
+				   struct hclge_pkt_buf_alloc *buf_alloc)
 {
-	struct hclge_rx_priv_buff *req;
+	struct hclge_rx_priv_buff_cmd *req;
 	struct hclge_desc desc;
 	int ret;
 	int i;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RX_PRIV_BUFF_ALLOC, false);
-	req = (struct hclge_rx_priv_buff *)desc.data;
+	req = (struct hclge_rx_priv_buff_cmd *)desc.data;
 
 	/* Alloc private buffer TCs */
 	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
-		struct hclge_priv_buf *priv = &hdev->priv_buf[i];
+		struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i];
 
 		req->buf_num[i] =
 			cpu_to_le16(priv->buf_size >> HCLGE_BUF_UNIT_S);
 		req->buf_num[i] |=
-			cpu_to_le16(true << HCLGE_TC0_PRI_BUF_EN_B);
+			cpu_to_le16(1 << HCLGE_TC0_PRI_BUF_EN_B);
 	}
 
 	req->shared_buf =
-		cpu_to_le16((hdev->s_buf.buf_size >> HCLGE_BUF_UNIT_S) |
+		cpu_to_le16((buf_alloc->s_buf.buf_size >> HCLGE_BUF_UNIT_S) |
 			    (1 << HCLGE_TC0_PRI_BUF_EN_B));
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -1648,7 +1753,8 @@ static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev)
 
 #define HCLGE_PRIV_ENABLE(a) ((a) > 0 ? 1 : 0)
 
-static int hclge_rx_priv_wl_config(struct hclge_dev *hdev)
+static int hclge_rx_priv_wl_config(struct hclge_dev *hdev,
+				   struct hclge_pkt_buf_alloc *buf_alloc)
 {
 	struct hclge_rx_priv_wl_buf *req;
 	struct hclge_priv_buf *priv;
@@ -1668,7 +1774,9 @@ static int hclge_rx_priv_wl_config(struct hclge_dev *hdev)
 			desc[i].flag &= ~cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
 
 		for (j = 0; j < HCLGE_TC_NUM_ONE_DESC; j++) {
-			priv = &hdev->priv_buf[i * HCLGE_TC_NUM_ONE_DESC + j];
+			u32 idx = i * HCLGE_TC_NUM_ONE_DESC + j;
+
+			priv = &buf_alloc->priv_buf[idx];
 			req->tc_wl[j].high =
 				cpu_to_le16(priv->wl.high >> HCLGE_BUF_UNIT_S);
 			req->tc_wl[j].high |=
@@ -1693,9 +1801,10 @@ static int hclge_rx_priv_wl_config(struct hclge_dev *hdev)
 	return 0;
 }
 
-static int hclge_common_thrd_config(struct hclge_dev *hdev)
+static int hclge_common_thrd_config(struct hclge_dev *hdev,
+				    struct hclge_pkt_buf_alloc *buf_alloc)
 {
-	struct hclge_shared_buf *s_buf = &hdev->s_buf;
+	struct hclge_shared_buf *s_buf = &buf_alloc->s_buf;
 	struct hclge_rx_com_thrd *req;
 	struct hclge_desc desc[2];
 	struct hclge_tc_thrd *tc;
@@ -1739,9 +1848,10 @@ static int hclge_common_thrd_config(struct hclge_dev *hdev)
 	return 0;
 }
 
-static int hclge_common_wl_config(struct hclge_dev *hdev)
+static int hclge_common_wl_config(struct hclge_dev *hdev,
+				  struct hclge_pkt_buf_alloc *buf_alloc)
 {
-	struct hclge_shared_buf *buf = &hdev->s_buf;
+	struct hclge_shared_buf *buf = &buf_alloc->s_buf;
 	struct hclge_rx_com_wl *req;
 	struct hclge_desc desc;
 	int ret;
@@ -1771,63 +1881,68 @@ static int hclge_common_wl_config(struct hclge_dev *hdev)
 
 int hclge_buffer_alloc(struct hclge_dev *hdev)
 {
-	u32 tx_buf_size = HCLGE_DEFAULT_TX_BUF;
+	struct hclge_pkt_buf_alloc *pkt_buf;
 	int ret;
 
-	hdev->priv_buf = devm_kmalloc_array(&hdev->pdev->dev, HCLGE_MAX_TC_NUM,
-					    sizeof(struct hclge_priv_buf),
-					    GFP_KERNEL | __GFP_ZERO);
-	if (!hdev->priv_buf)
+	pkt_buf = kzalloc(sizeof(*pkt_buf), GFP_KERNEL);
+	if (!pkt_buf)
 		return -ENOMEM;
 
-	ret = hclge_tx_buffer_alloc(hdev, tx_buf_size);
+	ret = hclge_tx_buffer_calc(hdev, pkt_buf);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"could not calc tx buffer size for all TCs %d\n", ret);
+		goto out;
+	}
+
+	ret = hclge_tx_buffer_alloc(hdev, pkt_buf);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
 			"could not alloc tx buffers %d\n", ret);
-		return ret;
+		goto out;
 	}
 
-	ret = hclge_rx_buffer_calc(hdev, tx_buf_size);
+	ret = hclge_rx_buffer_calc(hdev, pkt_buf);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
 			"could not calc rx priv buffer size for all TCs %d\n",
 			ret);
-		return ret;
+		goto out;
 	}
 
-	ret = hclge_rx_priv_buf_alloc(hdev);
+	ret = hclge_rx_priv_buf_alloc(hdev, pkt_buf);
 	if (ret) {
 		dev_err(&hdev->pdev->dev, "could not alloc rx priv buffer %d\n",
 			ret);
-		return ret;
+		goto out;
 	}
 
 	if (hnae3_dev_dcb_supported(hdev)) {
-		ret = hclge_rx_priv_wl_config(hdev);
+		ret = hclge_rx_priv_wl_config(hdev, pkt_buf);
 		if (ret) {
 			dev_err(&hdev->pdev->dev,
 				"could not configure rx private waterline %d\n",
 				ret);
-			return ret;
+			goto out;
 		}
 
-		ret = hclge_common_thrd_config(hdev);
+		ret = hclge_common_thrd_config(hdev, pkt_buf);
 		if (ret) {
 			dev_err(&hdev->pdev->dev,
 				"could not configure common threshold %d\n",
 				ret);
-			return ret;
+			goto out;
 		}
 	}
 
-	ret = hclge_common_wl_config(hdev);
-	if (ret) {
+	ret = hclge_common_wl_config(hdev, pkt_buf);
+	if (ret)
 		dev_err(&hdev->pdev->dev,
 			"could not configure common waterline %d\n", ret);
-		return ret;
-	}
 
-	return 0;
+out:
+	kfree(pkt_buf);
+	return ret;
 }
 
 static int hclge_init_roce_base_info(struct hclge_vport *vport)
@@ -1835,7 +1950,7 @@ static int hclge_init_roce_base_info(struct hclge_vport *vport)
 	struct hnae3_handle *roce = &vport->roce;
 	struct hnae3_handle *nic = &vport->nic;
 
-	roce->rinfo.num_vectors = vport->back->num_roce_msix;
+	roce->rinfo.num_vectors = vport->back->num_roce_msi;
 
 	if (vport->back->num_msi_left < vport->roce.rinfo.num_vectors ||
 	    vport->back->num_msi_left == 0)
@@ -1853,67 +1968,47 @@ static int hclge_init_roce_base_info(struct hclge_vport *vport)
 	return 0;
 }
 
-static int hclge_init_msix(struct hclge_dev *hdev)
+static int hclge_init_msi(struct hclge_dev *hdev)
 {
 	struct pci_dev *pdev = hdev->pdev;
-	int ret, i;
-
-	hdev->msix_entries = devm_kcalloc(&pdev->dev, hdev->num_msi,
-					  sizeof(struct msix_entry),
-					  GFP_KERNEL);
-	if (!hdev->msix_entries)
-		return -ENOMEM;
-
-	hdev->vector_status = devm_kcalloc(&pdev->dev, hdev->num_msi,
-					   sizeof(u16), GFP_KERNEL);
-	if (!hdev->vector_status)
-		return -ENOMEM;
+	int vectors;
+	int i;
 
-	for (i = 0; i < hdev->num_msi; i++) {
-		hdev->msix_entries[i].entry = i;
-		hdev->vector_status[i] = HCLGE_INVALID_VPORT;
+	vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi,
+					PCI_IRQ_MSI | PCI_IRQ_MSIX);
+	if (vectors < 0) {
+		dev_err(&pdev->dev,
+			"failed(%d) to allocate MSI/MSI-X vectors\n",
+			vectors);
+		return vectors;
 	}
+	if (vectors < hdev->num_msi)
+		dev_warn(&hdev->pdev->dev,
+			 "requested %d MSI/MSI-X, but allocated %d MSI/MSI-X\n",
+			 hdev->num_msi, vectors);
 
-	hdev->num_msi_left = hdev->num_msi;
-	hdev->base_msi_vector = hdev->pdev->irq;
+	hdev->num_msi = vectors;
+	hdev->num_msi_left = vectors;
+	hdev->base_msi_vector = pdev->irq;
 	hdev->roce_base_vector = hdev->base_msi_vector +
 				HCLGE_ROCE_VECTOR_OFFSET;
 
-	ret = pci_enable_msix_range(hdev->pdev, hdev->msix_entries,
-				    hdev->num_msi, hdev->num_msi);
-	if (ret < 0) {
-		dev_info(&hdev->pdev->dev,
-			 "MSI-X vector alloc failed: %d\n", ret);
-		return ret;
-	}
-
-	return 0;
-}
-
-static int hclge_init_msi(struct hclge_dev *hdev)
-{
-	struct pci_dev *pdev = hdev->pdev;
-	int vectors;
-	int i;
-
 	hdev->vector_status = devm_kcalloc(&pdev->dev, hdev->num_msi,
 					   sizeof(u16), GFP_KERNEL);
-	if (!hdev->vector_status)
+	if (!hdev->vector_status) {
+		pci_free_irq_vectors(pdev);
 		return -ENOMEM;
+	}
 
 	for (i = 0; i < hdev->num_msi; i++)
 		hdev->vector_status[i] = HCLGE_INVALID_VPORT;
 
-	vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi, PCI_IRQ_MSI);
-	if (vectors < 0) {
-		dev_err(&pdev->dev, "MSI vectors enable failed %d\n", vectors);
-		return -EINVAL;
+	hdev->vector_irq = devm_kcalloc(&pdev->dev, hdev->num_msi,
+					sizeof(int), GFP_KERNEL);
+	if (!hdev->vector_irq) {
+		pci_free_irq_vectors(pdev);
+		return -ENOMEM;
 	}
-	hdev->num_msi = vectors;
-	hdev->num_msi_left = vectors;
-	hdev->base_msi_vector = pdev->irq;
-	hdev->roce_base_vector = hdev->base_msi_vector +
-				HCLGE_ROCE_VECTOR_OFFSET;
 
 	return 0;
 }
@@ -1932,11 +2027,11 @@ static void hclge_check_speed_dup(struct hclge_dev *hdev, int duplex, int speed)
 
 int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex)
 {
-	struct hclge_config_mac_speed_dup *req;
+	struct hclge_config_mac_speed_dup_cmd *req;
 	struct hclge_desc desc;
 	int ret;
 
-	req = (struct hclge_config_mac_speed_dup *)desc.data;
+	req = (struct hclge_config_mac_speed_dup_cmd *)desc.data;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_SPEED_DUP, false);
 
@@ -2007,12 +2102,12 @@ static int hclge_cfg_mac_speed_dup_h(struct hnae3_handle *handle, int speed,
 static int hclge_query_mac_an_speed_dup(struct hclge_dev *hdev, int *speed,
 					u8 *duplex)
 {
-	struct hclge_query_an_speed_dup *req;
+	struct hclge_query_an_speed_dup_cmd *req;
 	struct hclge_desc desc;
 	int speed_tmp;
 	int ret;
 
-	req = (struct hclge_query_an_speed_dup *)desc.data;
+	req = (struct hclge_query_an_speed_dup_cmd *)desc.data;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_AN_RESULT, true);
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -2040,11 +2135,11 @@ static int hclge_query_mac_an_speed_dup(struct hclge_dev *hdev, int *speed,
 static int hclge_query_autoneg_result(struct hclge_dev *hdev)
 {
 	struct hclge_mac *mac = &hdev->hw.mac;
-	struct hclge_query_an_speed_dup *req;
+	struct hclge_query_an_speed_dup_cmd *req;
 	struct hclge_desc desc;
 	int ret;
 
-	req = (struct hclge_query_an_speed_dup *)desc.data;
+	req = (struct hclge_query_an_speed_dup_cmd *)desc.data;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_AN_RESULT, true);
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -2061,14 +2156,16 @@ static int hclge_query_autoneg_result(struct hclge_dev *hdev)
 
 static int hclge_set_autoneg_en(struct hclge_dev *hdev, bool enable)
 {
-	struct hclge_config_auto_neg *req;
+	struct hclge_config_auto_neg_cmd *req;
 	struct hclge_desc desc;
+	u32 flag = 0;
 	int ret;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_AN_MODE, false);
 
-	req = (struct hclge_config_auto_neg *)desc.data;
-	hnae_set_bit(req->cfg_an_cmd_flag, HCLGE_MAC_CFG_AN_EN_B, !!enable);
+	req = (struct hclge_config_auto_neg_cmd *)desc.data;
+	hnae_set_bit(flag, HCLGE_MAC_CFG_AN_EN_B, !!enable);
+	req->cfg_an_cmd_flag = cpu_to_le32(flag);
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret) {
@@ -2112,13 +2209,6 @@ static int hclge_mac_init(struct hclge_dev *hdev)
 
 	mac->link = 0;
 
-	ret = hclge_mac_mdio_config(hdev);
-	if (ret) {
-		dev_warn(&hdev->pdev->dev,
-			 "mdio config fail ret=%d\n", ret);
-		return ret;
-	}
-
 	/* Initialize the MTA table work mode */
 	hdev->accept_mta_mc	= true;
 	hdev->enable_mta	= true;
@@ -2146,7 +2236,7 @@ static void hclge_task_schedule(struct hclge_dev *hdev)
 
 static int hclge_get_mac_link_status(struct hclge_dev *hdev)
 {
-	struct hclge_link_status *req;
+	struct hclge_link_status_cmd *req;
 	struct hclge_desc desc;
 	int link_status;
 	int ret;
@@ -2159,7 +2249,7 @@ static int hclge_get_mac_link_status(struct hclge_dev *hdev)
 		return ret;
 	}
 
-	req = (struct hclge_link_status *)desc.data;
+	req = (struct hclge_link_status_cmd *)desc.data;
 	link_status = req->status & HCLGE_LINK_STATUS;
 
 	return !!link_status;
@@ -2215,18 +2305,7 @@ static int hclge_update_speed_duplex(struct hclge_dev *hdev)
 	/* get the speed and duplex as autoneg'result from mac cmd when phy
 	 * doesn't exit.
 	 */
-	if (mac.phydev)
-		return 0;
-
-	/* update mac->antoneg. */
-	ret = hclge_query_autoneg_result(hdev);
-	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"autoneg result query failed %d\n", ret);
-		return ret;
-	}
-
-	if (!mac.autoneg)
+	if (mac.phydev || !mac.autoneg)
 		return 0;
 
 	ret = hclge_query_mac_an_speed_dup(hdev, &speed, &duplex);
@@ -2266,11 +2345,11 @@ static int hclge_get_status(struct hnae3_handle *handle)
 	return hdev->hw.mac.link;
 }
 
-static void hclge_service_timer(unsigned long data)
+static void hclge_service_timer(struct timer_list *t)
 {
-	struct hclge_dev *hdev = (struct hclge_dev *)data;
-	(void)mod_timer(&hdev->service_timer, jiffies + HZ);
+	struct hclge_dev *hdev = from_timer(hdev, t, service_timer);
 
+	mod_timer(&hdev->service_timer, jiffies + HZ);
 	hclge_task_schedule(hdev);
 }
 
@@ -2283,11 +2362,275 @@ static void hclge_service_complete(struct hclge_dev *hdev)
 	clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state);
 }
 
+static void hclge_enable_vector(struct hclge_misc_vector *vector, bool enable)
+{
+	writel(enable ? 1 : 0, vector->addr);
+}
+
+static irqreturn_t hclge_misc_irq_handle(int irq, void *data)
+{
+	struct hclge_dev *hdev = data;
+
+	hclge_enable_vector(&hdev->misc_vector, false);
+	if (!test_and_set_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state))
+		schedule_work(&hdev->service_task);
+
+	return IRQ_HANDLED;
+}
+
+static void hclge_free_vector(struct hclge_dev *hdev, int vector_id)
+{
+	hdev->vector_status[vector_id] = HCLGE_INVALID_VPORT;
+	hdev->num_msi_left += 1;
+	hdev->num_msi_used -= 1;
+}
+
+static void hclge_get_misc_vector(struct hclge_dev *hdev)
+{
+	struct hclge_misc_vector *vector = &hdev->misc_vector;
+
+	vector->vector_irq = pci_irq_vector(hdev->pdev, 0);
+
+	vector->addr = hdev->hw.io_base + HCLGE_MISC_VECTOR_REG_BASE;
+	hdev->vector_status[0] = 0;
+
+	hdev->num_msi_left -= 1;
+	hdev->num_msi_used += 1;
+}
+
+static int hclge_misc_irq_init(struct hclge_dev *hdev)
+{
+	int ret;
+
+	hclge_get_misc_vector(hdev);
+
+	ret = devm_request_irq(&hdev->pdev->dev,
+			       hdev->misc_vector.vector_irq,
+			       hclge_misc_irq_handle, 0, "hclge_misc", hdev);
+	if (ret) {
+		hclge_free_vector(hdev, 0);
+		dev_err(&hdev->pdev->dev, "request misc irq(%d) fail\n",
+			hdev->misc_vector.vector_irq);
+	}
+
+	return ret;
+}
+
+static int hclge_notify_client(struct hclge_dev *hdev,
+			       enum hnae3_reset_notify_type type)
+{
+	struct hnae3_client *client = hdev->nic_client;
+	u16 i;
+
+	if (!client->ops->reset_notify)
+		return -EOPNOTSUPP;
+
+	for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
+		struct hnae3_handle *handle = &hdev->vport[i].nic;
+		int ret;
+
+		ret = client->ops->reset_notify(handle, type);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int hclge_reset_wait(struct hclge_dev *hdev)
+{
+#define HCLGE_RESET_WATI_MS	100
+#define HCLGE_RESET_WAIT_CNT	5
+	u32 val, reg, reg_bit;
+	u32 cnt = 0;
+
+	switch (hdev->reset_type) {
+	case HNAE3_GLOBAL_RESET:
+		reg = HCLGE_GLOBAL_RESET_REG;
+		reg_bit = HCLGE_GLOBAL_RESET_BIT;
+		break;
+	case HNAE3_CORE_RESET:
+		reg = HCLGE_GLOBAL_RESET_REG;
+		reg_bit = HCLGE_CORE_RESET_BIT;
+		break;
+	case HNAE3_FUNC_RESET:
+		reg = HCLGE_FUN_RST_ING;
+		reg_bit = HCLGE_FUN_RST_ING_B;
+		break;
+	default:
+		dev_err(&hdev->pdev->dev,
+			"Wait for unsupported reset type: %d\n",
+			hdev->reset_type);
+		return -EINVAL;
+	}
+
+	val = hclge_read_dev(&hdev->hw, reg);
+	while (hnae_get_bit(val, reg_bit) && cnt < HCLGE_RESET_WAIT_CNT) {
+		msleep(HCLGE_RESET_WATI_MS);
+		val = hclge_read_dev(&hdev->hw, reg);
+		cnt++;
+	}
+
+	/* must clear reset status register to
+	 * prevent driver detect reset interrupt again
+	 */
+	reg = hclge_read_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG);
+	hclge_write_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG, reg);
+
+	if (cnt >= HCLGE_RESET_WAIT_CNT) {
+		dev_warn(&hdev->pdev->dev,
+			 "Wait for reset timeout: %d\n", hdev->reset_type);
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id)
+{
+	struct hclge_desc desc;
+	struct hclge_reset_cmd *req = (struct hclge_reset_cmd *)desc.data;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_RST_TRIGGER, false);
+	hnae_set_bit(req->mac_func_reset, HCLGE_CFG_RESET_MAC_B, 0);
+	hnae_set_bit(req->mac_func_reset, HCLGE_CFG_RESET_FUNC_B, 1);
+	req->fun_reset_vfid = func_id;
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"send function reset cmd fail, status =%d\n", ret);
+
+	return ret;
+}
+
+static void hclge_do_reset(struct hclge_dev *hdev, enum hnae3_reset_type type)
+{
+	struct pci_dev *pdev = hdev->pdev;
+	u32 val;
+
+	switch (type) {
+	case HNAE3_GLOBAL_RESET:
+		val = hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG);
+		hnae_set_bit(val, HCLGE_GLOBAL_RESET_BIT, 1);
+		hclge_write_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG, val);
+		dev_info(&pdev->dev, "Global Reset requested\n");
+		break;
+	case HNAE3_CORE_RESET:
+		val = hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG);
+		hnae_set_bit(val, HCLGE_CORE_RESET_BIT, 1);
+		hclge_write_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG, val);
+		dev_info(&pdev->dev, "Core Reset requested\n");
+		break;
+	case HNAE3_FUNC_RESET:
+		dev_info(&pdev->dev, "PF Reset requested\n");
+		hclge_func_reset_cmd(hdev, 0);
+		break;
+	default:
+		dev_warn(&pdev->dev,
+			 "Unsupported reset type: %d\n", type);
+		break;
+	}
+}
+
+static enum hnae3_reset_type hclge_detected_reset_event(struct hclge_dev *hdev)
+{
+	enum hnae3_reset_type rst_level = HNAE3_NONE_RESET;
+	u32 rst_reg_val;
+
+	rst_reg_val = hclge_read_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG);
+	if (BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B) & rst_reg_val)
+		rst_level = HNAE3_GLOBAL_RESET;
+	else if (BIT(HCLGE_VECTOR0_CORERESET_INT_B) & rst_reg_val)
+		rst_level = HNAE3_CORE_RESET;
+	else if (BIT(HCLGE_VECTOR0_IMPRESET_INT_B) & rst_reg_val)
+		rst_level = HNAE3_IMP_RESET;
+
+	return rst_level;
+}
+
+static void hclge_reset_event(struct hnae3_handle *handle,
+			      enum hnae3_reset_type reset)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	dev_info(&hdev->pdev->dev,
+		 "Receive reset event , reset_type is %d", reset);
+
+	switch (reset) {
+	case HNAE3_FUNC_RESET:
+	case HNAE3_CORE_RESET:
+	case HNAE3_GLOBAL_RESET:
+		if (test_bit(HCLGE_STATE_RESET_INT, &hdev->state)) {
+			dev_err(&hdev->pdev->dev, "Already in reset state");
+			return;
+		}
+		hdev->reset_type = reset;
+		set_bit(HCLGE_STATE_RESET_INT, &hdev->state);
+		set_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state);
+		schedule_work(&hdev->service_task);
+		break;
+	default:
+		dev_warn(&hdev->pdev->dev, "Unsupported reset event:%d", reset);
+		break;
+	}
+}
+
+static void hclge_reset_subtask(struct hclge_dev *hdev)
+{
+	bool do_reset;
+
+	do_reset = hdev->reset_type != HNAE3_NONE_RESET;
+
+	/* Reset is detected by interrupt */
+	if (hdev->reset_type == HNAE3_NONE_RESET)
+		hdev->reset_type = hclge_detected_reset_event(hdev);
+
+	if (hdev->reset_type == HNAE3_NONE_RESET)
+		return;
+
+	switch (hdev->reset_type) {
+	case HNAE3_FUNC_RESET:
+	case HNAE3_CORE_RESET:
+	case HNAE3_GLOBAL_RESET:
+	case HNAE3_IMP_RESET:
+		hclge_notify_client(hdev, HNAE3_DOWN_CLIENT);
+
+		if (do_reset)
+			hclge_do_reset(hdev, hdev->reset_type);
+		else
+			set_bit(HCLGE_STATE_RESET_INT, &hdev->state);
+
+		if (!hclge_reset_wait(hdev)) {
+			hclge_notify_client(hdev, HNAE3_UNINIT_CLIENT);
+			hclge_reset_ae_dev(hdev->ae_dev);
+			hclge_notify_client(hdev, HNAE3_INIT_CLIENT);
+			clear_bit(HCLGE_STATE_RESET_INT, &hdev->state);
+		}
+		hclge_notify_client(hdev, HNAE3_UP_CLIENT);
+		break;
+	default:
+		dev_err(&hdev->pdev->dev, "Unsupported reset type:%d\n",
+			hdev->reset_type);
+		break;
+	}
+	hdev->reset_type = HNAE3_NONE_RESET;
+}
+
+static void hclge_misc_irq_service_task(struct hclge_dev *hdev)
+{
+	hclge_reset_subtask(hdev);
+	hclge_enable_vector(&hdev->misc_vector, true);
+}
+
 static void hclge_service_task(struct work_struct *work)
 {
 	struct hclge_dev *hdev =
 		container_of(work, struct hclge_dev, service_task);
 
+	hclge_misc_irq_service_task(hdev);
 	hclge_update_speed_duplex(hdev);
 	hclge_update_link_status(hdev);
 	hclge_update_stats_for_all(hdev);
@@ -2341,6 +2684,7 @@ static int hclge_get_vector(struct hnae3_handle *handle, u16 vector_num,
 					vport->vport_id *
 					HCLGE_VECTOR_VF_OFFSET;
 				hdev->vector_status[i] = vport->vport_id;
+				hdev->vector_irq[i] = vector->vector;
 
 				vector++;
 				alloc++;
@@ -2359,15 +2703,10 @@ static int hclge_get_vector_index(struct hclge_dev *hdev, int vector)
 {
 	int i;
 
-	for (i = 0; i < hdev->num_msi; i++) {
-		if (hdev->msix_entries) {
-			if (vector == hdev->msix_entries[i].vector)
-				return i;
-		} else {
-			if (vector == (hdev->base_msi_vector + i))
-				return i;
-		}
-	}
+	for (i = 0; i < hdev->num_msi; i++)
+		if (vector == hdev->vector_irq[i])
+			return i;
+
 	return -EINVAL;
 }
 
@@ -2383,7 +2722,7 @@ static u32 hclge_get_rss_indir_size(struct hnae3_handle *handle)
 
 static int hclge_get_rss_algo(struct hclge_dev *hdev)
 {
-	struct hclge_rss_config *req;
+	struct hclge_rss_config_cmd *req;
 	struct hclge_desc desc;
 	int rss_hash_algo;
 	int ret;
@@ -2397,7 +2736,7 @@ static int hclge_get_rss_algo(struct hclge_dev *hdev)
 		return ret;
 	}
 
-	req = (struct hclge_rss_config *)desc.data;
+	req = (struct hclge_rss_config_cmd *)desc.data;
 	rss_hash_algo = (req->hash_config & HCLGE_RSS_HASH_ALGO_MASK);
 
 	if (rss_hash_algo == HCLGE_RSS_HASH_ALGO_TOEPLITZ)
@@ -2409,13 +2748,13 @@ static int hclge_get_rss_algo(struct hclge_dev *hdev)
 static int hclge_set_rss_algo_key(struct hclge_dev *hdev,
 				  const u8 hfunc, const u8 *key)
 {
-	struct hclge_rss_config *req;
+	struct hclge_rss_config_cmd *req;
 	struct hclge_desc desc;
 	int key_offset;
 	int key_size;
 	int ret;
 
-	req = (struct hclge_rss_config *)desc.data;
+	req = (struct hclge_rss_config_cmd *)desc.data;
 
 	for (key_offset = 0; key_offset < 3; key_offset++) {
 		hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_GENERIC_CONFIG,
@@ -2446,19 +2785,20 @@ static int hclge_set_rss_algo_key(struct hclge_dev *hdev,
 
 static int hclge_set_rss_indir_table(struct hclge_dev *hdev, const u32 *indir)
 {
-	struct hclge_rss_indirection_table *req;
+	struct hclge_rss_indirection_table_cmd *req;
 	struct hclge_desc desc;
 	int i, j;
 	int ret;
 
-	req = (struct hclge_rss_indirection_table *)desc.data;
+	req = (struct hclge_rss_indirection_table_cmd *)desc.data;
 
 	for (i = 0; i < HCLGE_RSS_CFG_TBL_NUM; i++) {
 		hclge_cmd_setup_basic_desc
 			(&desc, HCLGE_OPC_RSS_INDIR_TABLE, false);
 
-		req->start_table_index = i * HCLGE_RSS_CFG_TBL_SIZE;
-		req->rss_set_bitmap = HCLGE_RSS_SET_BITMAP_MSK;
+		req->start_table_index =
+			cpu_to_le16(i * HCLGE_RSS_CFG_TBL_SIZE);
+		req->rss_set_bitmap = cpu_to_le16(HCLGE_RSS_SET_BITMAP_MSK);
 
 		for (j = 0; j < HCLGE_RSS_CFG_TBL_SIZE; j++)
 			req->rss_result[j] =
@@ -2478,21 +2818,24 @@ static int hclge_set_rss_indir_table(struct hclge_dev *hdev, const u32 *indir)
 static int hclge_set_rss_tc_mode(struct hclge_dev *hdev, u16 *tc_valid,
 				 u16 *tc_size, u16 *tc_offset)
 {
-	struct hclge_rss_tc_mode *req;
+	struct hclge_rss_tc_mode_cmd *req;
 	struct hclge_desc desc;
 	int ret;
 	int i;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_TC_MODE, false);
-	req = (struct hclge_rss_tc_mode *)desc.data;
+	req = (struct hclge_rss_tc_mode_cmd *)desc.data;
 
 	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
-		hnae_set_bit(req->rss_tc_mode[i], HCLGE_RSS_TC_VALID_B,
-			     (tc_valid[i] & 0x1));
-		hnae_set_field(req->rss_tc_mode[i], HCLGE_RSS_TC_SIZE_M,
+		u16 mode = 0;
+
+		hnae_set_bit(mode, HCLGE_RSS_TC_VALID_B, (tc_valid[i] & 0x1));
+		hnae_set_field(mode, HCLGE_RSS_TC_SIZE_M,
 			       HCLGE_RSS_TC_SIZE_S, tc_size[i]);
-		hnae_set_field(req->rss_tc_mode[i], HCLGE_RSS_TC_OFFSET_M,
+		hnae_set_field(mode, HCLGE_RSS_TC_OFFSET_M,
 			       HCLGE_RSS_TC_OFFSET_S, tc_offset[i]);
+
+		req->rss_tc_mode[i] = cpu_to_le16(mode);
 	}
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -2507,15 +2850,13 @@ static int hclge_set_rss_tc_mode(struct hclge_dev *hdev, u16 *tc_valid,
 
 static int hclge_set_rss_input_tuple(struct hclge_dev *hdev)
 {
-#define HCLGE_RSS_INPUT_TUPLE_OTHER		0xf
-#define HCLGE_RSS_INPUT_TUPLE_SCTP		0x1f
-	struct hclge_rss_input_tuple *req;
+	struct hclge_rss_input_tuple_cmd *req;
 	struct hclge_desc desc;
 	int ret;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, false);
 
-	req = (struct hclge_rss_input_tuple *)desc.data;
+	req = (struct hclge_rss_input_tuple_cmd *)desc.data;
 	req->ipv4_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
 	req->ipv4_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
 	req->ipv4_sctp_en = HCLGE_RSS_INPUT_TUPLE_SCTP;
@@ -2589,6 +2930,161 @@ static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir,
 	return ret;
 }
 
+static u8 hclge_get_rss_hash_bits(struct ethtool_rxnfc *nfc)
+{
+	u8 hash_sets = nfc->data & RXH_L4_B_0_1 ? HCLGE_S_PORT_BIT : 0;
+
+	if (nfc->data & RXH_L4_B_2_3)
+		hash_sets |= HCLGE_D_PORT_BIT;
+	else
+		hash_sets &= ~HCLGE_D_PORT_BIT;
+
+	if (nfc->data & RXH_IP_SRC)
+		hash_sets |= HCLGE_S_IP_BIT;
+	else
+		hash_sets &= ~HCLGE_S_IP_BIT;
+
+	if (nfc->data & RXH_IP_DST)
+		hash_sets |= HCLGE_D_IP_BIT;
+	else
+		hash_sets &= ~HCLGE_D_IP_BIT;
+
+	if (nfc->flow_type == SCTP_V4_FLOW || nfc->flow_type == SCTP_V6_FLOW)
+		hash_sets |= HCLGE_V_TAG_BIT;
+
+	return hash_sets;
+}
+
+static int hclge_set_rss_tuple(struct hnae3_handle *handle,
+			       struct ethtool_rxnfc *nfc)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_rss_input_tuple_cmd *req;
+	struct hclge_desc desc;
+	u8 tuple_sets;
+	int ret;
+
+	if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
+			  RXH_L4_B_0_1 | RXH_L4_B_2_3))
+		return -EINVAL;
+
+	req = (struct hclge_rss_input_tuple_cmd *)desc.data;
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Read rss tuple fail, status = %d\n", ret);
+		return ret;
+	}
+
+	hclge_cmd_reuse_desc(&desc, false);
+
+	tuple_sets = hclge_get_rss_hash_bits(nfc);
+	switch (nfc->flow_type) {
+	case TCP_V4_FLOW:
+		req->ipv4_tcp_en = tuple_sets;
+		break;
+	case TCP_V6_FLOW:
+		req->ipv6_tcp_en = tuple_sets;
+		break;
+	case UDP_V4_FLOW:
+		req->ipv4_udp_en = tuple_sets;
+		break;
+	case UDP_V6_FLOW:
+		req->ipv6_udp_en = tuple_sets;
+		break;
+	case SCTP_V4_FLOW:
+		req->ipv4_sctp_en = tuple_sets;
+		break;
+	case SCTP_V6_FLOW:
+		if ((nfc->data & RXH_L4_B_0_1) ||
+		    (nfc->data & RXH_L4_B_2_3))
+			return -EINVAL;
+
+		req->ipv6_sctp_en = tuple_sets;
+		break;
+	case IPV4_FLOW:
+		req->ipv4_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+		break;
+	case IPV6_FLOW:
+		req->ipv6_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"Set rss tuple fail, status = %d\n", ret);
+
+	return ret;
+}
+
+static int hclge_get_rss_tuple(struct hnae3_handle *handle,
+			       struct ethtool_rxnfc *nfc)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_rss_input_tuple_cmd *req;
+	struct hclge_desc desc;
+	u8 tuple_sets;
+	int ret;
+
+	nfc->data = 0;
+
+	req = (struct hclge_rss_input_tuple_cmd *)desc.data;
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Read rss tuple fail, status = %d\n", ret);
+		return ret;
+	}
+
+	switch (nfc->flow_type) {
+	case TCP_V4_FLOW:
+		tuple_sets = req->ipv4_tcp_en;
+		break;
+	case UDP_V4_FLOW:
+		tuple_sets = req->ipv4_udp_en;
+		break;
+	case TCP_V6_FLOW:
+		tuple_sets = req->ipv6_tcp_en;
+		break;
+	case UDP_V6_FLOW:
+		tuple_sets = req->ipv6_udp_en;
+		break;
+	case SCTP_V4_FLOW:
+		tuple_sets = req->ipv4_sctp_en;
+		break;
+	case SCTP_V6_FLOW:
+		tuple_sets = req->ipv6_sctp_en;
+		break;
+	case IPV4_FLOW:
+	case IPV6_FLOW:
+		tuple_sets = HCLGE_S_IP_BIT | HCLGE_D_IP_BIT;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (!tuple_sets)
+		return 0;
+
+	if (tuple_sets & HCLGE_D_PORT_BIT)
+		nfc->data |= RXH_L4_B_2_3;
+	if (tuple_sets & HCLGE_S_PORT_BIT)
+		nfc->data |= RXH_L4_B_0_1;
+	if (tuple_sets & HCLGE_D_IP_BIT)
+		nfc->data |= RXH_IP_DST;
+	if (tuple_sets & HCLGE_S_IP_BIT)
+		nfc->data |= RXH_IP_SRC;
+
+	return 0;
+}
+
 static int hclge_get_tc_size(struct hnae3_handle *handle)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
@@ -2597,7 +3093,7 @@ static int hclge_get_tc_size(struct hnae3_handle *handle)
 	return hdev->rss_size_max;
 }
 
-static int hclge_rss_init_hw(struct hclge_dev *hdev)
+int hclge_rss_init_hw(struct hclge_dev *hdev)
 {
 	const  u8 hfunc = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
 	struct hclge_vport *vport = hdev->vport;
@@ -2682,7 +3178,7 @@ int hclge_map_vport_ring_to_vector(struct hclge_vport *vport, int vector_id,
 				   struct hnae3_ring_chain_node *ring_chain)
 {
 	struct hclge_dev *hdev = vport->back;
-	struct hclge_ctrl_vector_chain *req;
+	struct hclge_ctrl_vector_chain_cmd *req;
 	struct hnae3_ring_chain_node *node;
 	struct hclge_desc desc;
 	int ret;
@@ -2690,20 +3186,21 @@ int hclge_map_vport_ring_to_vector(struct hclge_vport *vport, int vector_id,
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_ADD_RING_TO_VECTOR, false);
 
-	req = (struct hclge_ctrl_vector_chain *)desc.data;
+	req = (struct hclge_ctrl_vector_chain_cmd *)desc.data;
 	req->int_vector_id = vector_id;
 
 	i = 0;
 	for (node = ring_chain; node; node = node->next) {
-		hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_TYPE_M,
-			       HCLGE_INT_TYPE_S,
+		u16 type_and_id = 0;
+
+		hnae_set_field(type_and_id, HCLGE_INT_TYPE_M, HCLGE_INT_TYPE_S,
 			       hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
-		hnae_set_field(req->tqp_type_and_id[i], HCLGE_TQP_ID_M,
-			       HCLGE_TQP_ID_S,	node->tqp_index);
-		hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_GL_IDX_M,
+		hnae_set_field(type_and_id, HCLGE_TQP_ID_M, HCLGE_TQP_ID_S,
+			       node->tqp_index);
+		hnae_set_field(type_and_id, HCLGE_INT_GL_IDX_M,
 			       HCLGE_INT_GL_IDX_S,
 			       hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
-		req->tqp_type_and_id[i] = cpu_to_le16(req->tqp_type_and_id[i]);
+		req->tqp_type_and_id[i] = cpu_to_le16(type_and_id);
 		req->vfid = vport->vport_id;
 
 		if (++i >= HCLGE_VECTOR_ELEMENTS_PER_CMD) {
@@ -2739,9 +3236,9 @@ int hclge_map_vport_ring_to_vector(struct hclge_vport *vport, int vector_id,
 	return 0;
 }
 
-int hclge_map_handle_ring_to_vector(struct hnae3_handle *handle,
-				    int vector,
-				    struct hnae3_ring_chain_node *ring_chain)
+static int hclge_map_handle_ring_to_vector(
+		struct hnae3_handle *handle, int vector,
+		struct hnae3_ring_chain_node *ring_chain)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
@@ -2763,7 +3260,7 @@ static int hclge_unmap_ring_from_vector(
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
-	struct hclge_ctrl_vector_chain *req;
+	struct hclge_ctrl_vector_chain_cmd *req;
 	struct hnae3_ring_chain_node *node;
 	struct hclge_desc desc;
 	int i, vector_id;
@@ -2778,21 +3275,22 @@ static int hclge_unmap_ring_from_vector(
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_DEL_RING_TO_VECTOR, false);
 
-	req = (struct hclge_ctrl_vector_chain *)desc.data;
+	req = (struct hclge_ctrl_vector_chain_cmd *)desc.data;
 	req->int_vector_id = vector_id;
 
 	i = 0;
 	for (node = ring_chain; node; node = node->next) {
-		hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_TYPE_M,
-			       HCLGE_INT_TYPE_S,
+		u16 type_and_id = 0;
+
+		hnae_set_field(type_and_id, HCLGE_INT_TYPE_M, HCLGE_INT_TYPE_S,
 			       hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
-		hnae_set_field(req->tqp_type_and_id[i], HCLGE_TQP_ID_M,
-			       HCLGE_TQP_ID_S,	node->tqp_index);
-		hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_GL_IDX_M,
+		hnae_set_field(type_and_id, HCLGE_TQP_ID_M, HCLGE_TQP_ID_S,
+			       node->tqp_index);
+		hnae_set_field(type_and_id, HCLGE_INT_GL_IDX_M,
 			       HCLGE_INT_GL_IDX_S,
 			       hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
 
-		req->tqp_type_and_id[i] = cpu_to_le16(req->tqp_type_and_id[i]);
+		req->tqp_type_and_id[i] = cpu_to_le16(type_and_id);
 		req->vfid = vport->vport_id;
 
 		if (++i >= HCLGE_VECTOR_ELEMENTS_PER_CMD) {
@@ -2830,13 +3328,13 @@ static int hclge_unmap_ring_from_vector(
 int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev,
 			       struct hclge_promisc_param *param)
 {
-	struct hclge_promisc_cfg *req;
+	struct hclge_promisc_cfg_cmd *req;
 	struct hclge_desc desc;
 	int ret;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_PROMISC_MODE, false);
 
-	req = (struct hclge_promisc_cfg *)desc.data;
+	req = (struct hclge_promisc_cfg_cmd *)desc.data;
 	req->vf_id = param->vf_id;
 	req->flag = (param->enable << HCLGE_PROMISC_EN_B);
 
@@ -2878,29 +3376,27 @@ static void hclge_set_promisc_mode(struct hnae3_handle *handle, u32 en)
 static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
 {
 	struct hclge_desc desc;
-	struct hclge_config_mac_mode *req =
-		(struct hclge_config_mac_mode *)desc.data;
+	struct hclge_config_mac_mode_cmd *req =
+		(struct hclge_config_mac_mode_cmd *)desc.data;
+	u32 loop_en = 0;
 	int ret;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_MAC_MODE, false);
-	hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_TX_EN_B, enable);
-	hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_RX_EN_B, enable);
-	hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_PAD_TX_B, enable);
-	hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_PAD_RX_B, enable);
-	hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_1588_TX_B, 0);
-	hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_1588_RX_B, 0);
-	hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_APP_LP_B, 0);
-	hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_LINE_LP_B, 0);
-	hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_FCS_TX_B, enable);
-	hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_RX_FCS_B, enable);
-	hnae_set_bit(req->txrx_pad_fcs_loop_en,
-		     HCLGE_MAC_RX_FCS_STRIP_B, enable);
-	hnae_set_bit(req->txrx_pad_fcs_loop_en,
-		     HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B, enable);
-	hnae_set_bit(req->txrx_pad_fcs_loop_en,
-		     HCLGE_MAC_RX_OVERSIZE_TRUNCATE_B, enable);
-	hnae_set_bit(req->txrx_pad_fcs_loop_en,
-		     HCLGE_MAC_TX_UNDER_MIN_ERR_B, enable);
+	hnae_set_bit(loop_en, HCLGE_MAC_TX_EN_B, enable);
+	hnae_set_bit(loop_en, HCLGE_MAC_RX_EN_B, enable);
+	hnae_set_bit(loop_en, HCLGE_MAC_PAD_TX_B, enable);
+	hnae_set_bit(loop_en, HCLGE_MAC_PAD_RX_B, enable);
+	hnae_set_bit(loop_en, HCLGE_MAC_1588_TX_B, 0);
+	hnae_set_bit(loop_en, HCLGE_MAC_1588_RX_B, 0);
+	hnae_set_bit(loop_en, HCLGE_MAC_APP_LP_B, 0);
+	hnae_set_bit(loop_en, HCLGE_MAC_LINE_LP_B, 0);
+	hnae_set_bit(loop_en, HCLGE_MAC_FCS_TX_B, enable);
+	hnae_set_bit(loop_en, HCLGE_MAC_RX_FCS_B, enable);
+	hnae_set_bit(loop_en, HCLGE_MAC_RX_FCS_STRIP_B, enable);
+	hnae_set_bit(loop_en, HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B, enable);
+	hnae_set_bit(loop_en, HCLGE_MAC_RX_OVERSIZE_TRUNCATE_B, enable);
+	hnae_set_bit(loop_en, HCLGE_MAC_TX_UNDER_MIN_ERR_B, enable);
+	req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en);
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret)
@@ -2908,12 +3404,65 @@ static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
 			"mac enable fail, ret =%d.\n", ret);
 }
 
+static int hclge_set_loopback(struct hnae3_handle *handle,
+			      enum hnae3_loop loop_mode, bool en)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_config_mac_mode_cmd *req;
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_desc desc;
+	u32 loop_en;
+	int ret;
+
+	switch (loop_mode) {
+	case HNAE3_MAC_INTER_LOOP_MAC:
+		req = (struct hclge_config_mac_mode_cmd *)&desc.data[0];
+		/* 1 Read out the MAC mode config at first */
+		hclge_cmd_setup_basic_desc(&desc,
+					   HCLGE_OPC_CONFIG_MAC_MODE,
+					   true);
+		ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"mac loopback get fail, ret =%d.\n",
+				ret);
+			return ret;
+		}
+
+		/* 2 Then setup the loopback flag */
+		loop_en = le32_to_cpu(req->txrx_pad_fcs_loop_en);
+		if (en)
+			hnae_set_bit(loop_en, HCLGE_MAC_APP_LP_B, 1);
+		else
+			hnae_set_bit(loop_en, HCLGE_MAC_APP_LP_B, 0);
+
+		req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en);
+
+		/* 3 Config mac work mode with loopback flag
+		 * and its original configure parameters
+		 */
+		hclge_cmd_reuse_desc(&desc, false);
+		ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+		if (ret)
+			dev_err(&hdev->pdev->dev,
+				"mac loopback set fail, ret =%d.\n", ret);
+		break;
+	default:
+		ret = -ENOTSUPP;
+		dev_err(&hdev->pdev->dev,
+			"loop_mode %d is not supported\n", loop_mode);
+		break;
+	}
+
+	return ret;
+}
+
 static int hclge_tqp_enable(struct hclge_dev *hdev, int tqp_id,
 			    int stream_id, bool enable)
 {
 	struct hclge_desc desc;
-	struct hclge_cfg_com_tqp_queue *req =
-		(struct hclge_cfg_com_tqp_queue *)desc.data;
+	struct hclge_cfg_com_tqp_queue_cmd *req =
+		(struct hclge_cfg_com_tqp_queue_cmd *)desc.data;
 	int ret;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_COM_TQP_QUEUE, false);
@@ -2963,7 +3512,7 @@ static int hclge_ae_start(struct hnae3_handle *handle)
 	/* mac enable */
 	hclge_cfg_mac_mode(hdev, true);
 	clear_bit(HCLGE_STATE_DOWN, &hdev->state);
-	(void)mod_timer(&hdev->service_timer, jiffies + HZ);
+	mod_timer(&hdev->service_timer, jiffies + HZ);
 
 	ret = hclge_mac_start_phy(hdev);
 	if (ret)
@@ -3077,16 +3626,16 @@ static int hclge_update_desc_vfid(struct hclge_desc *desc, int vfid, bool clr)
 		word_num = vfid / 32;
 		bit_num  = vfid % 32;
 		if (clr)
-			desc[1].data[word_num] &= ~(1 << bit_num);
+			desc[1].data[word_num] &= cpu_to_le32(~(1 << bit_num));
 		else
-			desc[1].data[word_num] |= (1 << bit_num);
+			desc[1].data[word_num] |= cpu_to_le32(1 << bit_num);
 	} else {
 		word_num = (vfid - 192) / 32;
 		bit_num  = vfid % 32;
 		if (clr)
-			desc[2].data[word_num] &= ~(1 << bit_num);
+			desc[2].data[word_num] &= cpu_to_le32(~(1 << bit_num));
 		else
-			desc[2].data[word_num] |= (1 << bit_num);
+			desc[2].data[word_num] |= cpu_to_le32(1 << bit_num);
 	}
 
 	return 0;
@@ -3106,7 +3655,7 @@ static bool hclge_is_all_function_id_zero(struct hclge_desc *desc)
 	return true;
 }
 
-static void hclge_prepare_mac_addr(struct hclge_mac_vlan_tbl_entry *new_req,
+static void hclge_prepare_mac_addr(struct hclge_mac_vlan_tbl_entry_cmd *new_req,
 				   const u8 *addr)
 {
 	const unsigned char *mac_addr = addr;
@@ -3118,8 +3667,8 @@ static void hclge_prepare_mac_addr(struct hclge_mac_vlan_tbl_entry *new_req,
 	new_req->mac_addr_lo16 = cpu_to_le16(low_val & 0xffff);
 }
 
-u16 hclge_get_mac_addr_to_mta_index(struct hclge_vport *vport,
-				    const u8 *addr)
+static u16 hclge_get_mac_addr_to_mta_index(struct hclge_vport *vport,
+					   const u8 *addr)
 {
 	u16 high_val = addr[1] | (addr[0] << 8);
 	struct hclge_dev *hdev = vport->back;
@@ -3133,11 +3682,11 @@ static int hclge_set_mta_filter_mode(struct hclge_dev *hdev,
 				     enum hclge_mta_dmac_sel_type mta_mac_sel,
 				     bool enable)
 {
-	struct hclge_mta_filter_mode *req;
+	struct hclge_mta_filter_mode_cmd *req;
 	struct hclge_desc desc;
 	int ret;
 
-	req = (struct hclge_mta_filter_mode *)desc.data;
+	req = (struct hclge_mta_filter_mode_cmd *)desc.data;
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MTA_MAC_MODE_CFG, false);
 
 	hnae_set_bit(req->dmac_sel_en, HCLGE_CFG_MTA_MAC_EN_B,
@@ -3160,11 +3709,11 @@ int hclge_cfg_func_mta_filter(struct hclge_dev *hdev,
 			      u8 func_id,
 			      bool enable)
 {
-	struct hclge_cfg_func_mta_filter *req;
+	struct hclge_cfg_func_mta_filter_cmd *req;
 	struct hclge_desc desc;
 	int ret;
 
-	req = (struct hclge_cfg_func_mta_filter *)desc.data;
+	req = (struct hclge_cfg_func_mta_filter_cmd *)desc.data;
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MTA_MAC_FUNC_CFG, false);
 
 	hnae_set_bit(req->accept, HCLGE_CFG_FUNC_MTA_ACCEPT_B,
@@ -3187,17 +3736,18 @@ static int hclge_set_mta_table_item(struct hclge_vport *vport,
 				    bool enable)
 {
 	struct hclge_dev *hdev = vport->back;
-	struct hclge_cfg_func_mta_item *req;
+	struct hclge_cfg_func_mta_item_cmd *req;
 	struct hclge_desc desc;
+	u16 item_idx = 0;
 	int ret;
 
-	req = (struct hclge_cfg_func_mta_item *)desc.data;
+	req = (struct hclge_cfg_func_mta_item_cmd *)desc.data;
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MTA_TBL_ITEM_CFG, false);
 	hnae_set_bit(req->accept, HCLGE_CFG_MTA_ITEM_ACCEPT_B, enable);
 
-	hnae_set_field(req->item_idx, HCLGE_CFG_MTA_ITEM_IDX_M,
+	hnae_set_field(item_idx, HCLGE_CFG_MTA_ITEM_IDX_M,
 		       HCLGE_CFG_MTA_ITEM_IDX_S, idx);
-	req->item_idx = cpu_to_le16(req->item_idx);
+	req->item_idx = cpu_to_le16(item_idx);
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret) {
@@ -3211,16 +3761,17 @@ static int hclge_set_mta_table_item(struct hclge_vport *vport,
 }
 
 static int hclge_remove_mac_vlan_tbl(struct hclge_vport *vport,
-				     struct hclge_mac_vlan_tbl_entry *req)
+				     struct hclge_mac_vlan_tbl_entry_cmd *req)
 {
 	struct hclge_dev *hdev = vport->back;
 	struct hclge_desc desc;
 	u8 resp_code;
+	u16 retval;
 	int ret;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_VLAN_REMOVE, false);
 
-	memcpy(desc.data, req, sizeof(struct hclge_mac_vlan_tbl_entry));
+	memcpy(desc.data, req, sizeof(struct hclge_mac_vlan_tbl_entry_cmd));
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret) {
@@ -3229,19 +3780,21 @@ static int hclge_remove_mac_vlan_tbl(struct hclge_vport *vport,
 			ret);
 		return ret;
 	}
-	resp_code = (desc.data[0] >> 8) & 0xff;
+	resp_code = (le32_to_cpu(desc.data[0]) >> 8) & 0xff;
+	retval = le16_to_cpu(desc.retval);
 
-	return hclge_get_mac_vlan_cmd_status(vport, desc.retval, resp_code,
+	return hclge_get_mac_vlan_cmd_status(vport, retval, resp_code,
 					     HCLGE_MAC_VLAN_REMOVE);
 }
 
 static int hclge_lookup_mac_vlan_tbl(struct hclge_vport *vport,
-				     struct hclge_mac_vlan_tbl_entry *req,
+				     struct hclge_mac_vlan_tbl_entry_cmd *req,
 				     struct hclge_desc *desc,
 				     bool is_mc)
 {
 	struct hclge_dev *hdev = vport->back;
 	u8 resp_code;
+	u16 retval;
 	int ret;
 
 	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_MAC_VLAN_ADD, true);
@@ -3249,7 +3802,7 @@ static int hclge_lookup_mac_vlan_tbl(struct hclge_vport *vport,
 		desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
 		memcpy(desc[0].data,
 		       req,
-		       sizeof(struct hclge_mac_vlan_tbl_entry));
+		       sizeof(struct hclge_mac_vlan_tbl_entry_cmd));
 		hclge_cmd_setup_basic_desc(&desc[1],
 					   HCLGE_OPC_MAC_VLAN_ADD,
 					   true);
@@ -3261,7 +3814,7 @@ static int hclge_lookup_mac_vlan_tbl(struct hclge_vport *vport,
 	} else {
 		memcpy(desc[0].data,
 		       req,
-		       sizeof(struct hclge_mac_vlan_tbl_entry));
+		       sizeof(struct hclge_mac_vlan_tbl_entry_cmd));
 		ret = hclge_cmd_send(&hdev->hw, desc, 1);
 	}
 	if (ret) {
@@ -3270,19 +3823,21 @@ static int hclge_lookup_mac_vlan_tbl(struct hclge_vport *vport,
 			ret);
 		return ret;
 	}
-	resp_code = (desc[0].data[0] >> 8) & 0xff;
+	resp_code = (le32_to_cpu(desc[0].data[0]) >> 8) & 0xff;
+	retval = le16_to_cpu(desc[0].retval);
 
-	return hclge_get_mac_vlan_cmd_status(vport, desc[0].retval, resp_code,
+	return hclge_get_mac_vlan_cmd_status(vport, retval, resp_code,
 					     HCLGE_MAC_VLAN_LKUP);
 }
 
 static int hclge_add_mac_vlan_tbl(struct hclge_vport *vport,
-				  struct hclge_mac_vlan_tbl_entry *req,
+				  struct hclge_mac_vlan_tbl_entry_cmd *req,
 				  struct hclge_desc *mc_desc)
 {
 	struct hclge_dev *hdev = vport->back;
 	int cfg_status;
 	u8 resp_code;
+	u16 retval;
 	int ret;
 
 	if (!mc_desc) {
@@ -3291,25 +3846,29 @@ static int hclge_add_mac_vlan_tbl(struct hclge_vport *vport,
 		hclge_cmd_setup_basic_desc(&desc,
 					   HCLGE_OPC_MAC_VLAN_ADD,
 					   false);
-		memcpy(desc.data, req, sizeof(struct hclge_mac_vlan_tbl_entry));
+		memcpy(desc.data, req,
+		       sizeof(struct hclge_mac_vlan_tbl_entry_cmd));
 		ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-		resp_code = (desc.data[0] >> 8) & 0xff;
-		cfg_status = hclge_get_mac_vlan_cmd_status(vport, desc.retval,
+		resp_code = (le32_to_cpu(desc.data[0]) >> 8) & 0xff;
+		retval = le16_to_cpu(desc.retval);
+
+		cfg_status = hclge_get_mac_vlan_cmd_status(vport, retval,
 							   resp_code,
 							   HCLGE_MAC_VLAN_ADD);
 	} else {
-		mc_desc[0].flag &= cpu_to_le16(~HCLGE_CMD_FLAG_WR);
+		hclge_cmd_reuse_desc(&mc_desc[0], false);
 		mc_desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
-		mc_desc[1].flag &= cpu_to_le16(~HCLGE_CMD_FLAG_WR);
+		hclge_cmd_reuse_desc(&mc_desc[1], false);
 		mc_desc[1].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
-		mc_desc[2].flag &= cpu_to_le16(~HCLGE_CMD_FLAG_WR);
+		hclge_cmd_reuse_desc(&mc_desc[2], false);
 		mc_desc[2].flag &= cpu_to_le16(~HCLGE_CMD_FLAG_NEXT);
 		memcpy(mc_desc[0].data, req,
-		       sizeof(struct hclge_mac_vlan_tbl_entry));
+		       sizeof(struct hclge_mac_vlan_tbl_entry_cmd));
 		ret = hclge_cmd_send(&hdev->hw, mc_desc, 3);
-		resp_code = (mc_desc[0].data[0] >> 8) & 0xff;
-		cfg_status = hclge_get_mac_vlan_cmd_status(vport,
-							   mc_desc[0].retval,
+		resp_code = (le32_to_cpu(mc_desc[0].data[0]) >> 8) & 0xff;
+		retval = le16_to_cpu(mc_desc[0].retval);
+
+		cfg_status = hclge_get_mac_vlan_cmd_status(vport, retval,
 							   resp_code,
 							   HCLGE_MAC_VLAN_ADD);
 	}
@@ -3336,8 +3895,9 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport,
 			     const unsigned char *addr)
 {
 	struct hclge_dev *hdev = vport->back;
-	struct hclge_mac_vlan_tbl_entry req;
+	struct hclge_mac_vlan_tbl_entry_cmd req;
 	enum hclge_cmd_status status;
+	u16 egress_port = 0;
 
 	/* mac addr check */
 	if (is_zero_ether_addr(addr) ||
@@ -3357,15 +3917,15 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport,
 	hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
 	hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT1_EN_B, 0);
 	hnae_set_bit(req.mc_mac_en, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
-	hnae_set_bit(req.egress_port,
-		     HCLGE_MAC_EPORT_SW_EN_B, 0);
-	hnae_set_bit(req.egress_port,
-		     HCLGE_MAC_EPORT_TYPE_B, 0);
-	hnae_set_field(req.egress_port, HCLGE_MAC_EPORT_VFID_M,
+
+	hnae_set_bit(egress_port, HCLGE_MAC_EPORT_SW_EN_B, 0);
+	hnae_set_bit(egress_port, HCLGE_MAC_EPORT_TYPE_B, 0);
+	hnae_set_field(egress_port, HCLGE_MAC_EPORT_VFID_M,
 		       HCLGE_MAC_EPORT_VFID_S, vport->vport_id);
-	hnae_set_field(req.egress_port, HCLGE_MAC_EPORT_PFID_M,
+	hnae_set_field(egress_port, HCLGE_MAC_EPORT_PFID_M,
 		       HCLGE_MAC_EPORT_PFID_S, 0);
-	req.egress_port = cpu_to_le16(req.egress_port);
+
+	req.egress_port = cpu_to_le16(egress_port);
 
 	hclge_prepare_mac_addr(&req, addr);
 
@@ -3386,7 +3946,7 @@ int hclge_rm_uc_addr_common(struct hclge_vport *vport,
 			    const unsigned char *addr)
 {
 	struct hclge_dev *hdev = vport->back;
-	struct hclge_mac_vlan_tbl_entry req;
+	struct hclge_mac_vlan_tbl_entry_cmd req;
 	enum hclge_cmd_status status;
 
 	/* mac addr check */
@@ -3420,7 +3980,7 @@ int hclge_add_mc_addr_common(struct hclge_vport *vport,
 			     const unsigned char *addr)
 {
 	struct hclge_dev *hdev = vport->back;
-	struct hclge_mac_vlan_tbl_entry req;
+	struct hclge_mac_vlan_tbl_entry_cmd req;
 	struct hclge_desc desc[3];
 	u16 tbl_idx;
 	int status;
@@ -3471,7 +4031,7 @@ int hclge_rm_mc_addr_common(struct hclge_vport *vport,
 			    const unsigned char *addr)
 {
 	struct hclge_dev *hdev = vport->back;
-	struct hclge_mac_vlan_tbl_entry req;
+	struct hclge_mac_vlan_tbl_entry_cmd req;
 	enum hclge_cmd_status status;
 	struct hclge_desc desc[3];
 	u16 tbl_idx;
@@ -3554,13 +4114,13 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p)
 static int hclge_set_vlan_filter_ctrl(struct hclge_dev *hdev, u8 vlan_type,
 				      bool filter_en)
 {
-	struct hclge_vlan_filter_ctrl *req;
+	struct hclge_vlan_filter_ctrl_cmd *req;
 	struct hclge_desc desc;
 	int ret;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_FILTER_CTRL, false);
 
-	req = (struct hclge_vlan_filter_ctrl *)desc.data;
+	req = (struct hclge_vlan_filter_ctrl_cmd *)desc.data;
 	req->vlan_type = vlan_type;
 	req->vlan_fe = filter_en;
 
@@ -3578,8 +4138,8 @@ int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid,
 			     bool is_kill, u16 vlan, u8 qos, __be16 proto)
 {
 #define HCLGE_MAX_VF_BYTES  16
-	struct hclge_vlan_filter_vf_cfg *req0;
-	struct hclge_vlan_filter_vf_cfg *req1;
+	struct hclge_vlan_filter_vf_cfg_cmd *req0;
+	struct hclge_vlan_filter_vf_cfg_cmd *req1;
 	struct hclge_desc desc[2];
 	u8 vf_byte_val;
 	u8 vf_byte_off;
@@ -3595,10 +4155,10 @@ int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid,
 	vf_byte_off = vfid / 8;
 	vf_byte_val = 1 << (vfid % 8);
 
-	req0 = (struct hclge_vlan_filter_vf_cfg *)desc[0].data;
-	req1 = (struct hclge_vlan_filter_vf_cfg *)desc[1].data;
+	req0 = (struct hclge_vlan_filter_vf_cfg_cmd *)desc[0].data;
+	req1 = (struct hclge_vlan_filter_vf_cfg_cmd *)desc[1].data;
 
-	req0->vlan_id  = vlan;
+	req0->vlan_id  = cpu_to_le16(vlan);
 	req0->vlan_cfg = is_kill;
 
 	if (vf_byte_off < HCLGE_MAX_VF_BYTES)
@@ -3639,7 +4199,7 @@ static int hclge_set_port_vlan_filter(struct hnae3_handle *handle,
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
-	struct hclge_vlan_filter_pf_cfg *req;
+	struct hclge_vlan_filter_pf_cfg_cmd *req;
 	struct hclge_desc desc;
 	u8 vlan_offset_byte_val;
 	u8 vlan_offset_byte;
@@ -3652,7 +4212,7 @@ static int hclge_set_port_vlan_filter(struct hnae3_handle *handle,
 	vlan_offset_byte = (vlan_id % 160) / 8;
 	vlan_offset_byte_val = 1 << (vlan_id % 8);
 
-	req = (struct hclge_vlan_filter_pf_cfg *)desc.data;
+	req = (struct hclge_vlan_filter_pf_cfg_cmd *)desc.data;
 	req->vlan_offset = vlan_offset_160;
 	req->vlan_cfg = is_kill;
 	req->vlan_offset_bitmap[vlan_offset_byte] = vlan_offset_byte_val;
@@ -3714,7 +4274,7 @@ static int hclge_init_vlan_config(struct hclge_dev *hdev)
 static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
-	struct hclge_config_max_frm_size *req;
+	struct hclge_config_max_frm_size_cmd *req;
 	struct hclge_dev *hdev = vport->back;
 	struct hclge_desc desc;
 	int ret;
@@ -3725,7 +4285,7 @@ static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)
 	hdev->mps = new_mtu;
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_MAX_FRM_SIZE, false);
 
-	req = (struct hclge_config_max_frm_size *)desc.data;
+	req = (struct hclge_config_max_frm_size_cmd *)desc.data;
 	req->max_frm_size = cpu_to_le16(new_mtu);
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -3740,13 +4300,13 @@ static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)
 static int hclge_send_reset_tqp_cmd(struct hclge_dev *hdev, u16 queue_id,
 				    bool enable)
 {
-	struct hclge_reset_tqp_queue *req;
+	struct hclge_reset_tqp_queue_cmd *req;
 	struct hclge_desc desc;
 	int ret;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RESET_TQP_QUEUE, false);
 
-	req = (struct hclge_reset_tqp_queue *)desc.data;
+	req = (struct hclge_reset_tqp_queue_cmd *)desc.data;
 	req->tqp_id = cpu_to_le16(queue_id & HCLGE_RING_ID_MASK);
 	hnae_set_bit(req->reset_req, HCLGE_TQP_RESET_B, enable);
 
@@ -3762,13 +4322,13 @@ static int hclge_send_reset_tqp_cmd(struct hclge_dev *hdev, u16 queue_id,
 
 static int hclge_get_reset_status(struct hclge_dev *hdev, u16 queue_id)
 {
-	struct hclge_reset_tqp_queue *req;
+	struct hclge_reset_tqp_queue_cmd *req;
 	struct hclge_desc desc;
 	int ret;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RESET_TQP_QUEUE, true);
 
-	req = (struct hclge_reset_tqp_queue *)desc.data;
+	req = (struct hclge_reset_tqp_queue_cmd *)desc.data;
 	req->tqp_id = cpu_to_le16(queue_id & HCLGE_RING_ID_MASK);
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -3981,7 +4541,7 @@ static int hclge_init_client_instance(struct hnae3_client *client,
 				vport->roce.client = client;
 			}
 
-			if (hdev->roce_client) {
+			if (hdev->roce_client && hdev->nic_client) {
 				ret = hclge_init_roce_base_info(vport);
 				if (ret)
 					goto err;
@@ -4007,13 +4567,19 @@ static void hclge_uninit_client_instance(struct hnae3_client *client,
 
 	for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
 		vport = &hdev->vport[i];
-		if (hdev->roce_client)
+		if (hdev->roce_client) {
 			hdev->roce_client->ops->uninit_instance(&vport->roce,
 								0);
+			hdev->roce_client = NULL;
+			vport->roce.client = NULL;
+		}
 		if (client->type == HNAE3_CLIENT_ROCE)
 			return;
-		if (client->ops->uninit_instance)
+		if (client->ops->uninit_instance) {
 			client->ops->uninit_instance(&vport->nic, 0);
+			hdev->nic_client = NULL;
+			vport->nic.client = NULL;
+		}
 	}
 }
 
@@ -4056,6 +4622,8 @@ static int hclge_pci_init(struct hclge_dev *hdev)
 		goto err_clr_master;
 	}
 
+	hdev->num_req_vfs = pci_sriov_get_totalvfs(pdev);
+
 	return 0;
 err_clr_master:
 	pci_clear_master(pdev);
@@ -4072,14 +4640,7 @@ static void hclge_pci_uninit(struct hclge_dev *hdev)
 {
 	struct pci_dev *pdev = hdev->pdev;
 
-	if (hdev->flag & HCLGE_FLAG_USE_MSIX) {
-		pci_disable_msix(pdev);
-		devm_kfree(&pdev->dev, hdev->msix_entries);
-		hdev->msix_entries = NULL;
-	} else {
-		pci_disable_msi(pdev);
-	}
-
+	pci_free_irq_vectors(pdev);
 	pci_clear_master(pdev);
 	pci_release_mem_regions(pdev);
 	pci_disable_device(pdev);
@@ -4097,9 +4658,9 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 		goto err_hclge_dev;
 	}
 
-	hdev->flag |= HCLGE_FLAG_USE_MSIX;
 	hdev->pdev = pdev;
 	hdev->ae_dev = ae_dev;
+	hdev->reset_type = HNAE3_NONE_RESET;
 	ae_dev->priv = hdev;
 
 	ret = hclge_pci_init(hdev);
@@ -4108,7 +4669,14 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 		goto err_pci_init;
 	}
 
-	/* Command queue initialize */
+	/* Firmware command queue initialize */
+	ret = hclge_cmd_queue_init(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Cmd queue init failed, ret = %d.\n", ret);
+		return ret;
+	}
+
+	/* Firmware command initialize */
 	ret = hclge_cmd_init(hdev);
 	if (ret)
 		goto err_cmd_init;
@@ -4126,12 +4694,17 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 		return ret;
 	}
 
-	if (hdev->flag & HCLGE_FLAG_USE_MSIX)
-		ret = hclge_init_msix(hdev);
-	else
-		ret = hclge_init_msi(hdev);
+	ret = hclge_init_msi(hdev);
 	if (ret) {
-		dev_err(&pdev->dev, "Init msix/msi error, ret = %d.\n", ret);
+		dev_err(&pdev->dev, "Init MSI/MSI-X error, ret = %d.\n", ret);
+		return ret;
+	}
+
+	ret = hclge_misc_irq_init(hdev);
+	if (ret) {
+		dev_err(&pdev->dev,
+			"Misc IRQ(vector0) init error, ret = %d.\n",
+			ret);
 		return ret;
 	}
 
@@ -4147,6 +4720,19 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 		return ret;
 	}
 
+	ret = hclge_map_tqp(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Map tqp error, ret = %d.\n", ret);
+		return ret;
+	}
+
+	ret = hclge_mac_mdio_config(hdev);
+	if (ret) {
+		dev_warn(&hdev->pdev->dev,
+			 "mdio config fail ret=%d\n", ret);
+		return ret;
+	}
+
 	ret = hclge_mac_init(hdev);
 	if (ret) {
 		dev_err(&pdev->dev, "Mac init error, ret = %d\n", ret);
@@ -4182,10 +4768,14 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 		return ret;
 	}
 
-	setup_timer(&hdev->service_timer, hclge_service_timer,
-		    (unsigned long)hdev);
+	hclge_dcb_ops_set(hdev);
+
+	timer_setup(&hdev->service_timer, hclge_service_timer, 0);
 	INIT_WORK(&hdev->service_task, hclge_service_task);
 
+	/* Enable MISC vector(vector0) */
+	hclge_enable_vector(&hdev->misc_vector, true);
+
 	set_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state);
 	set_bit(HCLGE_STATE_DOWN, &hdev->state);
 
@@ -4200,6 +4790,91 @@ err_hclge_dev:
 	return ret;
 }
 
+static void hclge_stats_clear(struct hclge_dev *hdev)
+{
+	memset(&hdev->hw_stats, 0, sizeof(hdev->hw_stats));
+}
+
+static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
+{
+	struct hclge_dev *hdev = ae_dev->priv;
+	struct pci_dev *pdev = ae_dev->pdev;
+	int ret;
+
+	set_bit(HCLGE_STATE_DOWN, &hdev->state);
+
+	hclge_stats_clear(hdev);
+
+	ret = hclge_cmd_init(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Cmd queue init failed\n");
+		return ret;
+	}
+
+	ret = hclge_get_cap(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "get hw capability error, ret = %d.\n",
+			ret);
+		return ret;
+	}
+
+	ret = hclge_configure(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Configure dev error, ret = %d.\n", ret);
+		return ret;
+	}
+
+	ret = hclge_map_tqp(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Map tqp error, ret = %d.\n", ret);
+		return ret;
+	}
+
+	ret = hclge_mac_init(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Mac init error, ret = %d\n", ret);
+		return ret;
+	}
+
+	ret = hclge_buffer_alloc(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Buffer allocate fail, ret =%d\n", ret);
+		return ret;
+	}
+
+	ret = hclge_config_tso(hdev, HCLGE_TSO_MSS_MIN, HCLGE_TSO_MSS_MAX);
+	if (ret) {
+		dev_err(&pdev->dev, "Enable tso fail, ret =%d\n", ret);
+		return ret;
+	}
+
+	ret = hclge_init_vlan_config(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "VLAN init fail, ret =%d\n", ret);
+		return ret;
+	}
+
+	ret = hclge_tm_schd_init(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "tm schd init fail, ret =%d\n", ret);
+		return ret;
+	}
+
+	ret = hclge_rss_init_hw(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Rss init fail, ret =%d\n", ret);
+		return ret;
+	}
+
+	/* Enable MISC vector(vector0) */
+	hclge_enable_vector(&hdev->misc_vector, true);
+
+	dev_info(&pdev->dev, "Reset done, %s driver initialization finished.\n",
+		 HCLGE_DRIVER_NAME);
+
+	return 0;
+}
+
 static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
 {
 	struct hclge_dev *hdev = ae_dev->priv;
@@ -4210,7 +4885,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
 	if (IS_ENABLED(CONFIG_PCI_IOV))
 		hclge_disable_sriov(hdev);
 
-	if (hdev->service_timer.data)
+	if (hdev->service_timer.function)
 		del_timer_sync(&hdev->service_timer);
 	if (hdev->service_task.func)
 		cancel_work_sync(&hdev->service_task);
@@ -4218,6 +4893,9 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
 	if (mac->phydev)
 		mdiobus_unregister(mac->mdio_bus);
 
+	/* Disable MISC vector(vector0) */
+	hclge_enable_vector(&hdev->misc_vector, false);
+	hclge_free_vector(hdev, 0);
 	hclge_destroy_cmd_queue(&hdev->hw);
 	hclge_pci_uninit(hdev);
 	ae_dev->priv = NULL;
@@ -4232,6 +4910,7 @@ static const struct hnae3_ae_ops hclge_ops = {
 	.unmap_ring_from_vector = hclge_unmap_ring_from_vector,
 	.get_vector = hclge_get_vector,
 	.set_promisc_mode = hclge_set_promisc_mode,
+	.set_loopback = hclge_set_loopback,
 	.start = hclge_ae_start,
 	.stop = hclge_ae_stop,
 	.get_status = hclge_get_status,
@@ -4243,6 +4922,8 @@ static const struct hnae3_ae_ops hclge_ops = {
 	.get_rss_indir_size = hclge_get_rss_indir_size,
 	.get_rss = hclge_get_rss,
 	.set_rss = hclge_set_rss,
+	.set_rss_tuple = hclge_set_rss_tuple,
+	.get_rss_tuple = hclge_get_rss_tuple,
 	.get_tc_size = hclge_get_tc_size,
 	.get_mac_addr = hclge_get_mac_addr,
 	.set_mac_addr = hclge_set_mac_addr,
@@ -4263,6 +4944,7 @@ static const struct hnae3_ae_ops hclge_ops = {
 	.get_mdix_mode = hclge_get_mdix_mode,
 	.set_vlan_filter = hclge_set_port_vlan_filter,
 	.set_vf_vlan_filter = hclge_set_vf_vlan_filter,
+	.reset_event = hclge_reset_event,
 };
 
 static struct hnae3_ae_algo ae_algo = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 9fcfd9395424..7027814ea5d7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -27,12 +27,13 @@
 	(HCLGE_PF_CFG_BLOCK_SIZE / HCLGE_CFG_RD_LEN_BYTES)
 
 #define HCLGE_VECTOR_REG_BASE		0x20000
+#define HCLGE_MISC_VECTOR_REG_BASE	0x20400
 
 #define HCLGE_VECTOR_REG_OFFSET		0x4
 #define HCLGE_VECTOR_VF_OFFSET		0x100000
 
 #define HCLGE_RSS_IND_TBL_SIZE		512
-#define HCLGE_RSS_SET_BITMAP_MSK	0xffff
+#define HCLGE_RSS_SET_BITMAP_MSK	GENMASK(15, 0)
 #define HCLGE_RSS_KEY_SIZE		40
 #define HCLGE_RSS_HASH_ALGO_TOEPLITZ	0
 #define HCLGE_RSS_HASH_ALGO_SIMPLE	1
@@ -41,6 +42,14 @@
 #define HCLGE_RSS_CFG_TBL_NUM \
 	(HCLGE_RSS_IND_TBL_SIZE / HCLGE_RSS_CFG_TBL_SIZE)
 
+#define HCLGE_RSS_INPUT_TUPLE_OTHER	GENMASK(3, 0)
+#define HCLGE_RSS_INPUT_TUPLE_SCTP	GENMASK(4, 0)
+#define HCLGE_D_PORT_BIT		BIT(0)
+#define HCLGE_S_PORT_BIT		BIT(1)
+#define HCLGE_D_IP_BIT			BIT(2)
+#define HCLGE_S_IP_BIT			BIT(3)
+#define HCLGE_V_TAG_BIT			BIT(4)
+
 #define HCLGE_RSS_TC_SIZE_0		1
 #define HCLGE_RSS_TC_SIZE_1		2
 #define HCLGE_RSS_TC_SIZE_2		4
@@ -65,11 +74,24 @@
 #define HCLGE_PHY_CSS_REG		17
 
 #define HCLGE_PHY_MDIX_CTRL_S		(5)
-#define HCLGE_PHY_MDIX_CTRL_M		(3 << HCLGE_PHY_MDIX_CTRL_S)
+#define HCLGE_PHY_MDIX_CTRL_M		GENMASK(6, 5)
 
 #define HCLGE_PHY_MDIX_STATUS_B	(6)
 #define HCLGE_PHY_SPEED_DUP_RESOLVE_B	(11)
 
+/* Reset related Registers */
+#define HCLGE_MISC_RESET_STS_REG	0x20700
+#define HCLGE_GLOBAL_RESET_REG		0x20A00
+#define HCLGE_GLOBAL_RESET_BIT		0x0
+#define HCLGE_CORE_RESET_BIT		0x1
+#define HCLGE_FUN_RST_ING		0x20C00
+#define HCLGE_FUN_RST_ING_B		0
+
+/* Vector0 register bits define */
+#define HCLGE_VECTOR0_GLOBALRESET_INT_B	5
+#define HCLGE_VECTOR0_CORERESET_INT_B	6
+#define HCLGE_VECTOR0_IMPRESET_INT_B	7
+
 enum HCLGE_DEV_STATE {
 	HCLGE_STATE_REINITING,
 	HCLGE_STATE_DOWN,
@@ -79,6 +101,7 @@ enum HCLGE_DEV_STATE {
 	HCLGE_STATE_SERVICE_SCHED,
 	HCLGE_STATE_MBX_HANDLING,
 	HCLGE_STATE_MBX_IRQ,
+	HCLGE_STATE_RESET_INT,
 	HCLGE_STATE_MAX
 };
 
@@ -392,17 +415,16 @@ struct hclge_dev {
 	struct pci_dev *pdev;
 	struct hnae3_ae_dev *ae_dev;
 	struct hclge_hw hw;
+	struct hclge_misc_vector misc_vector;
 	struct hclge_hw_stats hw_stats;
 	unsigned long state;
 
+	enum hnae3_reset_type reset_type;
 	u32 fw_version;
 	u16 num_vmdq_vport;		/* Num vmdq vport this PF has set up */
 	u16 num_tqps;			/* Num task queue pairs of this PF */
 	u16 num_req_vfs;		/* Num VFs requested for this PF */
 
-	u16 num_roce_msix;		/* Num of roce vectors for this PF */
-	int roce_base_vector;
-
 	/* Base task tqp physical id of this PF */
 	u16 base_tqp_pid;
 	u16 alloc_rss_size;		/* Allocated RSS task queue */
@@ -421,16 +443,21 @@ struct hclge_dev {
 #define HCLGE_FLAG_TC_BASE_SCH_MODE		1
 #define HCLGE_FLAG_VNET_BASE_SCH_MODE		2
 	u8 tx_sch_mode;
+	u8 tc_max;
+	u8 pfc_max;
 
 	u8 default_up;
+	u8 dcbx_cap;
 	struct hclge_tm_info tm_info;
 
 	u16 num_msi;
 	u16 num_msi_left;
 	u16 num_msi_used;
 	u32 base_msi_vector;
-	struct msix_entry *msix_entries;
 	u16 *vector_status;
+	int *vector_irq;
+	u16 num_roce_msi;	/* Num of roce vectors for this PF */
+	int roce_base_vector;
 
 	u16 pending_udp_bitmap;
 
@@ -454,17 +481,14 @@ struct hclge_dev {
 	struct hnae3_client *nic_client;
 	struct hnae3_client *roce_client;
 
-#define HCLGE_FLAG_USE_MSI	0x00000001
-#define HCLGE_FLAG_USE_MSIX	0x00000002
-#define HCLGE_FLAG_MAIN		0x00000004
-#define HCLGE_FLAG_DCB_CAPABLE	0x00000008
-#define HCLGE_FLAG_DCB_ENABLE	0x00000010
+#define HCLGE_FLAG_MAIN			BIT(0)
+#define HCLGE_FLAG_DCB_CAPABLE		BIT(1)
+#define HCLGE_FLAG_DCB_ENABLE		BIT(2)
+#define HCLGE_FLAG_MQPRIO_ENABLE	BIT(3)
 	u32 flag;
 
 	u32 pkt_buf_size; /* Total pf buf size for tx/rx */
 	u32 mps; /* Max packet size */
-	struct hclge_priv_buf *priv_buf;
-	struct hclge_shared_buf s_buf;
 
 	enum hclge_mta_dmac_sel_type mta_mac_sel_type;
 	bool enable_mta; /* Mutilcast filter enable */
@@ -517,4 +541,7 @@ static inline int hclge_get_queue_id(struct hnae3_queue *queue)
 int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex);
 int hclge_set_vf_vlan_common(struct hclge_dev *vport, int vfid,
 			     bool is_kill, u16 vlan, u8 qos, __be16 proto);
+
+int hclge_buffer_alloc(struct hclge_dev *hdev);
+int hclge_rss_init_hw(struct hclge_dev *hdev);
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
index f32d719c4f77..7069e9408d7d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
@@ -14,6 +14,13 @@
 #include "hclge_main.h"
 #include "hclge_mdio.h"
 
+#define HCLGE_PHY_SUPPORTED_FEATURES	(SUPPORTED_Autoneg | \
+					 SUPPORTED_TP | \
+					 SUPPORTED_Pause | \
+					 PHY_10BT_FEATURES | \
+					 PHY_100BT_FEATURES | \
+					 PHY_1000BT_FEATURES)
+
 enum hclge_mdio_c22_op_seq {
 	HCLGE_MDIO_C22_WRITE = 1,
 	HCLGE_MDIO_C22_READ = 2
@@ -195,6 +202,9 @@ int hclge_mac_start_phy(struct hclge_dev *hdev)
 		return ret;
 	}
 
+	phydev->supported &= HCLGE_PHY_SUPPORTED_FEATURES;
+	phydev->advertising = phydev->supported;
+
 	phy_start(phydev);
 
 	return 0;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 73a75d7cc551..7bfa2e5497cb 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -124,6 +124,20 @@ static int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx)
 	return hclge_cmd_send(&hdev->hw, &desc, 1);
 }
 
+static int hclge_pfc_pause_en_cfg(struct hclge_dev *hdev, u8 tx_rx_bitmap,
+				  u8 pfc_bitmap)
+{
+	struct hclge_desc desc;
+	struct hclge_pfc_en_cmd *pfc = (struct hclge_pfc_en_cmd *)&desc.data;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_PFC_PAUSE_EN, false);
+
+	pfc->tx_rx_en_bitmap = tx_rx_bitmap;
+	pfc->pri_en_bitmap = pfc_bitmap;
+
+	return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
 static int hclge_fill_pri_array(struct hclge_dev *hdev, u8 *pri, u8 pri_id)
 {
 	u8 tc;
@@ -269,6 +283,7 @@ static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev,
 	struct hclge_pg_shapping_cmd *shap_cfg_cmd;
 	enum hclge_opcode_type opcode;
 	struct hclge_desc desc;
+	u32 shapping_para = 0;
 
 	opcode = bucket ? HCLGE_OPC_TM_PG_P_SHAPPING :
 		HCLGE_OPC_TM_PG_C_SHAPPING;
@@ -278,11 +293,41 @@ static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev,
 
 	shap_cfg_cmd->pg_id = pg_id;
 
-	hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, IR_B, ir_b);
-	hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, IR_U, ir_u);
-	hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, IR_S, ir_s);
-	hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, BS_B, bs_b);
-	hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, BS_S, bs_s);
+	hclge_tm_set_field(shapping_para, IR_B, ir_b);
+	hclge_tm_set_field(shapping_para, IR_U, ir_u);
+	hclge_tm_set_field(shapping_para, IR_S, ir_s);
+	hclge_tm_set_field(shapping_para, BS_B, bs_b);
+	hclge_tm_set_field(shapping_para, BS_S, bs_s);
+
+	shap_cfg_cmd->pg_shapping_para = cpu_to_le32(shapping_para);
+
+	return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev)
+{
+	struct hclge_port_shapping_cmd *shap_cfg_cmd;
+	struct hclge_desc desc;
+	u32 shapping_para = 0;
+	u8 ir_u, ir_b, ir_s;
+	int ret;
+
+	ret = hclge_shaper_para_calc(HCLGE_ETHER_MAX_RATE,
+				     HCLGE_SHAPER_LVL_PORT,
+				     &ir_b, &ir_u, &ir_s);
+	if (ret)
+		return ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PORT_SHAPPING, false);
+	shap_cfg_cmd = (struct hclge_port_shapping_cmd *)desc.data;
+
+	hclge_tm_set_field(shapping_para, IR_B, ir_b);
+	hclge_tm_set_field(shapping_para, IR_U, ir_u);
+	hclge_tm_set_field(shapping_para, IR_S, ir_s);
+	hclge_tm_set_field(shapping_para, BS_B, HCLGE_SHAPER_BS_U_DEF);
+	hclge_tm_set_field(shapping_para, BS_S, HCLGE_SHAPER_BS_S_DEF);
+
+	shap_cfg_cmd->port_shapping_para = cpu_to_le32(shapping_para);
 
 	return hclge_cmd_send(&hdev->hw, &desc, 1);
 }
@@ -295,6 +340,7 @@ static int hclge_tm_pri_shapping_cfg(struct hclge_dev *hdev,
 	struct hclge_pri_shapping_cmd *shap_cfg_cmd;
 	enum hclge_opcode_type opcode;
 	struct hclge_desc desc;
+	u32 shapping_para = 0;
 
 	opcode = bucket ? HCLGE_OPC_TM_PRI_P_SHAPPING :
 		HCLGE_OPC_TM_PRI_C_SHAPPING;
@@ -305,11 +351,13 @@ static int hclge_tm_pri_shapping_cfg(struct hclge_dev *hdev,
 
 	shap_cfg_cmd->pri_id = pri_id;
 
-	hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, IR_B, ir_b);
-	hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, IR_U, ir_u);
-	hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, IR_S, ir_s);
-	hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, BS_B, bs_b);
-	hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, BS_S, bs_s);
+	hclge_tm_set_field(shapping_para, IR_B, ir_b);
+	hclge_tm_set_field(shapping_para, IR_U, ir_u);
+	hclge_tm_set_field(shapping_para, IR_S, ir_s);
+	hclge_tm_set_field(shapping_para, BS_B, bs_b);
+	hclge_tm_set_field(shapping_para, BS_S, bs_s);
+
+	shap_cfg_cmd->pri_shapping_para = cpu_to_le32(shapping_para);
 
 	return hclge_cmd_send(&hdev->hw, &desc, 1);
 }
@@ -346,13 +394,13 @@ static int hclge_tm_pri_schd_mode_cfg(struct hclge_dev *hdev, u8 pri_id)
 	return hclge_cmd_send(&hdev->hw, &desc, 1);
 }
 
-static int hclge_tm_qs_schd_mode_cfg(struct hclge_dev *hdev, u16 qs_id)
+static int hclge_tm_qs_schd_mode_cfg(struct hclge_dev *hdev, u16 qs_id, u8 mode)
 {
 	struct hclge_desc desc;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_QS_SCH_MODE_CFG, false);
 
-	if (hdev->tm_info.tc_info[qs_id].tc_sch_mode == HCLGE_SCH_MODE_DWRR)
+	if (mode == HCLGE_SCH_MODE_DWRR)
 		desc.data[1] = cpu_to_le32(HCLGE_TM_TX_SCHD_DWRR_MSK);
 	else
 		desc.data[1] = 0;
@@ -386,7 +434,6 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport)
 	struct hclge_dev *hdev = vport->back;
 	u8 i;
 
-	kinfo = &vport->nic.kinfo;
 	vport->bw_limit = hdev->tm_info.pg_info[0].bw_limit;
 	kinfo->num_tc =
 		min_t(u16, kinfo->num_tqps, hdev->tm_info.num_tc);
@@ -444,7 +491,11 @@ static void hclge_tm_tc_info_init(struct hclge_dev *hdev)
 		hdev->tm_info.prio_tc[i] =
 			(i >= hdev->tm_info.num_tc) ? 0 : i;
 
-	hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
+	/* DCB is enabled if we have more than 1 TC */
+	if (hdev->tm_info.num_tc > 1)
+		hdev->flag |= HCLGE_FLAG_DCB_ENABLE;
+	else
+		hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
 }
 
 static void hclge_tm_pg_info_init(struct hclge_dev *hdev)
@@ -470,6 +521,24 @@ static void hclge_tm_pg_info_init(struct hclge_dev *hdev)
 	}
 }
 
+static void hclge_pfc_info_init(struct hclge_dev *hdev)
+{
+	if (!(hdev->flag & HCLGE_FLAG_DCB_ENABLE)) {
+		if (hdev->fc_mode_last_time == HCLGE_FC_PFC)
+			dev_warn(&hdev->pdev->dev,
+				 "DCB is disable, but last mode is FC_PFC\n");
+
+		hdev->tm_info.fc_mode = hdev->fc_mode_last_time;
+	} else if (hdev->tm_info.fc_mode != HCLGE_FC_PFC) {
+		/* fc_mode_last_time record the last fc_mode when
+		 * DCB is enabled, so that fc_mode can be set to
+		 * the correct value when DCB is disabled.
+		 */
+		hdev->fc_mode_last_time = hdev->tm_info.fc_mode;
+		hdev->tm_info.fc_mode = HCLGE_FC_PFC;
+	}
+}
+
 static int hclge_tm_schd_info_init(struct hclge_dev *hdev)
 {
 	if ((hdev->tx_sch_mode != HCLGE_FLAG_TC_BASE_SCH_MODE) &&
@@ -482,8 +551,7 @@ static int hclge_tm_schd_info_init(struct hclge_dev *hdev)
 
 	hclge_tm_vport_info_update(hdev);
 
-	hdev->tm_info.fc_mode = HCLGE_FC_NONE;
-	hdev->fc_mode_last_time = hdev->tm_info.fc_mode;
+	hclge_pfc_info_init(hdev);
 
 	return 0;
 }
@@ -596,17 +664,18 @@ static int hclge_tm_pri_q_qs_cfg(struct hclge_dev *hdev)
 {
 	struct hclge_vport *vport = hdev->vport;
 	int ret;
-	u32 i;
+	u32 i, k;
 
 	if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) {
 		/* Cfg qs -> pri mapping, one by one mapping */
-		for (i = 0; i < hdev->tm_info.num_tc; i++) {
-			ret = hclge_tm_qs_to_pri_map_cfg(hdev, i, i);
-			if (ret)
-				return ret;
-		}
+		for (k = 0; k < hdev->num_alloc_vport; k++)
+			for (i = 0; i < hdev->tm_info.num_tc; i++) {
+				ret = hclge_tm_qs_to_pri_map_cfg(
+					hdev, vport[k].qs_offset + i, i);
+				if (ret)
+					return ret;
+			}
 	} else if (hdev->tx_sch_mode == HCLGE_FLAG_VNET_BASE_SCH_MODE) {
-		int k;
 		/* Cfg qs -> pri mapping,  qs = tc, pri = vf, 8 qs -> 1 pri */
 		for (k = 0; k < hdev->num_alloc_vport; k++)
 			for (i = 0; i < HNAE3_MAX_TC; i++) {
@@ -696,13 +765,11 @@ static int hclge_tm_pri_vnet_base_shaper_qs_cfg(struct hclge_vport *vport)
 {
 	struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
 	struct hclge_dev *hdev = vport->back;
-	struct hnae3_tc_info *v_tc_info;
 	u8 ir_u, ir_b, ir_s;
 	u32 i;
 	int ret;
 
 	for (i = 0; i < kinfo->num_tc; i++) {
-		v_tc_info = &kinfo->tc_info[i];
 		ret = hclge_shaper_para_calc(
 					hdev->tm_info.tc_info[i].bw_limit,
 					HCLGE_SHAPER_LVL_QSET,
@@ -755,10 +822,11 @@ static int hclge_tm_pri_shaper_cfg(struct hclge_dev *hdev)
 
 static int hclge_tm_pri_tc_base_dwrr_cfg(struct hclge_dev *hdev)
 {
+	struct hclge_vport *vport = hdev->vport;
 	struct hclge_pg_info *pg_info;
 	u8 dwrr;
 	int ret;
-	u32 i;
+	u32 i, k;
 
 	for (i = 0; i < hdev->tm_info.num_tc; i++) {
 		pg_info =
@@ -769,9 +837,13 @@ static int hclge_tm_pri_tc_base_dwrr_cfg(struct hclge_dev *hdev)
 		if (ret)
 			return ret;
 
-		ret = hclge_tm_qs_weight_cfg(hdev, i, dwrr);
-		if (ret)
-			return ret;
+		for (k = 0; k < hdev->num_alloc_vport; k++) {
+			ret = hclge_tm_qs_weight_cfg(
+				hdev, vport[k].qs_offset + i,
+				vport[k].dwrr);
+			if (ret)
+				return ret;
+		}
 	}
 
 	return 0;
@@ -835,10 +907,14 @@ static int hclge_tm_pri_dwrr_cfg(struct hclge_dev *hdev)
 	return 0;
 }
 
-static int hclge_tm_map_cfg(struct hclge_dev *hdev)
+int hclge_tm_map_cfg(struct hclge_dev *hdev)
 {
 	int ret;
 
+	ret = hclge_up_to_tc_map(hdev);
+	if (ret)
+		return ret;
+
 	ret = hclge_tm_pg_to_pri_map(hdev);
 	if (ret)
 		return ret;
@@ -850,6 +926,10 @@ static int hclge_tm_shaper_cfg(struct hclge_dev *hdev)
 {
 	int ret;
 
+	ret = hclge_tm_port_shaper_cfg(hdev);
+	if (ret)
+		return ret;
+
 	ret = hclge_tm_pg_shaper_cfg(hdev);
 	if (ret)
 		return ret;
@@ -898,7 +978,10 @@ static int hclge_tm_schd_mode_vnet_base_cfg(struct hclge_vport *vport)
 		return ret;
 
 	for (i = 0; i < kinfo->num_tc; i++) {
-		ret = hclge_tm_qs_schd_mode_cfg(hdev, vport->qs_offset + i);
+		u8 sch_mode = hdev->tm_info.tc_info[i].tc_sch_mode;
+
+		ret = hclge_tm_qs_schd_mode_cfg(hdev, vport->qs_offset + i,
+						sch_mode);
 		if (ret)
 			return ret;
 	}
@@ -910,7 +993,7 @@ static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev)
 {
 	struct hclge_vport *vport = hdev->vport;
 	int ret;
-	u8 i;
+	u8 i, k;
 
 	if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) {
 		for (i = 0; i < hdev->tm_info.num_tc; i++) {
@@ -918,9 +1001,13 @@ static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev)
 			if (ret)
 				return ret;
 
-			ret = hclge_tm_qs_schd_mode_cfg(hdev, i);
-			if (ret)
-				return ret;
+			for (k = 0; k < hdev->num_alloc_vport; k++) {
+				ret = hclge_tm_qs_schd_mode_cfg(
+					hdev, vport[k].qs_offset + i,
+					HCLGE_SCH_MODE_DWRR);
+				if (ret)
+					return ret;
+			}
 		}
 	} else {
 		for (i = 0; i < hdev->num_alloc_vport; i++) {
@@ -935,7 +1022,7 @@ static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev)
 	return 0;
 }
 
-static int hclge_tm_schd_mode_hw(struct hclge_dev *hdev)
+int hclge_tm_schd_mode_hw(struct hclge_dev *hdev)
 {
 	int ret;
 
@@ -969,27 +1056,109 @@ static int hclge_tm_schd_setup_hw(struct hclge_dev *hdev)
 	return hclge_tm_schd_mode_hw(hdev);
 }
 
+static int hclge_pfc_setup_hw(struct hclge_dev *hdev)
+{
+	u8 enable_bitmap = 0;
+
+	if (hdev->tm_info.fc_mode == HCLGE_FC_PFC)
+		enable_bitmap = HCLGE_TX_MAC_PAUSE_EN_MSK |
+				HCLGE_RX_MAC_PAUSE_EN_MSK;
+
+	return hclge_pfc_pause_en_cfg(hdev, enable_bitmap,
+				      hdev->tm_info.hw_pfc_map);
+}
+
+static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev)
+{
+	bool tx_en, rx_en;
+
+	switch (hdev->tm_info.fc_mode) {
+	case HCLGE_FC_NONE:
+		tx_en = false;
+		rx_en = false;
+		break;
+	case HCLGE_FC_RX_PAUSE:
+		tx_en = false;
+		rx_en = true;
+		break;
+	case HCLGE_FC_TX_PAUSE:
+		tx_en = true;
+		rx_en = false;
+		break;
+	case HCLGE_FC_FULL:
+		tx_en = true;
+		rx_en = true;
+		break;
+	default:
+		tx_en = true;
+		rx_en = true;
+	}
+
+	return hclge_mac_pause_en_cfg(hdev, tx_en, rx_en);
+}
+
 int hclge_pause_setup_hw(struct hclge_dev *hdev)
 {
-	bool en = hdev->tm_info.fc_mode != HCLGE_FC_PFC;
 	int ret;
 	u8 i;
 
-	ret = hclge_mac_pause_en_cfg(hdev, en, en);
-	if (ret)
-		return ret;
+	if (hdev->tm_info.fc_mode != HCLGE_FC_PFC)
+		return hclge_mac_pause_setup_hw(hdev);
 
-	/* Only DCB-supported dev supports qset back pressure setting */
+	/* Only DCB-supported dev supports qset back pressure and pfc cmd */
 	if (!hnae3_dev_dcb_supported(hdev))
 		return 0;
 
+	/* When MAC is GE Mode, hdev does not support pfc setting */
+	ret = hclge_pfc_setup_hw(hdev);
+	if (ret)
+		dev_warn(&hdev->pdev->dev, "set pfc pause failed:%d\n", ret);
+
 	for (i = 0; i < hdev->tm_info.num_tc; i++) {
 		ret = hclge_tm_qs_bp_cfg(hdev, i);
 		if (ret)
 			return ret;
 	}
 
-	return hclge_up_to_tc_map(hdev);
+	return 0;
+}
+
+int hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc)
+{
+	struct hclge_vport *vport = hdev->vport;
+	struct hnae3_knic_private_info *kinfo;
+	u32 i, k;
+
+	for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) {
+		if (prio_tc[i] >= hdev->tm_info.num_tc)
+			return -EINVAL;
+		hdev->tm_info.prio_tc[i] = prio_tc[i];
+
+		for (k = 0;  k < hdev->num_alloc_vport; k++) {
+			kinfo = &vport[k].nic.kinfo;
+			kinfo->prio_tc[i] = prio_tc[i];
+		}
+	}
+	return 0;
+}
+
+void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc)
+{
+	u8 i, bit_map = 0;
+
+	hdev->tm_info.num_tc = num_tc;
+
+	for (i = 0; i < hdev->tm_info.num_tc; i++)
+		bit_map |= BIT(i);
+
+	if (!bit_map) {
+		bit_map = 1;
+		hdev->tm_info.num_tc = 1;
+	}
+
+	hdev->hw_tc_map = bit_map;
+
+	hclge_tm_schd_info_init(hdev);
 }
 
 int hclge_tm_init_hw(struct hclge_dev *hdev)
@@ -1013,8 +1182,13 @@ int hclge_tm_init_hw(struct hclge_dev *hdev)
 
 int hclge_tm_schd_init(struct hclge_dev *hdev)
 {
-	int ret = hclge_tm_schd_info_init(hdev);
+	int ret;
+
+	/* fc_mode is HCLGE_FC_FULL on reset */
+	hdev->tm_info.fc_mode = HCLGE_FC_FULL;
+	hdev->fc_mode_last_time = hdev->tm_info.fc_mode;
 
+	ret = hclge_tm_schd_info_init(hdev);
 	if (ret)
 		return ret;
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index 85158b0d73fe..bf59961918ab 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -94,6 +94,15 @@ struct hclge_bp_to_qs_map_cmd {
 	u32 rsvd1;
 };
 
+struct hclge_pfc_en_cmd {
+	u8 tx_rx_en_bitmap;
+	u8 pri_en_bitmap;
+};
+
+struct hclge_port_shapping_cmd {
+	__le32 port_shapping_para;
+};
+
 #define hclge_tm_set_field(dest, string, val) \
 			hnae_set_field((dest), (HCLGE_TM_SHAP_##string##_MSK), \
 				       (HCLGE_TM_SHAP_##string##_LSH), val)
@@ -103,4 +112,10 @@ struct hclge_bp_to_qs_map_cmd {
 
 int hclge_tm_schd_init(struct hclge_dev *hdev);
 int hclge_pause_setup_hw(struct hclge_dev *hdev);
+int hclge_tm_schd_mode_hw(struct hclge_dev *hdev);
+int hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc);
+void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc);
+int hclge_tm_dwrr_cfg(struct hclge_dev *hdev);
+int hclge_tm_map_cfg(struct hclge_dev *hdev);
+int hclge_tm_init_hw(struct hclge_dev *hdev);
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c
new file mode 100644
index 000000000000..925619a7c50a
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016-2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "hnae3.h"
+#include "hns3_enet.h"
+
+static
+int hns3_dcbnl_ieee_getets(struct net_device *ndev, struct ieee_ets *ets)
+{
+	struct hnae3_handle *h = hns3_get_handle(ndev);
+
+	if (h->kinfo.dcb_ops->ieee_getets)
+		return h->kinfo.dcb_ops->ieee_getets(h, ets);
+
+	return -EOPNOTSUPP;
+}
+
+static
+int hns3_dcbnl_ieee_setets(struct net_device *ndev, struct ieee_ets *ets)
+{
+	struct hnae3_handle *h = hns3_get_handle(ndev);
+
+	if (h->kinfo.dcb_ops->ieee_setets)
+		return h->kinfo.dcb_ops->ieee_setets(h, ets);
+
+	return -EOPNOTSUPP;
+}
+
+static
+int hns3_dcbnl_ieee_getpfc(struct net_device *ndev, struct ieee_pfc *pfc)
+{
+	struct hnae3_handle *h = hns3_get_handle(ndev);
+
+	if (h->kinfo.dcb_ops->ieee_getpfc)
+		return h->kinfo.dcb_ops->ieee_getpfc(h, pfc);
+
+	return -EOPNOTSUPP;
+}
+
+static
+int hns3_dcbnl_ieee_setpfc(struct net_device *ndev, struct ieee_pfc *pfc)
+{
+	struct hnae3_handle *h = hns3_get_handle(ndev);
+
+	if (h->kinfo.dcb_ops->ieee_setpfc)
+		return h->kinfo.dcb_ops->ieee_setpfc(h, pfc);
+
+	return -EOPNOTSUPP;
+}
+
+/* DCBX configuration */
+static u8 hns3_dcbnl_getdcbx(struct net_device *ndev)
+{
+	struct hnae3_handle *h = hns3_get_handle(ndev);
+
+	if (h->kinfo.dcb_ops->getdcbx)
+		return h->kinfo.dcb_ops->getdcbx(h);
+
+	return 0;
+}
+
+/* return 0 if successful, otherwise fail */
+static u8 hns3_dcbnl_setdcbx(struct net_device *ndev, u8 mode)
+{
+	struct hnae3_handle *h = hns3_get_handle(ndev);
+
+	if (h->kinfo.dcb_ops->setdcbx)
+		return h->kinfo.dcb_ops->setdcbx(h, mode);
+
+	return 1;
+}
+
+static const struct dcbnl_rtnl_ops hns3_dcbnl_ops = {
+	.ieee_getets	= hns3_dcbnl_ieee_getets,
+	.ieee_setets	= hns3_dcbnl_ieee_setets,
+	.ieee_getpfc	= hns3_dcbnl_ieee_getpfc,
+	.ieee_setpfc	= hns3_dcbnl_ieee_setpfc,
+	.getdcbx	= hns3_dcbnl_getdcbx,
+	.setdcbx	= hns3_dcbnl_setdcbx,
+};
+
+/* hclge_dcbnl_setup - DCBNL setup
+ * @handle: the corresponding vport handle
+ * Set up DCBNL
+ */
+void hns3_dcbnl_setup(struct hnae3_handle *handle)
+{
+	struct net_device *dev = handle->kinfo.netdev;
+
+	if (!handle->kinfo.dcb_ops)
+		return;
+
+	dev->dcbnl_ops = &hns3_dcbnl_ops;
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
index 35369e1c8036..59415090ff0f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
@@ -19,12 +19,13 @@
 #include <linux/sctp.h>
 #include <linux/vermagic.h>
 #include <net/gre.h>
+#include <net/pkt_cls.h>
 #include <net/vxlan.h>
 
 #include "hnae3.h"
 #include "hns3_enet.h"
 
-const char hns3_driver_name[] = "hns3";
+static const char hns3_driver_name[] = "hns3";
 const char hns3_driver_version[] = VERMAGIC_STRING;
 static const char hns3_driver_string[] =
 			"Hisilicon Ethernet Network Driver for Hip08 Family";
@@ -196,6 +197,31 @@ static void hns3_vector_gl_rl_init(struct hns3_enet_tqp_vector *tqp_vector)
 	tqp_vector->tx_group.flow_level = HNS3_FLOW_LOW;
 }
 
+static int hns3_nic_set_real_num_queue(struct net_device *netdev)
+{
+	struct hnae3_handle *h = hns3_get_handle(netdev);
+	struct hnae3_knic_private_info *kinfo = &h->kinfo;
+	unsigned int queue_size = kinfo->rss_size * kinfo->num_tc;
+	int ret;
+
+	ret = netif_set_real_num_tx_queues(netdev, queue_size);
+	if (ret) {
+		netdev_err(netdev,
+			   "netif_set_real_num_tx_queues fail, ret=%d!\n",
+			   ret);
+		return ret;
+	}
+
+	ret = netif_set_real_num_rx_queues(netdev, queue_size);
+	if (ret) {
+		netdev_err(netdev,
+			   "netif_set_real_num_rx_queues fail, ret=%d!\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
 static int hns3_nic_net_up(struct net_device *netdev)
 {
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
@@ -233,25 +259,13 @@ out_start_err:
 static int hns3_nic_net_open(struct net_device *netdev)
 {
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = priv->ae_handle;
 	int ret;
 
 	netif_carrier_off(netdev);
 
-	ret = netif_set_real_num_tx_queues(netdev, h->kinfo.num_tqps);
-	if (ret) {
-		netdev_err(netdev,
-			   "netif_set_real_num_tx_queues fail, ret=%d!\n",
-			   ret);
+	ret = hns3_nic_set_real_num_queue(netdev);
+	if (ret)
 		return ret;
-	}
-
-	ret = netif_set_real_num_rx_queues(netdev, h->kinfo.num_tqps);
-	if (ret) {
-		netdev_err(netdev,
-			   "netif_set_real_num_rx_queues fail, ret=%d!\n", ret);
-		return ret;
-	}
 
 	ret = hns3_nic_net_up(netdev);
 	if (ret) {
@@ -260,6 +274,7 @@ static int hns3_nic_net_open(struct net_device *netdev)
 		return ret;
 	}
 
+	priv->last_reset_time = jiffies;
 	return 0;
 }
 
@@ -292,24 +307,10 @@ static int hns3_nic_net_stop(struct net_device *netdev)
 	return 0;
 }
 
-void hns3_set_multicast_list(struct net_device *netdev)
-{
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = priv->ae_handle;
-	struct netdev_hw_addr *ha = NULL;
-
-	if (h->ae_algo->ops->set_mc_addr) {
-		netdev_for_each_mc_addr(ha, netdev)
-			if (h->ae_algo->ops->set_mc_addr(h, ha->addr))
-				netdev_err(netdev, "set multicast fail\n");
-	}
-}
-
 static int hns3_nic_uc_sync(struct net_device *netdev,
 			    const unsigned char *addr)
 {
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = priv->ae_handle;
+	struct hnae3_handle *h = hns3_get_handle(netdev);
 
 	if (h->ae_algo->ops->add_uc_addr)
 		return h->ae_algo->ops->add_uc_addr(h, addr);
@@ -320,8 +321,7 @@ static int hns3_nic_uc_sync(struct net_device *netdev,
 static int hns3_nic_uc_unsync(struct net_device *netdev,
 			      const unsigned char *addr)
 {
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = priv->ae_handle;
+	struct hnae3_handle *h = hns3_get_handle(netdev);
 
 	if (h->ae_algo->ops->rm_uc_addr)
 		return h->ae_algo->ops->rm_uc_addr(h, addr);
@@ -332,8 +332,7 @@ static int hns3_nic_uc_unsync(struct net_device *netdev,
 static int hns3_nic_mc_sync(struct net_device *netdev,
 			    const unsigned char *addr)
 {
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = priv->ae_handle;
+	struct hnae3_handle *h = hns3_get_handle(netdev);
 
 	if (h->ae_algo->ops->add_mc_addr)
 		return h->ae_algo->ops->add_mc_addr(h, addr);
@@ -344,8 +343,7 @@ static int hns3_nic_mc_sync(struct net_device *netdev,
 static int hns3_nic_mc_unsync(struct net_device *netdev,
 			      const unsigned char *addr)
 {
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = priv->ae_handle;
+	struct hnae3_handle *h = hns3_get_handle(netdev);
 
 	if (h->ae_algo->ops->rm_mc_addr)
 		return h->ae_algo->ops->rm_mc_addr(h, addr);
@@ -353,10 +351,9 @@ static int hns3_nic_mc_unsync(struct net_device *netdev,
 	return 0;
 }
 
-void hns3_nic_set_rx_mode(struct net_device *netdev)
+static void hns3_nic_set_rx_mode(struct net_device *netdev)
 {
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = priv->ae_handle;
+	struct hnae3_handle *h = hns3_get_handle(netdev);
 
 	if (h->ae_algo->ops->set_promisc_mode) {
 		if (netdev->flags & IFF_PROMISC)
@@ -721,7 +718,7 @@ static void hns3_set_txbd_baseinfo(u16 *bdtp_fe_sc_vld_ra_ri, int frag_end)
 		       HNS3_TXD_BDTYPE_M, 0);
 	hnae_set_bit(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_FE_B, !!frag_end);
 	hnae_set_bit(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_VLD_B, 1);
-	hnae_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_SC_M, HNS3_TXD_SC_S, 1);
+	hnae_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_SC_M, HNS3_TXD_SC_S, 0);
 }
 
 static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
@@ -755,7 +752,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
 
 	if (type == DESC_TYPE_SKB) {
 		skb = (struct sk_buff *)priv;
-		paylen = cpu_to_le16(skb->len);
+		paylen = skb->len;
 
 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
 			skb_reset_mac_len(skb);
@@ -789,7 +786,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
 			cpu_to_le32(ol_type_vlan_len_msec);
 		desc->tx.type_cs_vlan_tso_len =
 			cpu_to_le32(type_cs_vlan_tso);
-		desc->tx.paylen = cpu_to_le16(paylen);
+		desc->tx.paylen = cpu_to_le32(paylen);
 		desc->tx.mss = cpu_to_le16(mss);
 	}
 
@@ -905,8 +902,7 @@ static void hns_nic_dma_unmap(struct hns3_enet_ring *ring, int next_to_use_orig)
 	}
 }
 
-static netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb,
-				     struct net_device *netdev)
+netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
 	struct hns3_nic_ring_data *ring_data =
@@ -1012,8 +1008,7 @@ out_net_tx_busy:
 
 static int hns3_nic_net_set_mac_address(struct net_device *netdev, void *p)
 {
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = priv->ae_handle;
+	struct hnae3_handle *h = hns3_get_handle(netdev);
 	struct sockaddr *mac_addr = p;
 	int ret;
 
@@ -1193,61 +1188,80 @@ static void hns3_nic_udp_tunnel_del(struct net_device *netdev,
 	}
 }
 
-static int hns3_setup_tc(struct net_device *netdev, u8 tc)
+static int hns3_setup_tc(struct net_device *netdev, void *type_data)
 {
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = priv->ae_handle;
+	struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
+	struct hnae3_handle *h = hns3_get_handle(netdev);
 	struct hnae3_knic_private_info *kinfo = &h->kinfo;
+	u8 *prio_tc = mqprio_qopt->qopt.prio_tc_map;
+	u8 tc = mqprio_qopt->qopt.num_tc;
+	u16 mode = mqprio_qopt->mode;
+	u8 hw = mqprio_qopt->qopt.hw;
+	bool if_running;
 	unsigned int i;
 	int ret;
 
+	if (!((hw == TC_MQPRIO_HW_OFFLOAD_TCS &&
+	       mode == TC_MQPRIO_MODE_CHANNEL) || (!hw && tc == 0)))
+		return -EOPNOTSUPP;
+
 	if (tc > HNAE3_MAX_TC)
 		return -EINVAL;
 
-	if (kinfo->num_tc == tc)
-		return 0;
-
 	if (!netdev)
 		return -EINVAL;
 
-	if (!tc) {
-		netdev_reset_tc(netdev);
-		return 0;
+	if_running = netif_running(netdev);
+	if (if_running) {
+		hns3_nic_net_stop(netdev);
+		msleep(100);
 	}
 
-	/* Set num_tc for netdev */
-	ret = netdev_set_num_tc(netdev, tc);
+	ret = (kinfo->dcb_ops && kinfo->dcb_ops->setup_tc) ?
+		kinfo->dcb_ops->setup_tc(h, tc, prio_tc) : -EOPNOTSUPP;
 	if (ret)
-		return ret;
+		goto out;
+
+	if (tc <= 1) {
+		netdev_reset_tc(netdev);
+	} else {
+		ret = netdev_set_num_tc(netdev, tc);
+		if (ret)
+			goto out;
+
+		for (i = 0; i < HNAE3_MAX_TC; i++) {
+			if (!kinfo->tc_info[i].enable)
+				continue;
 
-	/* Set per TC queues for the VSI */
-	for (i = 0; i < HNAE3_MAX_TC; i++) {
-		if (kinfo->tc_info[i].enable)
 			netdev_set_tc_queue(netdev,
 					    kinfo->tc_info[i].tc,
 					    kinfo->tc_info[i].tqp_count,
 					    kinfo->tc_info[i].tqp_offset);
+		}
 	}
 
-	return 0;
+	ret = hns3_nic_set_real_num_queue(netdev);
+
+out:
+	if (if_running)
+		hns3_nic_net_open(netdev);
+
+	return ret;
 }
 
 static int hns3_nic_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			     void *type_data)
 {
-	struct tc_mqprio_qopt *mqprio = type_data;
-
-	if (type != TC_SETUP_MQPRIO)
+	if (type != TC_SETUP_QDISC_MQPRIO)
 		return -EOPNOTSUPP;
 
-	return hns3_setup_tc(dev, mqprio->num_tc);
+	return hns3_setup_tc(dev, type_data);
 }
 
 static int hns3_vlan_rx_add_vid(struct net_device *netdev,
 				__be16 proto, u16 vid)
 {
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = priv->ae_handle;
+	struct hnae3_handle *h = hns3_get_handle(netdev);
 	int ret = -EIO;
 
 	if (h->ae_algo->ops->set_vlan_filter)
@@ -1259,8 +1273,7 @@ static int hns3_vlan_rx_add_vid(struct net_device *netdev,
 static int hns3_vlan_rx_kill_vid(struct net_device *netdev,
 				 __be16 proto, u16 vid)
 {
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = priv->ae_handle;
+	struct hnae3_handle *h = hns3_get_handle(netdev);
 	int ret = -EIO;
 
 	if (h->ae_algo->ops->set_vlan_filter)
@@ -1272,8 +1285,7 @@ static int hns3_vlan_rx_kill_vid(struct net_device *netdev,
 static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
 				u8 qos, __be16 vlan_proto)
 {
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = priv->ae_handle;
+	struct hnae3_handle *h = hns3_get_handle(netdev);
 	int ret = -EIO;
 
 	if (h->ae_algo->ops->set_vf_vlan_filter)
@@ -1285,8 +1297,7 @@ static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
 
 static int hns3_nic_change_mtu(struct net_device *netdev, int new_mtu)
 {
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = priv->ae_handle;
+	struct hnae3_handle *h = hns3_get_handle(netdev);
 	bool if_running = netif_running(netdev);
 	int ret;
 
@@ -1313,10 +1324,91 @@ static int hns3_nic_change_mtu(struct net_device *netdev, int new_mtu)
 	return ret;
 }
 
+static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev)
+{
+	struct hns3_nic_priv *priv = netdev_priv(ndev);
+	struct hns3_enet_ring *tx_ring = NULL;
+	int timeout_queue = 0;
+	int hw_head, hw_tail;
+	int i;
+
+	/* Find the stopped queue the same way the stack does */
+	for (i = 0; i < ndev->real_num_tx_queues; i++) {
+		struct netdev_queue *q;
+		unsigned long trans_start;
+
+		q = netdev_get_tx_queue(ndev, i);
+		trans_start = q->trans_start;
+		if (netif_xmit_stopped(q) &&
+		    time_after(jiffies,
+			       (trans_start + ndev->watchdog_timeo))) {
+			timeout_queue = i;
+			break;
+		}
+	}
+
+	if (i == ndev->num_tx_queues) {
+		netdev_info(ndev,
+			    "no netdev TX timeout queue found, timeout count: %llu\n",
+			    priv->tx_timeout_count);
+		return false;
+	}
+
+	tx_ring = priv->ring_data[timeout_queue].ring;
+
+	hw_head = readl_relaxed(tx_ring->tqp->io_base +
+				HNS3_RING_TX_RING_HEAD_REG);
+	hw_tail = readl_relaxed(tx_ring->tqp->io_base +
+				HNS3_RING_TX_RING_TAIL_REG);
+	netdev_info(ndev,
+		    "tx_timeout count: %llu, queue id: %d, SW_NTU: 0x%x, SW_NTC: 0x%x, HW_HEAD: 0x%x, HW_TAIL: 0x%x, INT: 0x%x\n",
+		    priv->tx_timeout_count,
+		    timeout_queue,
+		    tx_ring->next_to_use,
+		    tx_ring->next_to_clean,
+		    hw_head,
+		    hw_tail,
+		    readl(tx_ring->tqp_vector->mask_addr));
+
+	return true;
+}
+
+static void hns3_nic_net_timeout(struct net_device *ndev)
+{
+	struct hns3_nic_priv *priv = netdev_priv(ndev);
+	unsigned long last_reset_time = priv->last_reset_time;
+	struct hnae3_handle *h = priv->ae_handle;
+
+	if (!hns3_get_tx_timeo_queue_info(ndev))
+		return;
+
+	priv->tx_timeout_count++;
+
+	/* This timeout is far away enough from last timeout,
+	 * if timeout again,set the reset type to PF reset
+	 */
+	if (time_after(jiffies, (last_reset_time + 20 * HZ)))
+		priv->reset_level = HNAE3_FUNC_RESET;
+
+	/* Don't do any new action before the next timeout */
+	else if (time_before(jiffies, (last_reset_time + ndev->watchdog_timeo)))
+		return;
+
+	priv->last_reset_time = jiffies;
+
+	if (h->ae_algo->ops->reset_event)
+		h->ae_algo->ops->reset_event(h, priv->reset_level);
+
+	priv->reset_level++;
+	if (priv->reset_level > HNAE3_GLOBAL_RESET)
+		priv->reset_level = HNAE3_GLOBAL_RESET;
+}
+
 static const struct net_device_ops hns3_nic_netdev_ops = {
 	.ndo_open		= hns3_nic_net_open,
 	.ndo_stop		= hns3_nic_net_stop,
 	.ndo_start_xmit		= hns3_nic_net_xmit,
+	.ndo_tx_timeout		= hns3_nic_net_timeout,
 	.ndo_set_mac_address	= hns3_nic_net_set_mac_address,
 	.ndo_change_mtu		= hns3_nic_change_mtu,
 	.ndo_set_features	= hns3_nic_set_features,
@@ -1435,8 +1527,6 @@ static int hns3_alloc_buffer(struct hns3_enet_ring *ring,
 	cb->length = hnae_page_size(ring);
 	cb->type = DESC_TYPE_PAGE;
 
-	memset(cb->buf, 0, cb->length);
-
 	return 0;
 }
 
@@ -1546,7 +1636,7 @@ static int hns3_reserve_buffer_map(struct hns3_enet_ring *ring,
 	return 0;
 
 out_with_buf:
-	hns3_free_buffers(ring);
+	hns3_free_buffer(ring, cb);
 out:
 	return ret;
 }
@@ -1586,7 +1676,7 @@ out_buffer_fail:
 static void hns3_replace_buffer(struct hns3_enet_ring *ring, int i,
 				struct hns3_desc_cb *res_cb)
 {
-	hns3_map_buffer(ring, &ring->desc_cb[i]);
+	hns3_unmap_buffer(ring, &ring->desc_cb[i]);
 	ring->desc_cb[i] = *res_cb;
 	ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma);
 }
@@ -1622,7 +1712,7 @@ static int is_valid_clean_head(struct hns3_enet_ring *ring, int h)
 	return u > c ? (h > c && h <= u) : (h > c || h <= u);
 }
 
-int hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget)
+bool hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget)
 {
 	struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
 	struct netdev_queue *dev_queue;
@@ -1633,7 +1723,7 @@ int hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget)
 	rmb(); /* Make sure head is ready before touch any data */
 
 	if (is_ring_empty(ring) || head == ring->next_to_clean)
-		return 0; /* no data to poll */
+		return true; /* no data to poll */
 
 	if (!is_valid_clean_head(ring, head)) {
 		netdev_err(netdev, "wrong head (%d, %d-%d)\n", head,
@@ -1642,7 +1732,7 @@ int hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget)
 		u64_stats_update_begin(&ring->syncp);
 		ring->stats.io_err_cnt++;
 		u64_stats_update_end(&ring->syncp);
-		return -EIO;
+		return true;
 	}
 
 	bytes = 0;
@@ -1933,6 +2023,11 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb,
 	}
 }
 
+static void hns3_rx_skb(struct hns3_enet_ring *ring, struct sk_buff *skb)
+{
+	napi_gro_receive(&ring->tqp_vector->napi, skb);
+}
+
 static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
 			     struct sk_buff **out_skb, int *out_bnum)
 {
@@ -2067,7 +2162,9 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
 	return 0;
 }
 
-static int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget)
+int hns3_clean_rx_ring(
+		struct hns3_enet_ring *ring, int budget,
+		void (*rx_fn)(struct hns3_enet_ring *, struct sk_buff *))
 {
 #define RCB_NOF_ALLOC_RX_BUFF_ONCE 16
 	struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
@@ -2105,7 +2202,7 @@ static int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget)
 
 		/* Do update ip stack process */
 		skb->protocol = eth_type_trans(skb, netdev);
-		(void)napi_gro_receive(&ring->tqp_vector->napi, skb);
+		rx_fn(ring, skb);
 
 		recv_pkts++;
 	}
@@ -2248,7 +2345,8 @@ static int hns3_nic_common_poll(struct napi_struct *napi, int budget)
 	rx_budget = max(budget / tqp_vector->num_tqps, 1);
 
 	hns3_for_each_ring(ring, tqp_vector->rx_group) {
-		int rx_cleaned = hns3_clean_rx_ring(ring, rx_budget);
+		int rx_cleaned = hns3_clean_rx_ring(ring, rx_budget,
+						    hns3_rx_skb);
 
 		if (rx_cleaned >= rx_budget)
 			clean_complete = false;
@@ -2460,9 +2558,8 @@ static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv)
 			(void)irq_set_affinity_hint(
 				priv->tqp_vector[i].vector_irq,
 						    NULL);
-			devm_free_irq(&pdev->dev,
-				      priv->tqp_vector[i].vector_irq,
-				      &priv->tqp_vector[i]);
+			free_irq(priv->tqp_vector[i].vector_irq,
+				 &priv->tqp_vector[i]);
 		}
 
 		priv->ring_data[i].ring->irq_init_flag = HNS3_VECTOR_NOT_INITED;
@@ -2489,16 +2586,16 @@ static int hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
 
 	if (ring_type == HNAE3_RING_TYPE_TX) {
 		ring_data[q->tqp_index].ring = ring;
+		ring_data[q->tqp_index].queue_index = q->tqp_index;
 		ring->io_base = (u8 __iomem *)q->io_base + HNS3_TX_REG_OFFSET;
 	} else {
 		ring_data[q->tqp_index + queue_num].ring = ring;
+		ring_data[q->tqp_index + queue_num].queue_index = q->tqp_index;
 		ring->io_base = q->io_base;
 	}
 
 	hnae_set_bit(ring->flag, HNAE3_RING_TYPE_B, ring_type);
 
-	ring_data[q->tqp_index].queue_index = q->tqp_index;
-
 	ring->tqp = q;
 	ring->desc = NULL;
 	ring->desc_cb = NULL;
@@ -2596,7 +2693,7 @@ static void hns3_fini_ring(struct hns3_enet_ring *ring)
 	ring->next_to_use = 0;
 }
 
-int hns3_buf_size2type(u32 buf_size)
+static int hns3_buf_size2type(u32 buf_size)
 {
 	int bd_size_type;
 
@@ -2649,7 +2746,7 @@ static void hns3_init_ring_hw(struct hns3_enet_ring *ring)
 	}
 }
 
-static int hns3_init_all_ring(struct hns3_nic_priv *priv)
+int hns3_init_all_ring(struct hns3_nic_priv *priv)
 {
 	struct hnae3_handle *h = priv->ae_handle;
 	int ring_num = h->kinfo.num_tqps * 2;
@@ -2673,12 +2770,12 @@ static int hns3_init_all_ring(struct hns3_nic_priv *priv)
 
 out_when_alloc_ring_memory:
 	for (j = i - 1; j >= 0; j--)
-		hns3_fini_ring(priv->ring_data[i].ring);
+		hns3_fini_ring(priv->ring_data[j].ring);
 
 	return -ENOMEM;
 }
 
-static int hns3_uninit_all_ring(struct hns3_nic_priv *priv)
+int hns3_uninit_all_ring(struct hns3_nic_priv *priv)
 {
 	struct hnae3_handle *h = priv->ae_handle;
 	int i;
@@ -2748,6 +2845,9 @@ static int hns3_client_init(struct hnae3_handle *handle)
 	priv->dev = &pdev->dev;
 	priv->netdev = netdev;
 	priv->ae_handle = handle;
+	priv->last_reset_time = jiffies;
+	priv->reset_level = HNAE3_FUNC_RESET;
+	priv->tx_timeout_count = 0;
 
 	handle->kinfo.netdev = netdev;
 	handle->priv = (void *)priv;
@@ -2790,6 +2890,8 @@ static int hns3_client_init(struct hnae3_handle *handle)
 		goto out_reg_netdev_fail;
 	}
 
+	hns3_dcbnl_setup(handle);
+
 	/* MTU range: (ETH_MIN_MTU(kernel default) - 9706) */
 	netdev->max_mtu = HNS3_MAX_MTU - (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN);
 
@@ -2846,10 +2948,224 @@ static void hns3_link_status_change(struct hnae3_handle *handle, bool linkup)
 	}
 }
 
-const struct hnae3_client_ops client_ops = {
+static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc)
+{
+	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
+	struct net_device *ndev = kinfo->netdev;
+	bool if_running;
+	int ret;
+	u8 i;
+
+	if (tc > HNAE3_MAX_TC)
+		return -EINVAL;
+
+	if (!ndev)
+		return -ENODEV;
+
+	if_running = netif_running(ndev);
+
+	ret = netdev_set_num_tc(ndev, tc);
+	if (ret)
+		return ret;
+
+	if (if_running) {
+		(void)hns3_nic_net_stop(ndev);
+		msleep(100);
+	}
+
+	ret = (kinfo->dcb_ops && kinfo->dcb_ops->map_update) ?
+		kinfo->dcb_ops->map_update(handle) : -EOPNOTSUPP;
+	if (ret)
+		goto err_out;
+
+	if (tc <= 1) {
+		netdev_reset_tc(ndev);
+		goto out;
+	}
+
+	for (i = 0; i < HNAE3_MAX_TC; i++) {
+		struct hnae3_tc_info *tc_info = &kinfo->tc_info[i];
+
+		if (tc_info->enable)
+			netdev_set_tc_queue(ndev,
+					    tc_info->tc,
+					    tc_info->tqp_count,
+					    tc_info->tqp_offset);
+	}
+
+	for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) {
+		netdev_set_prio_tc_map(ndev, i,
+				       kinfo->prio_tc[i]);
+	}
+
+out:
+	ret = hns3_nic_set_real_num_queue(ndev);
+
+err_out:
+	if (if_running)
+		(void)hns3_nic_net_open(ndev);
+
+	return ret;
+}
+
+static void hns3_recover_hw_addr(struct net_device *ndev)
+{
+	struct netdev_hw_addr_list *list;
+	struct netdev_hw_addr *ha, *tmp;
+
+	/* go through and sync uc_addr entries to the device */
+	list = &ndev->uc;
+	list_for_each_entry_safe(ha, tmp, &list->list, list)
+		hns3_nic_uc_sync(ndev, ha->addr);
+
+	/* go through and sync mc_addr entries to the device */
+	list = &ndev->mc;
+	list_for_each_entry_safe(ha, tmp, &list->list, list)
+		hns3_nic_mc_sync(ndev, ha->addr);
+}
+
+static void hns3_drop_skb_data(struct hns3_enet_ring *ring, struct sk_buff *skb)
+{
+	dev_kfree_skb_any(skb);
+}
+
+static void hns3_clear_all_ring(struct hnae3_handle *h)
+{
+	struct net_device *ndev = h->kinfo.netdev;
+	struct hns3_nic_priv *priv = netdev_priv(ndev);
+	u32 i;
+
+	for (i = 0; i < h->kinfo.num_tqps; i++) {
+		struct netdev_queue *dev_queue;
+		struct hns3_enet_ring *ring;
+
+		ring = priv->ring_data[i].ring;
+		hns3_clean_tx_ring(ring, ring->desc_num);
+		dev_queue = netdev_get_tx_queue(ndev,
+						priv->ring_data[i].queue_index);
+		netdev_tx_reset_queue(dev_queue);
+
+		ring = priv->ring_data[i + h->kinfo.num_tqps].ring;
+		hns3_clean_rx_ring(ring, ring->desc_num, hns3_drop_skb_data);
+	}
+}
+
+static int hns3_reset_notify_down_enet(struct hnae3_handle *handle)
+{
+	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
+	struct net_device *ndev = kinfo->netdev;
+
+	if (!netif_running(ndev))
+		return -EIO;
+
+	return hns3_nic_net_stop(ndev);
+}
+
+static int hns3_reset_notify_up_enet(struct hnae3_handle *handle)
+{
+	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
+	struct hns3_nic_priv *priv = netdev_priv(kinfo->netdev);
+	int ret = 0;
+
+	if (netif_running(kinfo->netdev)) {
+		ret = hns3_nic_net_up(kinfo->netdev);
+		if (ret) {
+			netdev_err(kinfo->netdev,
+				   "hns net up fail, ret=%d!\n", ret);
+			return ret;
+		}
+
+		priv->last_reset_time = jiffies;
+	}
+
+	return ret;
+}
+
+static int hns3_reset_notify_init_enet(struct hnae3_handle *handle)
+{
+	struct net_device *netdev = handle->kinfo.netdev;
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	int ret;
+
+	priv->reset_level = 1;
+	hns3_init_mac_addr(netdev);
+	hns3_nic_set_rx_mode(netdev);
+	hns3_recover_hw_addr(netdev);
+
+	/* Carrier off reporting is important to ethtool even BEFORE open */
+	netif_carrier_off(netdev);
+
+	ret = hns3_get_ring_config(priv);
+	if (ret)
+		return ret;
+
+	ret = hns3_nic_init_vector_data(priv);
+	if (ret)
+		return ret;
+
+	ret = hns3_init_all_ring(priv);
+	if (ret) {
+		hns3_nic_uninit_vector_data(priv);
+		priv->ring_data = NULL;
+	}
+
+	return ret;
+}
+
+static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle)
+{
+	struct net_device *netdev = handle->kinfo.netdev;
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	int ret;
+
+	hns3_clear_all_ring(handle);
+
+	ret = hns3_nic_uninit_vector_data(priv);
+	if (ret) {
+		netdev_err(netdev, "uninit vector error\n");
+		return ret;
+	}
+
+	ret = hns3_uninit_all_ring(priv);
+	if (ret)
+		netdev_err(netdev, "uninit ring error\n");
+
+	priv->ring_data = NULL;
+
+	return ret;
+}
+
+static int hns3_reset_notify(struct hnae3_handle *handle,
+			     enum hnae3_reset_notify_type type)
+{
+	int ret = 0;
+
+	switch (type) {
+	case HNAE3_UP_CLIENT:
+                ret = hns3_reset_notify_up_enet(handle);
+                break;
+	case HNAE3_DOWN_CLIENT:
+		ret = hns3_reset_notify_down_enet(handle);
+		break;
+	case HNAE3_INIT_CLIENT:
+		ret = hns3_reset_notify_init_enet(handle);
+		break;
+	case HNAE3_UNINIT_CLIENT:
+		ret = hns3_reset_notify_uninit_enet(handle);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+static const struct hnae3_client_ops client_ops = {
 	.init_instance = hns3_client_init,
 	.uninit_instance = hns3_client_uninit,
 	.link_status_change = hns3_link_status_change,
+	.setup_tc = hns3_client_setup_tc,
+	.reset_notify = hns3_reset_notify,
 };
 
 /* hns3_init_module - Driver registration routine
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h
index 7e8746189747..8a9de759957b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h
@@ -76,6 +76,8 @@ enum hns3_nic_state {
 #define HNS3_RING_NAME_LEN			16
 #define HNS3_BUFFER_SIZE_2048			2048
 #define HNS3_RING_MAX_PENDING			32768
+#define HNS3_RING_MIN_PENDING			8
+#define HNS3_RING_BD_MULTIPLE			8
 #define HNS3_MAX_MTU				9728
 
 #define HNS3_BD_SIZE_512_TYPE			0
@@ -516,6 +518,8 @@ struct hns3_nic_priv {
 	/* The most recently read link state */
 	int link;
 	u64 tx_timeout_count;
+	enum hnae3_reset_type reset_level;
+	unsigned long last_reset_time;
 
 	unsigned long state;
 
@@ -587,7 +591,23 @@ static inline void hns3_write_reg(void __iomem *base, u32 reg, u32 value)
 #define hns3_for_each_ring(pos, head) \
 	for (pos = (head).ring; pos; pos = pos->next)
 
+#define hns3_get_handle(ndev) \
+	(((struct hns3_nic_priv *)netdev_priv(ndev))->ae_handle)
+
 void hns3_ethtool_set_ops(struct net_device *netdev);
 
-int hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget);
+bool hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget);
+int hns3_init_all_ring(struct hns3_nic_priv *priv);
+int hns3_uninit_all_ring(struct hns3_nic_priv *priv);
+netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev);
+int hns3_clean_rx_ring(
+		struct hns3_enet_ring *ring, int budget,
+		void (*rx_fn)(struct hns3_enet_ring *, struct sk_buff *));
+
+#ifdef CONFIG_HNS3_DCB
+void hns3_dcbnl_setup(struct hnae3_handle *handle);
+#else
+static inline void hns3_dcbnl_setup(struct hnae3_handle *handle) {}
+#endif
+
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c
index d636399232fb..a21470c72da3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c
@@ -9,6 +9,7 @@
 
 #include <linux/etherdevice.h>
 #include <linux/string.h>
+#include <linux/phy.h>
 
 #include "hns3_enet.h"
 
@@ -59,6 +60,16 @@ static const struct hns3_stats hns3_rxq_stats[] = {
 
 #define HNS3_TQP_STATS_COUNT (HNS3_TXQ_STATS_COUNT + HNS3_RXQ_STATS_COUNT)
 
+#define HNS3_SELF_TEST_TPYE_NUM		1
+#define HNS3_NIC_LB_TEST_PKT_NUM	1
+#define HNS3_NIC_LB_TEST_RING_ID	0
+#define HNS3_NIC_LB_TEST_PACKET_SIZE	128
+
+/* Nic loopback test err  */
+#define HNS3_NIC_LB_TEST_NO_MEM_ERR	1
+#define HNS3_NIC_LB_TEST_TX_CNT_ERR	2
+#define HNS3_NIC_LB_TEST_RX_CNT_ERR	3
+
 struct hns3_link_mode_mapping {
 	u32 hns3_link_mode;
 	u32 ethtool_link_mode;
@@ -77,6 +88,268 @@ static const struct hns3_link_mode_mapping hns3_lm_map[] = {
 	{HNS3_LM_1000BASET_FULL_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT},
 };
 
+static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop)
+{
+	struct hnae3_handle *h = hns3_get_handle(ndev);
+	int ret;
+
+	if (!h->ae_algo->ops->set_loopback ||
+	    !h->ae_algo->ops->set_promisc_mode)
+		return -EOPNOTSUPP;
+
+	switch (loop) {
+	case HNAE3_MAC_INTER_LOOP_MAC:
+		ret = h->ae_algo->ops->set_loopback(h, loop, true);
+		break;
+	case HNAE3_MAC_LOOP_NONE:
+		ret = h->ae_algo->ops->set_loopback(h,
+			HNAE3_MAC_INTER_LOOP_MAC, false);
+		break;
+	default:
+		ret = -ENOTSUPP;
+		break;
+	}
+
+	if (ret)
+		return ret;
+
+	if (loop == HNAE3_MAC_LOOP_NONE)
+		h->ae_algo->ops->set_promisc_mode(h, ndev->flags & IFF_PROMISC);
+	else
+		h->ae_algo->ops->set_promisc_mode(h, 1);
+
+	return ret;
+}
+
+static int hns3_lp_up(struct net_device *ndev, enum hnae3_loop loop_mode)
+{
+	struct hnae3_handle *h = hns3_get_handle(ndev);
+	int ret;
+
+	if (!h->ae_algo->ops->start)
+		return -EOPNOTSUPP;
+
+	ret = h->ae_algo->ops->start(h);
+	if (ret) {
+		netdev_err(ndev,
+			   "hns3_lb_up ae start return error: %d\n", ret);
+		return ret;
+	}
+
+	ret = hns3_lp_setup(ndev, loop_mode);
+	usleep_range(10000, 20000);
+
+	return ret;
+}
+
+static int hns3_lp_down(struct net_device *ndev)
+{
+	struct hnae3_handle *h = hns3_get_handle(ndev);
+	int ret;
+
+	if (!h->ae_algo->ops->stop)
+		return -EOPNOTSUPP;
+
+	ret = hns3_lp_setup(ndev, HNAE3_MAC_LOOP_NONE);
+	if (ret) {
+		netdev_err(ndev, "lb_setup return error: %d\n", ret);
+		return ret;
+	}
+
+	h->ae_algo->ops->stop(h);
+	usleep_range(10000, 20000);
+
+	return 0;
+}
+
+static void hns3_lp_setup_skb(struct sk_buff *skb)
+{
+	struct net_device *ndev = skb->dev;
+	unsigned char *packet;
+	struct ethhdr *ethh;
+	unsigned int i;
+
+	skb_reserve(skb, NET_IP_ALIGN);
+	ethh = skb_put(skb, sizeof(struct ethhdr));
+	packet = skb_put(skb, HNS3_NIC_LB_TEST_PACKET_SIZE);
+
+	memcpy(ethh->h_dest, ndev->dev_addr, ETH_ALEN);
+	eth_zero_addr(ethh->h_source);
+	ethh->h_proto = htons(ETH_P_ARP);
+	skb_reset_mac_header(skb);
+
+	for (i = 0; i < HNS3_NIC_LB_TEST_PACKET_SIZE; i++)
+		packet[i] = (unsigned char)(i & 0xff);
+}
+
+static void hns3_lb_check_skb_data(struct hns3_enet_ring *ring,
+				   struct sk_buff *skb)
+{
+	struct hns3_enet_tqp_vector *tqp_vector = ring->tqp_vector;
+	unsigned char *packet = skb->data;
+	u32 i;
+
+	for (i = 0; i < skb->len; i++)
+		if (packet[i] != (unsigned char)(i & 0xff))
+			break;
+
+	/* The packet is correctly received */
+	if (i == skb->len)
+		tqp_vector->rx_group.total_packets++;
+	else
+		print_hex_dump(KERN_ERR, "selftest:", DUMP_PREFIX_OFFSET, 16, 1,
+			       skb->data, skb->len, true);
+
+	dev_kfree_skb_any(skb);
+}
+
+static u32 hns3_lb_check_rx_ring(struct hns3_nic_priv *priv, u32 budget)
+{
+	struct hnae3_handle *h = priv->ae_handle;
+	struct hnae3_knic_private_info *kinfo;
+	u32 i, rcv_good_pkt_total = 0;
+
+	kinfo = &h->kinfo;
+	for (i = kinfo->num_tqps; i < kinfo->num_tqps * 2; i++) {
+		struct hns3_enet_ring *ring = priv->ring_data[i].ring;
+		struct hns3_enet_ring_group *rx_group;
+		u64 pre_rx_pkt;
+
+		rx_group = &ring->tqp_vector->rx_group;
+		pre_rx_pkt = rx_group->total_packets;
+
+		hns3_clean_rx_ring(ring, budget, hns3_lb_check_skb_data);
+
+		rcv_good_pkt_total += (rx_group->total_packets - pre_rx_pkt);
+		rx_group->total_packets = pre_rx_pkt;
+	}
+	return rcv_good_pkt_total;
+}
+
+static void hns3_lb_clear_tx_ring(struct hns3_nic_priv *priv, u32 start_ringid,
+				  u32 end_ringid, u32 budget)
+{
+	u32 i;
+
+	for (i = start_ringid; i <= end_ringid; i++) {
+		struct hns3_enet_ring *ring = priv->ring_data[i].ring;
+
+		hns3_clean_tx_ring(ring, budget);
+	}
+}
+
+/**
+ * hns3_lp_run_test -  run loopback test
+ * @ndev: net device
+ * @mode: loopback type
+ */
+static int hns3_lp_run_test(struct net_device *ndev, enum hnae3_loop mode)
+{
+	struct hns3_nic_priv *priv = netdev_priv(ndev);
+	struct sk_buff *skb;
+	u32 i, good_cnt;
+	int ret_val = 0;
+
+	skb = alloc_skb(HNS3_NIC_LB_TEST_PACKET_SIZE + ETH_HLEN + NET_IP_ALIGN,
+			GFP_KERNEL);
+	if (!skb)
+		return HNS3_NIC_LB_TEST_NO_MEM_ERR;
+
+	skb->dev = ndev;
+	hns3_lp_setup_skb(skb);
+	skb->queue_mapping = HNS3_NIC_LB_TEST_RING_ID;
+
+	good_cnt = 0;
+	for (i = 0; i < HNS3_NIC_LB_TEST_PKT_NUM; i++) {
+		netdev_tx_t tx_ret;
+
+		skb_get(skb);
+		tx_ret = hns3_nic_net_xmit(skb, ndev);
+		if (tx_ret == NETDEV_TX_OK)
+			good_cnt++;
+		else
+			netdev_err(ndev, "hns3_lb_run_test xmit failed: %d\n",
+				   tx_ret);
+	}
+	if (good_cnt != HNS3_NIC_LB_TEST_PKT_NUM) {
+		ret_val = HNS3_NIC_LB_TEST_TX_CNT_ERR;
+		netdev_err(ndev, "mode %d sent fail, cnt=0x%x, budget=0x%x\n",
+			   mode, good_cnt, HNS3_NIC_LB_TEST_PKT_NUM);
+		goto out;
+	}
+
+	/* Allow 200 milliseconds for packets to go from Tx to Rx */
+	msleep(200);
+
+	good_cnt = hns3_lb_check_rx_ring(priv, HNS3_NIC_LB_TEST_PKT_NUM);
+	if (good_cnt != HNS3_NIC_LB_TEST_PKT_NUM) {
+		ret_val = HNS3_NIC_LB_TEST_RX_CNT_ERR;
+		netdev_err(ndev, "mode %d recv fail, cnt=0x%x, budget=0x%x\n",
+			   mode, good_cnt, HNS3_NIC_LB_TEST_PKT_NUM);
+	}
+
+out:
+	hns3_lb_clear_tx_ring(priv, HNS3_NIC_LB_TEST_RING_ID,
+			      HNS3_NIC_LB_TEST_RING_ID,
+			      HNS3_NIC_LB_TEST_PKT_NUM);
+
+	kfree_skb(skb);
+	return ret_val;
+}
+
+/**
+ * hns3_nic_self_test - self test
+ * @ndev: net device
+ * @eth_test: test cmd
+ * @data: test result
+ */
+static void hns3_self_test(struct net_device *ndev,
+			   struct ethtool_test *eth_test, u64 *data)
+{
+	struct hns3_nic_priv *priv = netdev_priv(ndev);
+	struct hnae3_handle *h = priv->ae_handle;
+	int st_param[HNS3_SELF_TEST_TPYE_NUM][2];
+	bool if_running = netif_running(ndev);
+	int test_index = 0;
+	u32 i;
+
+	/* Only do offline selftest, or pass by default */
+	if (eth_test->flags != ETH_TEST_FL_OFFLINE)
+		return;
+
+	st_param[HNAE3_MAC_INTER_LOOP_MAC][0] = HNAE3_MAC_INTER_LOOP_MAC;
+	st_param[HNAE3_MAC_INTER_LOOP_MAC][1] =
+			h->flags & HNAE3_SUPPORT_MAC_LOOPBACK;
+
+	if (if_running)
+		dev_close(ndev);
+
+	set_bit(HNS3_NIC_STATE_TESTING, &priv->state);
+
+	for (i = 0; i < HNS3_SELF_TEST_TPYE_NUM; i++) {
+		enum hnae3_loop loop_type = (enum hnae3_loop)st_param[i][0];
+
+		if (!st_param[i][1])
+			continue;
+
+		data[test_index] = hns3_lp_up(ndev, loop_type);
+		if (!data[test_index]) {
+			data[test_index] = hns3_lp_run_test(ndev, loop_type);
+			hns3_lp_down(ndev);
+		}
+
+		if (data[test_index])
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		test_index++;
+	}
+
+	clear_bit(HNS3_NIC_STATE_TESTING, &priv->state);
+
+	if (if_running)
+		dev_open(ndev);
+}
+
 static void hns3_driv_to_eth_caps(u32 caps, struct ethtool_link_ksettings *cmd,
 				  bool is_advertised)
 {
@@ -86,24 +359,18 @@ static void hns3_driv_to_eth_caps(u32 caps, struct ethtool_link_ksettings *cmd,
 		if (!(caps & hns3_lm_map[i].hns3_link_mode))
 			continue;
 
-		if (is_advertised) {
-			ethtool_link_ksettings_zero_link_mode(cmd,
-							      advertising);
+		if (is_advertised)
 			__set_bit(hns3_lm_map[i].ethtool_link_mode,
 				  cmd->link_modes.advertising);
-		} else {
-			ethtool_link_ksettings_zero_link_mode(cmd,
-							      supported);
+		else
 			__set_bit(hns3_lm_map[i].ethtool_link_mode,
 				  cmd->link_modes.supported);
-		}
 	}
 }
 
 static int hns3_get_sset_count(struct net_device *netdev, int stringset)
 {
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = priv->ae_handle;
+	struct hnae3_handle *h = hns3_get_handle(netdev);
 	const struct hnae3_ae_ops *ops = h->ae_algo->ops;
 
 	if (!ops->get_sset_count)
@@ -164,8 +431,7 @@ static u8 *hns3_get_strings_tqps(struct hnae3_handle *handle, u8 *data)
 
 static void hns3_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 {
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = priv->ae_handle;
+	struct hnae3_handle *h = hns3_get_handle(netdev);
 	const struct hnae3_ae_ops *ops = h->ae_algo->ops;
 	char *buff = (char *)data;
 
@@ -217,11 +483,10 @@ static u64 *hns3_get_stats_tqps(struct hnae3_handle *handle, u64 *data)
  * @stats: statistics info.
  * @data: statistics data.
  */
-void hns3_get_stats(struct net_device *netdev, struct ethtool_stats *stats,
-		    u64 *data)
+static void hns3_get_stats(struct net_device *netdev,
+			   struct ethtool_stats *stats, u64 *data)
 {
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = priv->ae_handle;
+	struct hnae3_handle *h = hns3_get_handle(netdev);
 	u64 *p = data;
 
 	if (!h->ae_algo->ops->get_stats || !h->ae_algo->ops->update_stats) {
@@ -262,10 +527,7 @@ static void hns3_get_drvinfo(struct net_device *netdev,
 
 static u32 hns3_get_link(struct net_device *netdev)
 {
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h;
-
-	h = priv->ae_handle;
+	struct hnae3_handle *h = hns3_get_handle(netdev);
 
 	if (h->ae_algo && h->ae_algo->ops && h->ae_algo->ops->get_status)
 		return h->ae_algo->ops->get_status(h);
@@ -277,7 +539,8 @@ static void hns3_get_ringparam(struct net_device *netdev,
 			       struct ethtool_ringparam *param)
 {
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	int queue_num = priv->ae_handle->kinfo.num_tqps;
+	struct hnae3_handle *h = priv->ae_handle;
+	int queue_num = h->kinfo.num_tqps;
 
 	param->tx_max_pending = HNS3_RING_MAX_PENDING;
 	param->rx_max_pending = HNS3_RING_MAX_PENDING;
@@ -289,8 +552,7 @@ static void hns3_get_ringparam(struct net_device *netdev,
 static void hns3_get_pauseparam(struct net_device *netdev,
 				struct ethtool_pauseparam *param)
 {
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = priv->ae_handle;
+	struct hnae3_handle *h = hns3_get_handle(netdev);
 
 	if (h->ae_algo && h->ae_algo->ops && h->ae_algo->ops->get_pauseparam)
 		h->ae_algo->ops->get_pauseparam(h, &param->autoneg,
@@ -300,32 +562,30 @@ static void hns3_get_pauseparam(struct net_device *netdev,
 static int hns3_get_link_ksettings(struct net_device *netdev,
 				   struct ethtool_link_ksettings *cmd)
 {
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = priv->ae_handle;
+	struct hnae3_handle *h = hns3_get_handle(netdev);
 	u32 supported_caps;
 	u32 advertised_caps;
 	u8 media_type = HNAE3_MEDIA_TYPE_UNKNOWN;
 	u8 link_stat;
-	u8 auto_neg;
-	u8 duplex;
-	u32 speed;
 
 	if (!h->ae_algo || !h->ae_algo->ops)
 		return -EOPNOTSUPP;
 
 	/* 1.auto_neg & speed & duplex from cmd */
-	if (h->ae_algo->ops->get_ksettings_an_result) {
-		h->ae_algo->ops->get_ksettings_an_result(h, &auto_neg,
-							 &speed, &duplex);
-		cmd->base.autoneg = auto_neg;
-		cmd->base.speed = speed;
-		cmd->base.duplex = duplex;
-
-		link_stat = hns3_get_link(netdev);
-		if (!link_stat) {
-			cmd->base.speed = (u32)SPEED_UNKNOWN;
-			cmd->base.duplex = DUPLEX_UNKNOWN;
-		}
+	if (netdev->phydev)
+		phy_ethtool_ksettings_get(netdev->phydev, cmd);
+	else if (h->ae_algo->ops->get_ksettings_an_result)
+		h->ae_algo->ops->get_ksettings_an_result(h,
+							 &cmd->base.autoneg,
+							 &cmd->base.speed,
+							 &cmd->base.duplex);
+	else
+		return -EOPNOTSUPP;
+
+	link_stat = hns3_get_link(netdev);
+	if (!link_stat) {
+		cmd->base.speed = SPEED_UNKNOWN;
+		cmd->base.duplex = DUPLEX_UNKNOWN;
 	}
 
 	/* 2.media_type get from bios parameter block */
@@ -375,6 +635,9 @@ static int hns3_get_link_ksettings(struct net_device *netdev,
 			break;
 		}
 
+		if (!cmd->base.autoneg)
+			advertised_caps &= ~HNS3_LM_AUTONEG_BIT;
+
 		/* now, map driver link modes to ethtool link modes */
 		hns3_driv_to_eth_caps(supported_caps, cmd, false);
 		hns3_driv_to_eth_caps(advertised_caps, cmd, true);
@@ -390,10 +653,19 @@ static int hns3_get_link_ksettings(struct net_device *netdev,
 	return 0;
 }
 
+static int hns3_set_link_ksettings(struct net_device *netdev,
+				   const struct ethtool_link_ksettings *cmd)
+{
+	/* Only support ksettings_set for netdev with phy attached for now */
+	if (netdev->phydev)
+		return phy_ethtool_ksettings_set(netdev->phydev, cmd);
+
+	return -EOPNOTSUPP;
+}
+
 static u32 hns3_get_rss_key_size(struct net_device *netdev)
 {
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = priv->ae_handle;
+	struct hnae3_handle *h = hns3_get_handle(netdev);
 
 	if (!h->ae_algo || !h->ae_algo->ops ||
 	    !h->ae_algo->ops->get_rss_key_size)
@@ -404,8 +676,7 @@ static u32 hns3_get_rss_key_size(struct net_device *netdev)
 
 static u32 hns3_get_rss_indir_size(struct net_device *netdev)
 {
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = priv->ae_handle;
+	struct hnae3_handle *h = hns3_get_handle(netdev);
 
 	if (!h->ae_algo || !h->ae_algo->ops ||
 	    !h->ae_algo->ops->get_rss_indir_size)
@@ -417,8 +688,7 @@ static u32 hns3_get_rss_indir_size(struct net_device *netdev)
 static int hns3_get_rss(struct net_device *netdev, u32 *indir, u8 *key,
 			u8 *hfunc)
 {
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = priv->ae_handle;
+	struct hnae3_handle *h = hns3_get_handle(netdev);
 
 	if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->get_rss)
 		return -EOPNOTSUPP;
@@ -429,8 +699,7 @@ static int hns3_get_rss(struct net_device *netdev, u32 *indir, u8 *key,
 static int hns3_set_rss(struct net_device *netdev, const u32 *indir,
 			const u8 *key, const u8 hfunc)
 {
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = priv->ae_handle;
+	struct hnae3_handle *h = hns3_get_handle(netdev);
 
 	if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->set_rss)
 		return -EOPNOTSUPP;
@@ -454,16 +723,17 @@ static int hns3_get_rxnfc(struct net_device *netdev,
 			  struct ethtool_rxnfc *cmd,
 			  u32 *rule_locs)
 {
-	struct hns3_nic_priv *priv = netdev_priv(netdev);
-	struct hnae3_handle *h = priv->ae_handle;
+	struct hnae3_handle *h = hns3_get_handle(netdev);
 
-	if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->get_tc_size)
+	if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->get_rss_tuple)
 		return -EOPNOTSUPP;
 
 	switch (cmd->cmd) {
 	case ETHTOOL_GRXRINGS:
-		cmd->data = h->ae_algo->ops->get_tc_size(h);
+		cmd->data = h->kinfo.num_tc * h->kinfo.rss_size;
 		break;
+	case ETHTOOL_GRXFH:
+		return h->ae_algo->ops->get_rss_tuple(h, cmd);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -471,20 +741,133 @@ static int hns3_get_rxnfc(struct net_device *netdev,
 	return 0;
 }
 
+static int hns3_change_all_ring_bd_num(struct hns3_nic_priv *priv,
+				       u32 new_desc_num)
+{
+	struct hnae3_handle *h = priv->ae_handle;
+	int i;
+
+	h->kinfo.num_desc = new_desc_num;
+
+	for (i = 0; i < h->kinfo.num_tqps * 2; i++)
+		priv->ring_data[i].ring->desc_num = new_desc_num;
+
+	return hns3_init_all_ring(priv);
+}
+
+static int hns3_set_ringparam(struct net_device *ndev,
+			      struct ethtool_ringparam *param)
+{
+	struct hns3_nic_priv *priv = netdev_priv(ndev);
+	struct hnae3_handle *h = priv->ae_handle;
+	bool if_running = netif_running(ndev);
+	u32 old_desc_num, new_desc_num;
+	int ret;
+
+	if (param->rx_mini_pending || param->rx_jumbo_pending)
+		return -EINVAL;
+
+	if (param->tx_pending != param->rx_pending) {
+		netdev_err(ndev,
+			   "Descriptors of tx and rx must be equal");
+		return -EINVAL;
+	}
+
+	if (param->tx_pending > HNS3_RING_MAX_PENDING ||
+	    param->tx_pending < HNS3_RING_MIN_PENDING) {
+		netdev_err(ndev,
+			   "Descriptors requested (Tx/Rx: %d) out of range [%d-%d]\n",
+			   param->tx_pending, HNS3_RING_MIN_PENDING,
+			   HNS3_RING_MAX_PENDING);
+		return -EINVAL;
+	}
+
+	new_desc_num = param->tx_pending;
+
+	/* Hardware requires that its descriptors must be multiple of eight */
+	new_desc_num = ALIGN(new_desc_num, HNS3_RING_BD_MULTIPLE);
+	old_desc_num = h->kinfo.num_desc;
+	if (old_desc_num == new_desc_num)
+		return 0;
+
+	netdev_info(ndev,
+		    "Changing descriptor count from %d to %d.\n",
+		    old_desc_num, new_desc_num);
+
+	if (if_running)
+		dev_close(ndev);
+
+	ret = hns3_uninit_all_ring(priv);
+	if (ret)
+		return ret;
+
+	ret = hns3_change_all_ring_bd_num(priv, new_desc_num);
+	if (ret) {
+		ret = hns3_change_all_ring_bd_num(priv, old_desc_num);
+		if (ret) {
+			netdev_err(ndev,
+				   "Revert to old bd num fail, ret=%d.\n", ret);
+			return ret;
+		}
+	}
+
+	if (if_running)
+		ret = dev_open(ndev);
+
+	return ret;
+}
+
+static int hns3_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
+{
+	struct hnae3_handle *h = hns3_get_handle(netdev);
+
+	if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->set_rss_tuple)
+		return -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXFH:
+		return h->ae_algo->ops->set_rss_tuple(h, cmd);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int hns3_nway_reset(struct net_device *netdev)
+{
+	struct phy_device *phy = netdev->phydev;
+
+	if (!netif_running(netdev))
+		return 0;
+
+	/* Only support nway_reset for netdev with phy attached for now */
+	if (!phy)
+		return -EOPNOTSUPP;
+
+	if (phy->autoneg != AUTONEG_ENABLE)
+		return -EINVAL;
+
+	return genphy_restart_aneg(phy);
+}
+
 static const struct ethtool_ops hns3_ethtool_ops = {
+	.self_test = hns3_self_test,
 	.get_drvinfo = hns3_get_drvinfo,
 	.get_link = hns3_get_link,
 	.get_ringparam = hns3_get_ringparam,
+	.set_ringparam = hns3_set_ringparam,
 	.get_pauseparam = hns3_get_pauseparam,
 	.get_strings = hns3_get_strings,
 	.get_ethtool_stats = hns3_get_stats,
 	.get_sset_count = hns3_get_sset_count,
 	.get_rxnfc = hns3_get_rxnfc,
+	.set_rxnfc = hns3_set_rxnfc,
 	.get_rxfh_key_size = hns3_get_rss_key_size,
 	.get_rxfh_indir_size = hns3_get_rss_indir_size,
 	.get_rxfh = hns3_get_rss,
 	.set_rxfh = hns3_set_rss,
 	.get_link_ksettings = hns3_get_link_ksettings,
+	.set_link_ksettings = hns3_set_link_ksettings,
+	.nway_reset = hns3_nway_reset,
 };
 
 void hns3_ethtool_set_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
index 1d4f712b15a8..e2e5cdc7119c 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
@@ -26,6 +26,7 @@
 #include <linux/skbuff.h>
 #include <linux/dma-mapping.h>
 #include <linux/prefetch.h>
+#include <linux/cpumask.h>
 #include <asm/barrier.h>
 
 #include "hinic_common.h"
@@ -171,11 +172,10 @@ static int rx_alloc_pkts(struct hinic_rxq *rxq)
 	struct hinic_sge sge;
 	dma_addr_t dma_addr;
 	struct sk_buff *skb;
-	int i, alloc_more;
 	u16 prod_idx;
+	int i;
 
 	free_wqebbs = hinic_get_rq_free_wqebbs(rxq->rq);
-	alloc_more = 0;
 
 	/* Limit the allocation chunks */
 	if (free_wqebbs > nic_dev->rx_weight)
@@ -185,7 +185,6 @@ static int rx_alloc_pkts(struct hinic_rxq *rxq)
 		skb = rx_alloc_skb(rxq, &dma_addr);
 		if (!skb) {
 			netdev_err(rxq->netdev, "Failed to alloc Rx skb\n");
-			alloc_more = 1;
 			goto skb_out;
 		}
 
@@ -195,7 +194,6 @@ static int rx_alloc_pkts(struct hinic_rxq *rxq)
 					  &prod_idx);
 		if (!rq_wqe) {
 			rx_free_skb(rxq, skb, dma_addr);
-			alloc_more = 1;
 			goto skb_out;
 		}
 
@@ -211,9 +209,7 @@ skb_out:
 		hinic_rq_update(rxq->rq, prod_idx);
 	}
 
-	if (alloc_more)
-		tasklet_schedule(&rxq->rx_task);
-
+	tasklet_schedule(&rxq->rx_task);
 	return i;
 }
 
@@ -357,7 +353,7 @@ static int rxq_recv(struct hinic_rxq *rxq, int budget)
 	}
 
 	if (pkts)
-		tasklet_schedule(&rxq->rx_task); /* hinic_rx_alloc_pkts */
+		tasklet_schedule(&rxq->rx_task); /* rx_alloc_pkts */
 
 	u64_stats_update_begin(&rxq->rxq_stats.syncp);
 	rxq->rxq_stats.pkts += pkts;
@@ -417,6 +413,8 @@ static int rx_request_irq(struct hinic_rxq *rxq)
 	struct hinic_dev *nic_dev = netdev_priv(rxq->netdev);
 	struct hinic_hwdev *hwdev = nic_dev->hwdev;
 	struct hinic_rq *rq = rxq->rq;
+	struct hinic_qp *qp;
+	struct cpumask mask;
 	int err;
 
 	rx_add_napi(rxq);
@@ -432,7 +430,9 @@ static int rx_request_irq(struct hinic_rxq *rxq)
 		return err;
 	}
 
-	return 0;
+	qp = container_of(rq, struct hinic_qp, rq);
+	cpumask_set_cpu(qp->q_id % num_online_cpus(), &mask);
+	return irq_set_affinity_hint(rq->irq, &mask);
 }
 
 static void rx_free_irq(struct hinic_rxq *rxq)
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
index abe3e38cd342..9128858479c4 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
@@ -212,10 +212,19 @@ netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 
 	sq_wqe = hinic_sq_get_wqe(txq->sq, wqe_size, &prod_idx);
 	if (!sq_wqe) {
-		tx_unmap_skb(nic_dev, skb, txq->sges);
-
 		netif_stop_subqueue(netdev, qp->q_id);
 
+		/* Check for the case free_tx_poll is called in another cpu
+		 * and we stopped the subqueue after free_tx_poll check.
+		 */
+		sq_wqe = hinic_sq_get_wqe(txq->sq, wqe_size, &prod_idx);
+		if (sq_wqe) {
+			netif_wake_subqueue(nic_dev->netdev, qp->q_id);
+			goto process_sq_wqe;
+		}
+
+		tx_unmap_skb(nic_dev, skb, txq->sges);
+
 		u64_stats_update_begin(&txq->txq_stats.syncp);
 		txq->txq_stats.tx_busy++;
 		u64_stats_update_end(&txq->txq_stats.syncp);
@@ -223,6 +232,7 @@ netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 		goto flush_skbs;
 	}
 
+process_sq_wqe:
 	hinic_sq_prepare_wqe(txq->sq, prod_idx, sq_wqe, txq->sges, nr_sges);
 
 	hinic_sq_write_wqe(txq->sq, prod_idx, sq_wqe, skb, wqe_size);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index c66abd476023..04aaacbc3d45 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -75,6 +75,7 @@
 #include <asm/firmware.h>
 #include <linux/workqueue.h>
 #include <linux/if_vlan.h>
+#include <linux/utsname.h>
 
 #include "ibmvnic.h"
 
@@ -115,6 +116,7 @@ static int init_sub_crqs(struct ibmvnic_adapter *);
 static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter);
 static int ibmvnic_init(struct ibmvnic_adapter *);
 static void release_crq_queue(struct ibmvnic_adapter *);
+static int __ibmvnic_set_mac(struct net_device *netdev, struct sockaddr *p);
 
 struct ibmvnic_stat {
 	char name[ETH_GSTRING_LEN];
@@ -553,6 +555,10 @@ static int reset_tx_pools(struct ibmvnic_adapter *adapter)
 		if (rc)
 			return rc;
 
+		rc = reset_long_term_buff(adapter, &tx_pool->tso_ltb);
+		if (rc)
+			return rc;
+
 		memset(tx_pool->tx_buff, 0,
 		       adapter->req_tx_entries_per_subcrq *
 		       sizeof(struct ibmvnic_tx_buff));
@@ -562,11 +568,21 @@ static int reset_tx_pools(struct ibmvnic_adapter *adapter)
 
 		tx_pool->consumer_index = 0;
 		tx_pool->producer_index = 0;
+		tx_pool->tso_index = 0;
 	}
 
 	return 0;
 }
 
+static void release_vpd_data(struct ibmvnic_adapter *adapter)
+{
+	if (!adapter->vpd)
+		return;
+
+	kfree(adapter->vpd->buff);
+	kfree(adapter->vpd);
+}
+
 static void release_tx_pools(struct ibmvnic_adapter *adapter)
 {
 	struct ibmvnic_tx_pool *tx_pool;
@@ -581,6 +597,7 @@ static void release_tx_pools(struct ibmvnic_adapter *adapter)
 		tx_pool = &adapter->tx_pool[i];
 		kfree(tx_pool->tx_buff);
 		free_long_term_buff(adapter, &tx_pool->long_term_buff);
+		free_long_term_buff(adapter, &tx_pool->tso_ltb);
 		kfree(tx_pool->free_map);
 	}
 
@@ -625,6 +642,16 @@ static int init_tx_pools(struct net_device *netdev)
 			return -1;
 		}
 
+		/* alloc TSO ltb */
+		if (alloc_long_term_buff(adapter, &tx_pool->tso_ltb,
+					 IBMVNIC_TSO_BUFS *
+					 IBMVNIC_TSO_BUF_SZ)) {
+			release_tx_pools(adapter);
+			return -1;
+		}
+
+		tx_pool->tso_index = 0;
+
 		tx_pool->free_map = kcalloc(adapter->req_tx_entries_per_subcrq,
 					    sizeof(int), GFP_KERNEL);
 		if (!tx_pool->free_map) {
@@ -736,6 +763,8 @@ static void release_resources(struct ibmvnic_adapter *adapter)
 {
 	int i;
 
+	release_vpd_data(adapter);
+
 	release_tx_pools(adapter);
 	release_rx_pools(adapter);
 
@@ -816,6 +845,57 @@ static int set_real_num_queues(struct net_device *netdev)
 	return rc;
 }
 
+static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
+{
+	struct device *dev = &adapter->vdev->dev;
+	union ibmvnic_crq crq;
+	dma_addr_t dma_addr;
+	int len = 0;
+
+	if (adapter->vpd->buff)
+		len = adapter->vpd->len;
+
+	reinit_completion(&adapter->fw_done);
+	crq.get_vpd_size.first = IBMVNIC_CRQ_CMD;
+	crq.get_vpd_size.cmd = GET_VPD_SIZE;
+	ibmvnic_send_crq(adapter, &crq);
+	wait_for_completion(&adapter->fw_done);
+
+	if (!adapter->vpd->len)
+		return -ENODATA;
+
+	if (!adapter->vpd->buff)
+		adapter->vpd->buff = kzalloc(adapter->vpd->len, GFP_KERNEL);
+	else if (adapter->vpd->len != len)
+		adapter->vpd->buff =
+			krealloc(adapter->vpd->buff,
+				 adapter->vpd->len, GFP_KERNEL);
+
+	if (!adapter->vpd->buff) {
+		dev_err(dev, "Could allocate VPD buffer\n");
+		return -ENOMEM;
+	}
+
+	adapter->vpd->dma_addr =
+		dma_map_single(dev, adapter->vpd->buff, adapter->vpd->len,
+			       DMA_FROM_DEVICE);
+	if (dma_mapping_error(dev, dma_addr)) {
+		dev_err(dev, "Could not map VPD buffer\n");
+		kfree(adapter->vpd->buff);
+		return -ENOMEM;
+	}
+
+	reinit_completion(&adapter->fw_done);
+	crq.get_vpd.first = IBMVNIC_CRQ_CMD;
+	crq.get_vpd.cmd = GET_VPD;
+	crq.get_vpd.ioba = cpu_to_be32(adapter->vpd->dma_addr);
+	crq.get_vpd.len = cpu_to_be32((u32)adapter->vpd->len);
+	ibmvnic_send_crq(adapter, &crq);
+	wait_for_completion(&adapter->fw_done);
+
+	return 0;
+}
+
 static int init_resources(struct ibmvnic_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
@@ -833,6 +913,10 @@ static int init_resources(struct ibmvnic_adapter *adapter)
 	if (rc)
 		return rc;
 
+	adapter->vpd = kzalloc(sizeof(*adapter->vpd), GFP_KERNEL);
+	if (!adapter->vpd)
+		return -ENOMEM;
+
 	adapter->map_id = 1;
 	adapter->napi = kcalloc(adapter->req_rx_queues,
 				sizeof(struct napi_struct), GFP_KERNEL);
@@ -906,10 +990,15 @@ static int __ibmvnic_open(struct net_device *netdev)
 static int ibmvnic_open(struct net_device *netdev)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-	int rc;
+	int rc, vpd;
 
 	mutex_lock(&adapter->reset_lock);
 
+	if (adapter->mac_change_pending) {
+		__ibmvnic_set_mac(netdev, &adapter->desired.mac);
+		adapter->mac_change_pending = false;
+	}
+
 	if (adapter->state != VNIC_CLOSED) {
 		rc = ibmvnic_login(netdev);
 		if (rc) {
@@ -927,6 +1016,13 @@ static int ibmvnic_open(struct net_device *netdev)
 	}
 
 	rc = __ibmvnic_open(netdev);
+	netif_carrier_on(netdev);
+
+	/* Vital Product Data (VPD) */
+	vpd = ibmvnic_get_vpd(adapter);
+	if (vpd)
+		netdev_err(netdev, "failed to initialize Vital Product Data (VPD)\n");
+
 	mutex_unlock(&adapter->reset_lock);
 
 	return rc;
@@ -1200,11 +1296,41 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 		be32_to_cpu(adapter->login_rsp_buf->off_txsubm_subcrqs));
 
 	index = tx_pool->free_map[tx_pool->consumer_index];
-	offset = index * adapter->req_mtu;
-	dst = tx_pool->long_term_buff.buff + offset;
-	memset(dst, 0, adapter->req_mtu);
-	skb_copy_from_linear_data(skb, dst, skb->len);
-	data_dma_addr = tx_pool->long_term_buff.addr + offset;
+
+	if (skb_is_gso(skb)) {
+		offset = tx_pool->tso_index * IBMVNIC_TSO_BUF_SZ;
+		dst = tx_pool->tso_ltb.buff + offset;
+		memset(dst, 0, IBMVNIC_TSO_BUF_SZ);
+		data_dma_addr = tx_pool->tso_ltb.addr + offset;
+		tx_pool->tso_index++;
+		if (tx_pool->tso_index == IBMVNIC_TSO_BUFS)
+			tx_pool->tso_index = 0;
+	} else {
+		offset = index * adapter->req_mtu;
+		dst = tx_pool->long_term_buff.buff + offset;
+		memset(dst, 0, adapter->req_mtu);
+		data_dma_addr = tx_pool->long_term_buff.addr + offset;
+	}
+
+	if (skb_shinfo(skb)->nr_frags) {
+		int cur, i;
+
+		/* Copy the head */
+		skb_copy_from_linear_data(skb, dst, skb_headlen(skb));
+		cur = skb_headlen(skb);
+
+		/* Copy the frags */
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+			const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+			memcpy(dst + cur,
+			       page_address(skb_frag_page(frag)) +
+			       frag->page_offset, skb_frag_size(frag));
+			cur += skb_frag_size(frag);
+		}
+	} else {
+		skb_copy_from_linear_data(skb, dst, skb->len);
+	}
 
 	tx_pool->consumer_index =
 	    (tx_pool->consumer_index + 1) %
@@ -1225,7 +1351,10 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	tx_crq.v1.n_sge = 1;
 	tx_crq.v1.flags1 = IBMVNIC_TX_COMP_NEEDED;
 	tx_crq.v1.correlator = cpu_to_be32(index);
-	tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->long_term_buff.map_id);
+	if (skb_is_gso(skb))
+		tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->tso_ltb.map_id);
+	else
+		tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->long_term_buff.map_id);
 	tx_crq.v1.sge_len = cpu_to_be32(skb->len);
 	tx_crq.v1.ioba = cpu_to_be64(data_dma_addr);
 
@@ -1250,6 +1379,11 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 		tx_crq.v1.flags1 |= IBMVNIC_TX_CHKSUM_OFFLOAD;
 		hdrs += 2;
 	}
+	if (skb_is_gso(skb)) {
+		tx_crq.v1.flags1 |= IBMVNIC_TX_LSO;
+		tx_crq.v1.mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
+		hdrs += 2;
+	}
 	/* determine if l2/3/4 headers are sent to firmware */
 	if ((*hdrs >> 7) & 1 &&
 	    (skb->protocol == htons(ETH_P_IP) ||
@@ -1371,7 +1505,7 @@ static void ibmvnic_set_multi(struct net_device *netdev)
 	}
 }
 
-static int ibmvnic_set_mac(struct net_device *netdev, void *p)
+static int __ibmvnic_set_mac(struct net_device *netdev, struct sockaddr *p)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 	struct sockaddr *addr = p;
@@ -1389,6 +1523,22 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p)
 	return 0;
 }
 
+static int ibmvnic_set_mac(struct net_device *netdev, void *p)
+{
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+	struct sockaddr *addr = p;
+
+	if (adapter->state != VNIC_OPEN) {
+		memcpy(&adapter->desired.mac, addr, sizeof(struct sockaddr));
+		adapter->mac_change_pending = true;
+		return 0;
+	}
+
+	__ibmvnic_set_mac(netdev, addr);
+
+	return 0;
+}
+
 /**
  * do_reset returns zero if we are able to keep processing reset events, or
  * non-zero if we hit a fatal error and must halt.
@@ -1415,6 +1565,13 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 	if (rc)
 		return rc;
 
+	if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM ||
+	    adapter->wait_for_reset) {
+		release_resources(adapter);
+		release_sub_crqs(adapter);
+		release_crq_queue(adapter);
+	}
+
 	if (adapter->reset_reason != VNIC_RESET_NON_FATAL) {
 		/* remove the closed state so when we call open it appears
 		 * we are coming from the probed state.
@@ -1423,7 +1580,7 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 
 		rc = ibmvnic_init(adapter);
 		if (rc)
-			return 0;
+			return IBMVNIC_INIT_FAILED;
 
 		/* If the adapter was in PROBE state prior to the reset,
 		 * exit here.
@@ -1437,16 +1594,23 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 			return 0;
 		}
 
-		rc = reset_tx_pools(adapter);
-		if (rc)
-			return rc;
+		if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM ||
+		    adapter->wait_for_reset) {
+			rc = init_resources(adapter);
+			if (rc)
+				return rc;
+		} else {
+			rc = reset_tx_pools(adapter);
+			if (rc)
+				return rc;
 
-		rc = reset_rx_pools(adapter);
-		if (rc)
-			return rc;
+			rc = reset_rx_pools(adapter);
+			if (rc)
+				return rc;
 
-		if (reset_state == VNIC_CLOSED)
-			return 0;
+			if (reset_state == VNIC_CLOSED)
+				return 0;
+		}
 	}
 
 	rc = __ibmvnic_open(netdev);
@@ -1506,7 +1670,7 @@ static void __ibmvnic_reset(struct work_struct *work)
 	struct ibmvnic_adapter *adapter;
 	struct net_device *netdev;
 	u32 reset_state;
-	int rc;
+	int rc = 0;
 
 	adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
 	netdev = adapter->netdev;
@@ -1519,12 +1683,18 @@ static void __ibmvnic_reset(struct work_struct *work)
 	while (rwi) {
 		rc = do_reset(adapter, rwi, reset_state);
 		kfree(rwi);
-		if (rc)
+		if (rc && rc != IBMVNIC_INIT_FAILED)
 			break;
 
 		rwi = get_next_rwi(adapter);
 	}
 
+	if (adapter->wait_for_reset) {
+		adapter->wait_for_reset = false;
+		adapter->reset_done_rc = rc;
+		complete(&adapter->reset_done);
+	}
+
 	if (rc) {
 		netdev_dbg(adapter->netdev, "Reset failed\n");
 		free_all_rwi(adapter);
@@ -1704,9 +1874,42 @@ static void ibmvnic_netpoll_controller(struct net_device *dev)
 }
 #endif
 
+static int wait_for_reset(struct ibmvnic_adapter *adapter)
+{
+	adapter->fallback.mtu = adapter->req_mtu;
+	adapter->fallback.rx_queues = adapter->req_rx_queues;
+	adapter->fallback.tx_queues = adapter->req_tx_queues;
+	adapter->fallback.rx_entries = adapter->req_rx_add_entries_per_subcrq;
+	adapter->fallback.tx_entries = adapter->req_tx_entries_per_subcrq;
+
+	init_completion(&adapter->reset_done);
+	ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM);
+	adapter->wait_for_reset = true;
+	wait_for_completion(&adapter->reset_done);
+
+	if (adapter->reset_done_rc) {
+		adapter->desired.mtu = adapter->fallback.mtu;
+		adapter->desired.rx_queues = adapter->fallback.rx_queues;
+		adapter->desired.tx_queues = adapter->fallback.tx_queues;
+		adapter->desired.rx_entries = adapter->fallback.rx_entries;
+		adapter->desired.tx_entries = adapter->fallback.tx_entries;
+
+		init_completion(&adapter->reset_done);
+		ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM);
+		wait_for_completion(&adapter->reset_done);
+	}
+	adapter->wait_for_reset = false;
+
+	return adapter->reset_done_rc;
+}
+
 static int ibmvnic_change_mtu(struct net_device *netdev, int new_mtu)
 {
-	return -EOPNOTSUPP;
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+
+	adapter->desired.mtu = new_mtu + ETH_HLEN;
+
+	return wait_for_reset(adapter);
 }
 
 static const struct net_device_ops ibmvnic_netdev_ops = {
@@ -1748,11 +1951,15 @@ static int ibmvnic_get_link_ksettings(struct net_device *netdev,
 	return 0;
 }
 
-static void ibmvnic_get_drvinfo(struct net_device *dev,
+static void ibmvnic_get_drvinfo(struct net_device *netdev,
 				struct ethtool_drvinfo *info)
 {
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+
 	strlcpy(info->driver, ibmvnic_driver_name, sizeof(info->driver));
 	strlcpy(info->version, IBMVNIC_DRIVER_VERSION, sizeof(info->version));
+	strlcpy(info->fw_version, adapter->fw_version,
+		sizeof(info->fw_version));
 }
 
 static u32 ibmvnic_get_msglevel(struct net_device *netdev)
@@ -1794,6 +2001,27 @@ static void ibmvnic_get_ringparam(struct net_device *netdev,
 	ring->rx_jumbo_pending = 0;
 }
 
+static int ibmvnic_set_ringparam(struct net_device *netdev,
+				 struct ethtool_ringparam *ring)
+{
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+
+	if (ring->rx_pending > adapter->max_rx_add_entries_per_subcrq  ||
+	    ring->tx_pending > adapter->max_tx_entries_per_subcrq) {
+		netdev_err(netdev, "Invalid request.\n");
+		netdev_err(netdev, "Max tx buffers = %llu\n",
+			   adapter->max_rx_add_entries_per_subcrq);
+		netdev_err(netdev, "Max rx buffers = %llu\n",
+			   adapter->max_tx_entries_per_subcrq);
+		return -EINVAL;
+	}
+
+	adapter->desired.rx_entries = ring->rx_pending;
+	adapter->desired.tx_entries = ring->tx_pending;
+
+	return wait_for_reset(adapter);
+}
+
 static void ibmvnic_get_channels(struct net_device *netdev,
 				 struct ethtool_channels *channels)
 {
@@ -1809,6 +2037,17 @@ static void ibmvnic_get_channels(struct net_device *netdev,
 	channels->combined_count = 0;
 }
 
+static int ibmvnic_set_channels(struct net_device *netdev,
+				struct ethtool_channels *channels)
+{
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+
+	adapter->desired.rx_queues = channels->rx_count;
+	adapter->desired.tx_queues = channels->tx_count;
+
+	return wait_for_reset(adapter);
+}
+
 static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(dev);
@@ -1905,7 +2144,9 @@ static const struct ethtool_ops ibmvnic_ethtool_ops = {
 	.set_msglevel		= ibmvnic_set_msglevel,
 	.get_link		= ibmvnic_get_link,
 	.get_ringparam		= ibmvnic_get_ringparam,
+	.set_ringparam		= ibmvnic_set_ringparam,
 	.get_channels		= ibmvnic_get_channels,
+	.set_channels		= ibmvnic_set_channels,
 	.get_strings            = ibmvnic_get_strings,
 	.get_sset_count         = ibmvnic_get_sset_count,
 	.get_ethtool_stats	= ibmvnic_get_ethtool_stats,
@@ -2371,6 +2612,7 @@ static void ibmvnic_send_req_caps(struct ibmvnic_adapter *adapter, int retry)
 {
 	struct device *dev = &adapter->vdev->dev;
 	union ibmvnic_crq crq;
+	int max_entries;
 
 	if (!retry) {
 		/* Sub-CRQ entries are 32 byte long */
@@ -2382,21 +2624,60 @@ static void ibmvnic_send_req_caps(struct ibmvnic_adapter *adapter, int retry)
 			return;
 		}
 
-		/* Get the minimum between the queried max and the entries
-		 * that fit in our PAGE_SIZE
-		 */
-		adapter->req_tx_entries_per_subcrq =
-		    adapter->max_tx_entries_per_subcrq > entries_page ?
-		    entries_page : adapter->max_tx_entries_per_subcrq;
-		adapter->req_rx_add_entries_per_subcrq =
-		    adapter->max_rx_add_entries_per_subcrq > entries_page ?
-		    entries_page : adapter->max_rx_add_entries_per_subcrq;
-
-		adapter->req_tx_queues = adapter->opt_tx_comp_sub_queues;
-		adapter->req_rx_queues = adapter->opt_rx_comp_queues;
-		adapter->req_rx_add_queues = adapter->max_rx_add_queues;
+		if (adapter->desired.mtu)
+			adapter->req_mtu = adapter->desired.mtu;
+		else
+			adapter->req_mtu = adapter->netdev->mtu + ETH_HLEN;
+
+		if (!adapter->desired.tx_entries)
+			adapter->desired.tx_entries =
+					adapter->max_tx_entries_per_subcrq;
+		if (!adapter->desired.rx_entries)
+			adapter->desired.rx_entries =
+					adapter->max_rx_add_entries_per_subcrq;
 
-		adapter->req_mtu = adapter->netdev->mtu + ETH_HLEN;
+		max_entries = IBMVNIC_MAX_LTB_SIZE /
+			      (adapter->req_mtu + IBMVNIC_BUFFER_HLEN);
+
+		if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) *
+			adapter->desired.tx_entries > IBMVNIC_MAX_LTB_SIZE) {
+			adapter->desired.tx_entries = max_entries;
+		}
+
+		if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) *
+			adapter->desired.rx_entries > IBMVNIC_MAX_LTB_SIZE) {
+			adapter->desired.rx_entries = max_entries;
+		}
+
+		if (adapter->desired.tx_entries)
+			adapter->req_tx_entries_per_subcrq =
+					adapter->desired.tx_entries;
+		else
+			adapter->req_tx_entries_per_subcrq =
+					adapter->max_tx_entries_per_subcrq;
+
+		if (adapter->desired.rx_entries)
+			adapter->req_rx_add_entries_per_subcrq =
+					adapter->desired.rx_entries;
+		else
+			adapter->req_rx_add_entries_per_subcrq =
+					adapter->max_rx_add_entries_per_subcrq;
+
+		if (adapter->desired.tx_queues)
+			adapter->req_tx_queues =
+					adapter->desired.tx_queues;
+		else
+			adapter->req_tx_queues =
+					adapter->opt_tx_comp_sub_queues;
+
+		if (adapter->desired.rx_queues)
+			adapter->req_rx_queues =
+					adapter->desired.rx_queues;
+		else
+			adapter->req_rx_queues =
+					adapter->opt_rx_comp_queues;
+
+		adapter->req_rx_add_queues = adapter->max_rx_add_queues;
 	}
 
 	memset(&crq, 0, sizeof(crq));
@@ -2609,6 +2890,55 @@ static int send_version_xchg(struct ibmvnic_adapter *adapter)
 	return ibmvnic_send_crq(adapter, &crq);
 }
 
+struct vnic_login_client_data {
+	u8	type;
+	__be16	len;
+	char	name;
+} __packed;
+
+static int vnic_client_data_len(struct ibmvnic_adapter *adapter)
+{
+	int len;
+
+	/* Calculate the amount of buffer space needed for the
+	 * vnic client data in the login buffer. There are four entries,
+	 * OS name, LPAR name, device name, and a null last entry.
+	 */
+	len = 4 * sizeof(struct vnic_login_client_data);
+	len += 6; /* "Linux" plus NULL */
+	len += strlen(utsname()->nodename) + 1;
+	len += strlen(adapter->netdev->name) + 1;
+
+	return len;
+}
+
+static void vnic_add_client_data(struct ibmvnic_adapter *adapter,
+				 struct vnic_login_client_data *vlcd)
+{
+	const char *os_name = "Linux";
+	int len;
+
+	/* Type 1 - LPAR OS */
+	vlcd->type = 1;
+	len = strlen(os_name) + 1;
+	vlcd->len = cpu_to_be16(len);
+	strncpy(&vlcd->name, os_name, len);
+	vlcd = (struct vnic_login_client_data *)((char *)&vlcd->name + len);
+
+	/* Type 2 - LPAR name */
+	vlcd->type = 2;
+	len = strlen(utsname()->nodename) + 1;
+	vlcd->len = cpu_to_be16(len);
+	strncpy(&vlcd->name, utsname()->nodename, len);
+	vlcd = (struct vnic_login_client_data *)((char *)&vlcd->name + len);
+
+	/* Type 3 - device name */
+	vlcd->type = 3;
+	len = strlen(adapter->netdev->name) + 1;
+	vlcd->len = cpu_to_be16(len);
+	strncpy(&vlcd->name, adapter->netdev->name, len);
+}
+
 static void send_login(struct ibmvnic_adapter *adapter)
 {
 	struct ibmvnic_login_rsp_buffer *login_rsp_buffer;
@@ -2621,13 +2951,18 @@ static void send_login(struct ibmvnic_adapter *adapter)
 	size_t buffer_size;
 	__be64 *tx_list_p;
 	__be64 *rx_list_p;
+	int client_data_len;
+	struct vnic_login_client_data *vlcd;
 	int i;
 
+	client_data_len = vnic_client_data_len(adapter);
+
 	buffer_size =
 	    sizeof(struct ibmvnic_login_buffer) +
-	    sizeof(u64) * (adapter->req_tx_queues + adapter->req_rx_queues);
+	    sizeof(u64) * (adapter->req_tx_queues + adapter->req_rx_queues) +
+	    client_data_len;
 
-	login_buffer = kmalloc(buffer_size, GFP_ATOMIC);
+	login_buffer = kzalloc(buffer_size, GFP_ATOMIC);
 	if (!login_buffer)
 		goto buf_alloc_failed;
 
@@ -2694,6 +3029,15 @@ static void send_login(struct ibmvnic_adapter *adapter)
 		}
 	}
 
+	/* Insert vNIC login client data */
+	vlcd = (struct vnic_login_client_data *)
+		((char *)rx_list_p + (sizeof(u64) * adapter->req_rx_queues));
+	login_buffer->client_data_offset =
+			cpu_to_be32((char *)vlcd - (char *)login_buffer);
+	login_buffer->client_data_len = cpu_to_be32(client_data_len);
+
+	vnic_add_client_data(adapter, vlcd);
+
 	netdev_dbg(adapter->netdev, "Login Buffer:\n");
 	for (i = 0; i < (adapter->login_buf_sz - 1) / 8 + 1; i++) {
 		netdev_dbg(adapter->netdev, "%016lx\n",
@@ -2872,6 +3216,73 @@ static void send_cap_queries(struct ibmvnic_adapter *adapter)
 	ibmvnic_send_crq(adapter, &crq);
 }
 
+static void handle_vpd_size_rsp(union ibmvnic_crq *crq,
+				struct ibmvnic_adapter *adapter)
+{
+	struct device *dev = &adapter->vdev->dev;
+
+	if (crq->get_vpd_size_rsp.rc.code) {
+		dev_err(dev, "Error retrieving VPD size, rc=%x\n",
+			crq->get_vpd_size_rsp.rc.code);
+		complete(&adapter->fw_done);
+		return;
+	}
+
+	adapter->vpd->len = be64_to_cpu(crq->get_vpd_size_rsp.len);
+	complete(&adapter->fw_done);
+}
+
+static void handle_vpd_rsp(union ibmvnic_crq *crq,
+			   struct ibmvnic_adapter *adapter)
+{
+	struct device *dev = &adapter->vdev->dev;
+	unsigned char *substr = NULL, *ptr = NULL;
+	u8 fw_level_len = 0;
+
+	memset(adapter->fw_version, 0, 32);
+
+	dma_unmap_single(dev, adapter->vpd->dma_addr, adapter->vpd->len,
+			 DMA_FROM_DEVICE);
+
+	if (crq->get_vpd_rsp.rc.code) {
+		dev_err(dev, "Error retrieving VPD from device, rc=%x\n",
+			crq->get_vpd_rsp.rc.code);
+		goto complete;
+	}
+
+	/* get the position of the firmware version info
+	 * located after the ASCII 'RM' substring in the buffer
+	 */
+	substr = strnstr(adapter->vpd->buff, "RM", adapter->vpd->len);
+	if (!substr) {
+		dev_info(dev, "No FW level provided by VPD\n");
+		goto complete;
+	}
+
+	/* get length of firmware level ASCII substring */
+	if ((substr + 2) < (adapter->vpd->buff + adapter->vpd->len)) {
+		fw_level_len = *(substr + 2);
+	} else {
+		dev_info(dev, "Length of FW substr extrapolated VDP buff\n");
+		goto complete;
+	}
+
+	/* copy firmware version string from vpd into adapter */
+	if ((substr + 3 + fw_level_len) <
+	    (adapter->vpd->buff + adapter->vpd->len)) {
+		ptr = strncpy((char *)adapter->fw_version,
+			      substr + 3, fw_level_len);
+
+		if (!ptr)
+			dev_err(dev, "Failed to isolate FW level string\n");
+	} else {
+		dev_info(dev, "FW substr extrapolated VPD buff\n");
+	}
+
+complete:
+	complete(&adapter->fw_done);
+}
+
 static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter)
 {
 	struct device *dev = &adapter->vdev->dev;
@@ -2940,14 +3351,14 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter)
 	adapter->ip_offload_ctrl.udp_ipv4_chksum = buf->udp_ipv4_chksum;
 	adapter->ip_offload_ctrl.tcp_ipv6_chksum = buf->tcp_ipv6_chksum;
 	adapter->ip_offload_ctrl.udp_ipv6_chksum = buf->udp_ipv6_chksum;
+	adapter->ip_offload_ctrl.large_tx_ipv4 = buf->large_tx_ipv4;
+	adapter->ip_offload_ctrl.large_tx_ipv6 = buf->large_tx_ipv6;
 
-	/* large_tx/rx disabled for now, additional features needed */
-	adapter->ip_offload_ctrl.large_tx_ipv4 = 0;
-	adapter->ip_offload_ctrl.large_tx_ipv6 = 0;
+	/* large_rx disabled for now, additional features needed */
 	adapter->ip_offload_ctrl.large_rx_ipv4 = 0;
 	adapter->ip_offload_ctrl.large_rx_ipv6 = 0;
 
-	adapter->netdev->features = NETIF_F_GSO;
+	adapter->netdev->features = NETIF_F_SG | NETIF_F_GSO;
 
 	if (buf->tcp_ipv4_chksum || buf->udp_ipv4_chksum)
 		adapter->netdev->features |= NETIF_F_IP_CSUM;
@@ -2959,6 +3370,13 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter)
 	    (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)))
 		adapter->netdev->features |= NETIF_F_RXCSUM;
 
+	if (buf->large_tx_ipv4)
+		adapter->netdev->features |= NETIF_F_TSO;
+	if (buf->large_tx_ipv6)
+		adapter->netdev->features |= NETIF_F_TSO6;
+
+	adapter->netdev->hw_features |= adapter->netdev->features;
+
 	memset(&crq, 0, sizeof(crq));
 	crq.control_ip_offload.first = IBMVNIC_CRQ_CMD;
 	crq.control_ip_offload.cmd = CONTROL_IP_OFFLOAD;
@@ -3210,6 +3628,7 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
 			    struct ibmvnic_adapter *adapter)
 {
 	struct device *dev = &adapter->vdev->dev;
+	struct net_device *netdev = adapter->netdev;
 	struct ibmvnic_login_rsp_buffer *login_rsp = adapter->login_rsp_buf;
 	struct ibmvnic_login_buffer *login = adapter->login_buf;
 	int i;
@@ -3229,6 +3648,8 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
 		return 0;
 	}
 
+	netdev->mtu = adapter->req_mtu - ETH_HLEN;
+
 	netdev_dbg(adapter->netdev, "Login Response Buffer:\n");
 	for (i = 0; i < (adapter->login_rsp_buf_sz - 1) / 8 + 1; i++) {
 		netdev_dbg(adapter->netdev, "%016lx\n",
@@ -3593,6 +4014,12 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
 		netdev_dbg(netdev, "Got Collect firmware trace Response\n");
 		complete(&adapter->fw_done);
 		break;
+	case GET_VPD_SIZE_RSP:
+		handle_vpd_size_rsp(crq, adapter);
+		break;
+	case GET_VPD_RSP:
+		handle_vpd_rsp(crq, adapter);
+		break;
 	default:
 		netdev_err(netdev, "Got an invalid cmd type 0x%02x\n",
 			   gen_crq->cmd);
@@ -3784,7 +4211,7 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
 	unsigned long timeout = msecs_to_jiffies(30000);
 	int rc;
 
-	if (adapter->resetting) {
+	if (adapter->resetting && !adapter->wait_for_reset) {
 		rc = ibmvnic_reset_crq(adapter);
 		if (!rc)
 			rc = vio_enable_interrupts(adapter->vdev);
@@ -3818,7 +4245,7 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
 		return -1;
 	}
 
-	if (adapter->resetting)
+	if (adapter->resetting && !adapter->wait_for_reset)
 		rc = reset_sub_crq_queues(adapter);
 	else
 		rc = init_sub_crqs(adapter);
@@ -3887,6 +4314,8 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	mutex_init(&adapter->rwi_lock);
 	adapter->resetting = false;
 
+	adapter->mac_change_pending = false;
+
 	do {
 		rc = ibmvnic_init(adapter);
 		if (rc && rc != EAGAIN)
@@ -3894,11 +4323,14 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	} while (rc == EAGAIN);
 
 	netdev->mtu = adapter->req_mtu - ETH_HLEN;
+	netdev->min_mtu = adapter->min_mtu - ETH_HLEN;
+	netdev->max_mtu = adapter->max_mtu - ETH_HLEN;
 
 	rc = device_create_file(&dev->dev, &dev_attr_failover);
 	if (rc)
 		goto ibmvnic_init_fail;
 
+	netif_carrier_off(netdev);
 	rc = register_netdev(netdev);
 	if (rc) {
 		dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc);
@@ -3907,6 +4339,9 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	dev_info(&dev->dev, "ibmvnic registered\n");
 
 	adapter->state = VNIC_PROBED;
+
+	adapter->wait_for_reset = false;
+
 	return 0;
 
 ibmvnic_register_fail:
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index d02257ccc377..4487f1e2c266 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -30,6 +30,8 @@
 #define IBMVNIC_DRIVER_VERSION	"1.0.1"
 #define IBMVNIC_INVALID_MAP	-1
 #define IBMVNIC_STATS_TIMEOUT	1
+#define IBMVNIC_INIT_FAILED	2
+
 /* basic structures plus 100 2k buffers */
 #define IBMVNIC_IO_ENTITLEMENT_DEFAULT	610305
 
@@ -39,6 +41,12 @@
 #define IBMVNIC_BUFFS_PER_POOL	100
 #define IBMVNIC_MAX_TX_QUEUES	5
 
+#define IBMVNIC_TSO_BUF_SZ	65536
+#define IBMVNIC_TSO_BUFS	64
+
+#define IBMVNIC_MAX_LTB_SIZE ((1 << (MAX_ORDER - 1)) * PAGE_SIZE)
+#define IBMVNIC_BUFFER_HLEN 500
+
 struct ibmvnic_login_buffer {
 	__be32 len;
 	__be32 version;
@@ -49,6 +57,8 @@ struct ibmvnic_login_buffer {
 	__be32 off_rxcomp_subcrqs;
 	__be32 login_rsp_ioba;
 	__be32 login_rsp_len;
+	__be32 client_data_offset;
+	__be32 client_data_len;
 } __packed __aligned(8);
 
 struct ibmvnic_login_rsp_buffer {
@@ -550,6 +560,12 @@ struct ibmvnic_multicast_ctrl {
 	struct ibmvnic_rc rc;
 } __packed __aligned(8);
 
+struct ibmvnic_get_vpd_size {
+	u8 first;
+	u8 cmd;
+	u8 reserved[14];
+} __packed __aligned(8);
+
 struct ibmvnic_get_vpd_size_rsp {
 	u8 first;
 	u8 cmd;
@@ -567,6 +583,13 @@ struct ibmvnic_get_vpd {
 	u8 reserved[4];
 } __packed __aligned(8);
 
+struct ibmvnic_get_vpd_rsp {
+	u8 first;
+	u8 cmd;
+	u8 reserved[10];
+	struct ibmvnic_rc rc;
+} __packed __aligned(8);
+
 struct ibmvnic_acl_change_indication {
 	u8 first;
 	u8 cmd;
@@ -692,10 +715,10 @@ union ibmvnic_crq {
 	struct ibmvnic_change_mac_addr change_mac_addr_rsp;
 	struct ibmvnic_multicast_ctrl multicast_ctrl;
 	struct ibmvnic_multicast_ctrl multicast_ctrl_rsp;
-	struct ibmvnic_generic_crq get_vpd_size;
+	struct ibmvnic_get_vpd_size get_vpd_size;
 	struct ibmvnic_get_vpd_size_rsp get_vpd_size_rsp;
 	struct ibmvnic_get_vpd get_vpd;
-	struct ibmvnic_generic_crq get_vpd_rsp;
+	struct ibmvnic_get_vpd_rsp get_vpd_rsp;
 	struct ibmvnic_acl_change_indication acl_change_indication;
 	struct ibmvnic_acl_query acl_query;
 	struct ibmvnic_generic_crq acl_query_rsp;
@@ -896,6 +919,8 @@ struct ibmvnic_tx_pool {
 	wait_queue_head_t ibmvnic_tx_comp_q;
 	struct task_struct *work_thread;
 	struct ibmvnic_long_term_buff long_term_buff;
+	struct ibmvnic_long_term_buff tso_ltb;
+	int tso_index;
 };
 
 struct ibmvnic_rx_buff {
@@ -927,6 +952,12 @@ struct ibmvnic_error_buff {
 	__be32 error_id;
 };
 
+struct ibmvnic_vpd {
+	unsigned char *buff;
+	dma_addr_t dma_addr;
+	u64 len;
+};
+
 enum vnic_state {VNIC_PROBING = 1,
 		 VNIC_PROBED,
 		 VNIC_OPENING,
@@ -940,13 +971,23 @@ enum ibmvnic_reset_reason {VNIC_RESET_FAILOVER = 1,
 			   VNIC_RESET_MOBILITY,
 			   VNIC_RESET_FATAL,
 			   VNIC_RESET_NON_FATAL,
-			   VNIC_RESET_TIMEOUT};
+			   VNIC_RESET_TIMEOUT,
+			   VNIC_RESET_CHANGE_PARAM};
 
 struct ibmvnic_rwi {
 	enum ibmvnic_reset_reason reset_reason;
 	struct list_head list;
 };
 
+struct ibmvnic_tunables {
+	u64 rx_queues;
+	u64 tx_queues;
+	u64 rx_entries;
+	u64 tx_entries;
+	u64 mtu;
+	struct sockaddr mac;
+};
+
 struct ibmvnic_adapter {
 	struct vio_dev *vdev;
 	struct net_device *netdev;
@@ -958,6 +999,10 @@ struct ibmvnic_adapter {
 	dma_addr_t ip_offload_ctrl_tok;
 	u32 msg_enable;
 
+	/* Vital Product Data (VPD) */
+	struct ibmvnic_vpd *vpd;
+	char fw_version[32];
+
 	/* Statistics */
 	struct ibmvnic_statistics stats;
 	dma_addr_t stats_token;
@@ -1007,6 +1052,10 @@ struct ibmvnic_adapter {
 	struct completion fw_done;
 	int fw_done_rc;
 
+	struct completion reset_done;
+	int reset_done_rc;
+	bool wait_for_reset;
+
 	/* partner capabilities */
 	u64 min_tx_queues;
 	u64 min_rx_queues;
@@ -1051,4 +1100,9 @@ struct ibmvnic_adapter {
 	struct work_struct ibmvnic_reset;
 	bool resetting;
 	bool napi_enabled, from_passive_init;
+
+	bool mac_change_pending;
+
+	struct ibmvnic_tunables desired;
+	struct ibmvnic_tunables fallback;
 };
diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c
index 4d10270ddf8f..44b3937f7e81 100644
--- a/drivers/net/ethernet/intel/e100.c
+++ b/drivers/net/ethernet/intel/e100.c
@@ -1710,9 +1710,9 @@ static void e100_adjust_adaptive_ifs(struct nic *nic, int speed, int duplex)
 	}
 }
 
-static void e100_watchdog(unsigned long data)
+static void e100_watchdog(struct timer_list *t)
 {
-	struct nic *nic = (struct nic *)data;
+	struct nic *nic = from_timer(nic, t, watchdog);
 	struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET };
 	u32 speed;
 
@@ -1910,11 +1910,10 @@ static int e100_alloc_cbs(struct nic *nic)
 	nic->cb_to_use = nic->cb_to_send = nic->cb_to_clean = NULL;
 	nic->cbs_avail = 0;
 
-	nic->cbs = pci_pool_alloc(nic->cbs_pool, GFP_KERNEL,
-				  &nic->cbs_dma_addr);
+	nic->cbs = pci_pool_zalloc(nic->cbs_pool, GFP_KERNEL,
+				   &nic->cbs_dma_addr);
 	if (!nic->cbs)
 		return -ENOMEM;
-	memset(nic->cbs, 0, count * sizeof(struct cb));
 
 	for (cb = nic->cbs, i = 0; i < count; cb++, i++) {
 		cb->next = (i + 1 < count) ? cb + 1 : nic->cbs;
@@ -2921,7 +2920,7 @@ static int e100_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	pci_set_master(pdev);
 
-	setup_timer(&nic->watchdog, e100_watchdog, (unsigned long)nic);
+	timer_setup(&nic->watchdog, e100_watchdog, 0);
 
 	INIT_WORK(&nic->tx_timeout_task, e100_tx_timeout_task);
 
diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h
index 0641c0098738..afb7ebe20b24 100644
--- a/drivers/net/ethernet/intel/e1000e/defines.h
+++ b/drivers/net/ethernet/intel/e1000e/defines.h
@@ -398,6 +398,7 @@
 #define E1000_ICR_LSC           0x00000004 /* Link Status Change */
 #define E1000_ICR_RXSEQ         0x00000008 /* Rx sequence error */
 #define E1000_ICR_RXDMT0        0x00000010 /* Rx desc min. threshold (0) */
+#define E1000_ICR_RXO           0x00000040 /* Receiver Overrun */
 #define E1000_ICR_RXT0          0x00000080 /* Rx timer intr (ring 0) */
 #define E1000_ICR_ECCER         0x00400000 /* Uncorrectable ECC Error */
 /* If this bit asserted, the driver should claim the interrupt */
diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
index 98e68888abb1..2311b31bdcac 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -94,10 +94,6 @@ struct e1000_info;
  */
 #define E1000_CHECK_RESET_COUNT		25
 
-#define DEFAULT_RDTR			0
-#define DEFAULT_RADV			8
-#define BURST_RDTR			0x20
-#define BURST_RADV			0x20
 #define PCICFG_DESC_RING_STATUS		0xe4
 #define FLUSH_DESC_REQUIRED		0x100
 
diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c
index b322011ec282..f457c5703d0c 100644
--- a/drivers/net/ethernet/intel/e1000e/mac.c
+++ b/drivers/net/ethernet/intel/e1000e/mac.c
@@ -410,6 +410,9 @@ void e1000e_clear_hw_cntrs_base(struct e1000_hw *hw)
  *  Checks to see of the link status of the hardware has changed.  If a
  *  change in link status has been detected, then we read the PHY registers
  *  to get the current speed/duplex if link exists.
+ *
+ *  Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link
+ *  up).
  **/
 s32 e1000e_check_for_copper_link(struct e1000_hw *hw)
 {
@@ -423,7 +426,7 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw)
 	 * Change or Rx Sequence Error interrupt.
 	 */
 	if (!mac->get_link_status)
-		return 0;
+		return 1;
 
 	/* First we want to see if the MII Status Register reports
 	 * link.  If so, then we want to get the current speed/duplex
@@ -461,10 +464,12 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw)
 	 * different link partner.
 	 */
 	ret_val = e1000e_config_fc_after_link_up(hw);
-	if (ret_val)
+	if (ret_val) {
 		e_dbg("Error configuring flow control\n");
+		return ret_val;
+	}
 
-	return ret_val;
+	return 1;
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 327dfe5bedc0..f2f49239b015 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1071,7 +1071,8 @@ next_desc:
 }
 
 static void e1000_put_txbuf(struct e1000_ring *tx_ring,
-			    struct e1000_buffer *buffer_info)
+			    struct e1000_buffer *buffer_info,
+			    bool drop)
 {
 	struct e1000_adapter *adapter = tx_ring->adapter;
 
@@ -1085,7 +1086,10 @@ static void e1000_put_txbuf(struct e1000_ring *tx_ring,
 		buffer_info->dma = 0;
 	}
 	if (buffer_info->skb) {
-		dev_kfree_skb_any(buffer_info->skb);
+		if (drop)
+			dev_kfree_skb_any(buffer_info->skb);
+		else
+			dev_consume_skb_any(buffer_info->skb);
 		buffer_info->skb = NULL;
 	}
 	buffer_info->time_stamp = 0;
@@ -1199,7 +1203,7 @@ static void e1000e_tx_hwtstamp_work(struct work_struct *work)
 		wmb(); /* force write prior to skb_tstamp_tx */
 
 		skb_tstamp_tx(skb, &shhwtstamps);
-		dev_kfree_skb_any(skb);
+		dev_consume_skb_any(skb);
 	} else if (time_after(jiffies, adapter->tx_hwtstamp_start
 			      + adapter->tx_timeout_factor * HZ)) {
 		dev_kfree_skb_any(adapter->tx_hwtstamp_skb);
@@ -1254,7 +1258,7 @@ static bool e1000_clean_tx_irq(struct e1000_ring *tx_ring)
 				}
 			}
 
-			e1000_put_txbuf(tx_ring, buffer_info);
+			e1000_put_txbuf(tx_ring, buffer_info, false);
 			tx_desc->upper.data = 0;
 
 			i++;
@@ -1910,14 +1914,30 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data)
 	struct net_device *netdev = data;
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
+	u32 icr;
+	bool enable = true;
+
+	icr = er32(ICR);
+	if (icr & E1000_ICR_RXO) {
+		ew32(ICR, E1000_ICR_RXO);
+		enable = false;
+		/* napi poll will re-enable Other, make sure it runs */
+		if (napi_schedule_prep(&adapter->napi)) {
+			adapter->total_rx_bytes = 0;
+			adapter->total_rx_packets = 0;
+			__napi_schedule(&adapter->napi);
+		}
+	}
+	if (icr & E1000_ICR_LSC) {
+		ew32(ICR, E1000_ICR_LSC);
+		hw->mac.get_link_status = true;
+		/* guard against interrupt when we're going down */
+		if (!test_bit(__E1000_DOWN, &adapter->state))
+			mod_timer(&adapter->watchdog_timer, jiffies + 1);
+	}
 
-	hw->mac.get_link_status = true;
-
-	/* guard against interrupt when we're going down */
-	if (!test_bit(__E1000_DOWN, &adapter->state)) {
-		mod_timer(&adapter->watchdog_timer, jiffies + 1);
+	if (enable && !test_bit(__E1000_DOWN, &adapter->state))
 		ew32(IMS, E1000_IMS_OTHER);
-	}
 
 	return IRQ_HANDLED;
 }
@@ -2421,7 +2441,7 @@ static void e1000_clean_tx_ring(struct e1000_ring *tx_ring)
 
 	for (i = 0; i < tx_ring->count; i++) {
 		buffer_info = &tx_ring->buffer_info[i];
-		e1000_put_txbuf(tx_ring, buffer_info);
+		e1000_put_txbuf(tx_ring, buffer_info, false);
 	}
 
 	netdev_reset_queue(adapter->netdev);
@@ -2687,7 +2707,8 @@ static int e1000e_poll(struct napi_struct *napi, int weight)
 		napi_complete_done(napi, work_done);
 		if (!test_bit(__E1000_DOWN, &adapter->state)) {
 			if (adapter->msix_entries)
-				ew32(IMS, adapter->rx_ring->ims_val);
+				ew32(IMS, adapter->rx_ring->ims_val |
+				     E1000_IMS_OTHER);
 			else
 				e1000_irq_enable(adapter);
 		}
@@ -3004,8 +3025,8 @@ static void e1000_configure_tx(struct e1000_adapter *adapter)
 
 	hw->mac.ops.config_collision_dist(hw);
 
-	/* SPT and CNP Si errata workaround to avoid data corruption */
-	if (hw->mac.type >= e1000_pch_spt) {
+	/* SPT and KBL Si errata workaround to avoid data corruption */
+	if (hw->mac.type == e1000_pch_spt) {
 		u32 reg_val;
 
 		reg_val = er32(IOSFPC);
@@ -3013,7 +3034,9 @@ static void e1000_configure_tx(struct e1000_adapter *adapter)
 		ew32(IOSFPC, reg_val);
 
 		reg_val = er32(TARC(0));
-		reg_val |= E1000_TARC0_CB_MULTIQ_3_REQ;
+		/* SPT and KBL Si errata workaround to avoid Tx hang */
+		reg_val &= ~BIT(28);
+		reg_val |= BIT(29);
 		ew32(TARC(0), reg_val);
 	}
 }
@@ -3223,14 +3246,6 @@ static void e1000_configure_rx(struct e1000_adapter *adapter)
 		 */
 		ew32(RXDCTL(0), E1000_RXDCTL_DMA_BURST_ENABLE);
 		ew32(RXDCTL(1), E1000_RXDCTL_DMA_BURST_ENABLE);
-
-		/* override the delay timers for enabling bursting, only if
-		 * the value was not set by the user via module options
-		 */
-		if (adapter->rx_int_delay == DEFAULT_RDTR)
-			adapter->rx_int_delay = BURST_RDTR;
-		if (adapter->rx_abs_int_delay == DEFAULT_RADV)
-			adapter->rx_abs_int_delay = BURST_RADV;
 	}
 
 	/* set the Receive Delay Timer Register */
@@ -4204,7 +4219,7 @@ static void e1000e_trigger_lsc(struct e1000_adapter *adapter)
 	struct e1000_hw *hw = &adapter->hw;
 
 	if (adapter->msix_entries)
-		ew32(ICS, E1000_ICS_OTHER);
+		ew32(ICS, E1000_ICS_LSC | E1000_ICS_OTHER);
 	else
 		ew32(ICS, E1000_ICS_LSC);
 }
@@ -4808,9 +4823,9 @@ static void e1000e_update_phy_task(struct work_struct *work)
  * Need to wait a few seconds after link up to get diagnostic information from
  * the phy
  **/
-static void e1000_update_phy_info(unsigned long data)
+static void e1000_update_phy_info(struct timer_list *t)
 {
-	struct e1000_adapter *adapter = (struct e1000_adapter *)data;
+	struct e1000_adapter *adapter = from_timer(adapter, t, phy_info_timer);
 
 	if (test_bit(__E1000_DOWN, &adapter->state))
 		return;
@@ -5074,14 +5089,14 @@ static bool e1000e_has_link(struct e1000_adapter *adapter)
 
 	/* get_link_status is set on LSC (link status) interrupt or
 	 * Rx sequence error interrupt.  get_link_status will stay
-	 * false until the check_for_link establishes link
+	 * true until the check_for_link establishes link
 	 * for copper adapters ONLY
 	 */
 	switch (hw->phy.media_type) {
 	case e1000_media_type_copper:
 		if (hw->mac.get_link_status) {
 			ret_val = hw->mac.ops.check_for_link(hw);
-			link_active = !hw->mac.get_link_status;
+			link_active = ret_val > 0;
 		} else {
 			link_active = true;
 		}
@@ -5092,14 +5107,14 @@ static bool e1000e_has_link(struct e1000_adapter *adapter)
 		break;
 	case e1000_media_type_internal_serdes:
 		ret_val = hw->mac.ops.check_for_link(hw);
-		link_active = adapter->hw.mac.serdes_has_link;
+		link_active = hw->mac.serdes_has_link;
 		break;
 	default:
 	case e1000_media_type_unknown:
 		break;
 	}
 
-	if ((ret_val == E1000_ERR_PHY) && (hw->phy.type == e1000_phy_igp_3) &&
+	if ((ret_val == -E1000_ERR_PHY) && (hw->phy.type == e1000_phy_igp_3) &&
 	    (er32(CTRL) & E1000_PHY_CTRL_GBE_DISABLE)) {
 		/* See e1000_kmrn_lock_loss_workaround_ich8lan() */
 		e_info("Gigabit has been disabled, downgrading speed\n");
@@ -5144,9 +5159,9 @@ static void e1000e_check_82574_phy_workaround(struct e1000_adapter *adapter)
  * e1000_watchdog - Timer Call-back
  * @data: pointer to adapter cast into an unsigned long
  **/
-static void e1000_watchdog(unsigned long data)
+static void e1000_watchdog(struct timer_list *t)
 {
-	struct e1000_adapter *adapter = (struct e1000_adapter *)data;
+	struct e1000_adapter *adapter = from_timer(adapter, t, watchdog_timer);
 
 	/* Do the rest outside of interrupt context */
 	schedule_work(&adapter->watchdog_task);
@@ -5614,7 +5629,7 @@ dma_error:
 			i += tx_ring->count;
 		i--;
 		buffer_info = &tx_ring->buffer_info[i];
-		e1000_put_txbuf(tx_ring, buffer_info);
+		e1000_put_txbuf(tx_ring, buffer_info, true);
 	}
 
 	return 0;
@@ -7252,13 +7267,8 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto err_eeprom;
 	}
 
-	init_timer(&adapter->watchdog_timer);
-	adapter->watchdog_timer.function = e1000_watchdog;
-	adapter->watchdog_timer.data = (unsigned long)adapter;
-
-	init_timer(&adapter->phy_info_timer);
-	adapter->phy_info_timer.function = e1000_update_phy_info;
-	adapter->phy_info_timer.data = (unsigned long)adapter;
+	timer_setup(&adapter->watchdog_timer, e1000_watchdog, 0);
+	timer_setup(&adapter->phy_info_timer, e1000_update_phy_info, 0);
 
 	INIT_WORK(&adapter->reset_task, e1000_reset_task);
 	INIT_WORK(&adapter->watchdog_task, e1000_watchdog_task);
@@ -7411,7 +7421,7 @@ static void e1000_remove(struct pci_dev *pdev)
 	if (adapter->flags & FLAG_HAS_HW_TIMESTAMP) {
 		cancel_work_sync(&adapter->tx_hwtstamp_work);
 		if (adapter->tx_hwtstamp_skb) {
-			dev_kfree_skb_any(adapter->tx_hwtstamp_skb);
+			dev_consume_skb_any(adapter->tx_hwtstamp_skb);
 			adapter->tx_hwtstamp_skb = NULL;
 		}
 	}
diff --git a/drivers/net/ethernet/intel/e1000e/param.c b/drivers/net/ethernet/intel/e1000e/param.c
index 6d8c39abee16..47da51864543 100644
--- a/drivers/net/ethernet/intel/e1000e/param.c
+++ b/drivers/net/ethernet/intel/e1000e/param.c
@@ -73,17 +73,25 @@ E1000_PARAM(TxAbsIntDelay, "Transmit Absolute Interrupt Delay");
 /* Receive Interrupt Delay in units of 1.024 microseconds
  * hardware will likely hang if you set this to anything but zero.
  *
+ * Burst variant is used as default if device has FLAG2_DMA_BURST.
+ *
  * Valid Range: 0-65535
  */
 E1000_PARAM(RxIntDelay, "Receive Interrupt Delay");
+#define DEFAULT_RDTR	0
+#define BURST_RDTR	0x20
 #define MAX_RXDELAY 0xFFFF
 #define MIN_RXDELAY 0
 
 /* Receive Absolute Interrupt Delay in units of 1.024 microseconds
  *
+ * Burst variant is used as default if device has FLAG2_DMA_BURST.
+ *
  * Valid Range: 0-65535
  */
 E1000_PARAM(RxAbsIntDelay, "Receive Absolute Interrupt Delay");
+#define DEFAULT_RADV	8
+#define BURST_RADV	0x20
 #define MAX_RXABSDELAY 0xFFFF
 #define MIN_RXABSDELAY 0
 
@@ -297,6 +305,9 @@ void e1000e_check_options(struct e1000_adapter *adapter)
 					 .max = MAX_RXDELAY } }
 		};
 
+		if (adapter->flags2 & FLAG2_DMA_BURST)
+			opt.def = BURST_RDTR;
+
 		if (num_RxIntDelay > bd) {
 			adapter->rx_int_delay = RxIntDelay[bd];
 			e1000_validate_option(&adapter->rx_int_delay, &opt,
@@ -307,7 +318,7 @@ void e1000e_check_options(struct e1000_adapter *adapter)
 	}
 	/* Receive Absolute Interrupt Delay */
 	{
-		static const struct e1000_option opt = {
+		static struct e1000_option opt = {
 			.type = range_option,
 			.name = "Receive Absolute Interrupt Delay",
 			.err  = "using default of "
@@ -317,6 +328,9 @@ void e1000e_check_options(struct e1000_adapter *adapter)
 					 .max = MAX_RXABSDELAY } }
 		};
 
+		if (adapter->flags2 & FLAG2_DMA_BURST)
+			opt.def = BURST_RADV;
+
 		if (num_RxAbsIntDelay > bd) {
 			adapter->rx_abs_int_delay = RxAbsIntDelay[bd];
 			e1000_validate_option(&adapter->rx_abs_int_delay, &opt,
diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c
index d78d47b41a71..86ff0969efb6 100644
--- a/drivers/net/ethernet/intel/e1000e/phy.c
+++ b/drivers/net/ethernet/intel/e1000e/phy.c
@@ -1744,6 +1744,7 @@ s32 e1000e_phy_has_link_generic(struct e1000_hw *hw, u32 iterations,
 	s32 ret_val = 0;
 	u16 i, phy_status;
 
+	*success = false;
 	for (i = 0; i < iterations; i++) {
 		/* Some PHYs require the MII_BMSR register to be read
 		 * twice due to the link bit being sticky.  No harm doing
@@ -1763,16 +1764,16 @@ s32 e1000e_phy_has_link_generic(struct e1000_hw *hw, u32 iterations,
 		ret_val = e1e_rphy(hw, MII_BMSR, &phy_status);
 		if (ret_val)
 			break;
-		if (phy_status & BMSR_LSTATUS)
+		if (phy_status & BMSR_LSTATUS) {
+			*success = true;
 			break;
+		}
 		if (usec_interval >= 1000)
 			msleep(usec_interval / 1000);
 		else
 			udelay(usec_interval);
 	}
 
-	*success = (i < iterations);
-
 	return ret_val;
 }
 
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h
index 689c413b7782..46973fb234c5 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k.h
@@ -248,6 +248,29 @@ struct fm10k_udp_port {
 	__be16			port;
 };
 
+enum fm10k_macvlan_request_type {
+	FM10K_UC_MAC_REQUEST,
+	FM10K_MC_MAC_REQUEST,
+	FM10K_VLAN_REQUEST
+};
+
+struct fm10k_macvlan_request {
+	enum fm10k_macvlan_request_type type;
+	struct list_head list;
+	union {
+		struct fm10k_mac_request {
+			u8 addr[ETH_ALEN];
+			u16 glort;
+			u16 vid;
+		} mac;
+		struct fm10k_vlan_request {
+			u32 vid;
+			u8 vsi;
+		} vlan;
+	};
+	bool set;
+};
+
 /* one work queue for entire driver */
 extern struct workqueue_struct *fm10k_workqueue;
 
@@ -270,11 +293,15 @@ enum fm10k_flags_t {
 
 enum fm10k_state_t {
 	__FM10K_RESETTING,
+	__FM10K_RESET_DETACHED,
+	__FM10K_RESET_SUSPENDED,
 	__FM10K_DOWN,
 	__FM10K_SERVICE_SCHED,
 	__FM10K_SERVICE_REQUEST,
 	__FM10K_SERVICE_DISABLE,
-	__FM10K_MBX_LOCK,
+	__FM10K_MACVLAN_SCHED,
+	__FM10K_MACVLAN_REQUEST,
+	__FM10K_MACVLAN_DISABLE,
 	__FM10K_LINK_DOWN,
 	__FM10K_UPDATING_STATS,
 	/* This value must be last and determines the BITMAP size */
@@ -344,6 +371,8 @@ struct fm10k_intfc {
 
 	struct fm10k_hw_stats stats;
 	struct fm10k_hw hw;
+	/* Mailbox lock */
+	spinlock_t mbx_lock;
 	u32 __iomem *uc_addr;
 	u32 __iomem *sw_addr;
 	u16 msg_enable;
@@ -365,6 +394,12 @@ struct fm10k_intfc {
 	struct list_head vxlan_port;
 	struct list_head geneve_port;
 
+	/* MAC/VLAN update queue */
+	struct list_head macvlan_requests;
+	struct delayed_work macvlan_task;
+	/* MAC/VLAN update queue lock */
+	spinlock_t macvlan_lock;
+
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *dbg_intfc;
 #endif /* CONFIG_DEBUG_FS */
@@ -384,23 +419,17 @@ struct fm10k_intfc {
 
 static inline void fm10k_mbx_lock(struct fm10k_intfc *interface)
 {
-	/* busy loop if we cannot obtain the lock as some calls
-	 * such as ndo_set_rx_mode may be made in atomic context
-	 */
-	while (test_and_set_bit(__FM10K_MBX_LOCK, interface->state))
-		udelay(20);
+	spin_lock(&interface->mbx_lock);
 }
 
 static inline void fm10k_mbx_unlock(struct fm10k_intfc *interface)
 {
-	/* flush memory to make sure state is correct */
-	smp_mb__before_atomic();
-	clear_bit(__FM10K_MBX_LOCK, interface->state);
+	spin_unlock(&interface->mbx_lock);
 }
 
 static inline int fm10k_mbx_trylock(struct fm10k_intfc *interface)
 {
-	return !test_and_set_bit(__FM10K_MBX_LOCK, interface->state);
+	return spin_trylock(&interface->mbx_lock);
 }
 
 /* fm10k_test_staterr - test bits in Rx descriptor status and error fields */
@@ -490,6 +519,7 @@ void fm10k_up(struct fm10k_intfc *interface);
 void fm10k_down(struct fm10k_intfc *interface);
 void fm10k_update_stats(struct fm10k_intfc *interface);
 void fm10k_service_event_schedule(struct fm10k_intfc *interface);
+void fm10k_macvlan_schedule(struct fm10k_intfc *interface);
 void fm10k_update_rx_drop_en(struct fm10k_intfc *interface);
 #ifdef CONFIG_NET_POLL_CONTROLLER
 void fm10k_netpoll(struct net_device *netdev);
@@ -510,6 +540,12 @@ void fm10k_reset_rx_state(struct fm10k_intfc *);
 int fm10k_setup_tc(struct net_device *dev, u8 tc);
 int fm10k_open(struct net_device *netdev);
 int fm10k_close(struct net_device *netdev);
+int fm10k_queue_vlan_request(struct fm10k_intfc *interface, u32 vid,
+			     u8 vsi, bool set);
+int fm10k_queue_mac_request(struct fm10k_intfc *interface, u16 glort,
+			    const unsigned char *addr, u16 vid, bool set);
+void fm10k_clear_macvlan_queue(struct fm10k_intfc *interface,
+			       u16 glort, bool vlans);
 
 /* Ethtool */
 void fm10k_set_ethtool_ops(struct net_device *dev);
@@ -526,8 +562,8 @@ s32 fm10k_iov_update_pvid(struct fm10k_intfc *interface, u16 glort, u16 pvid);
 int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac);
 int fm10k_ndo_set_vf_vlan(struct net_device *netdev,
 			  int vf_idx, u16 vid, u8 qos, __be16 vlan_proto);
-int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, int rate,
-			int unused);
+int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx,
+			int __always_unused min_rate, int max_rate);
 int fm10k_ndo_get_vf_config(struct net_device *netdev,
 			    int vf_idx, struct ifla_vf_info *ivi);
 
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.c b/drivers/net/ethernet/intel/fm10k/fm10k_common.c
index 62a6ad9b3eed..736a9f087bc9 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_common.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.c
@@ -1,5 +1,5 @@
 /* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
+ * Copyright(c) 2013 - 2017 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -517,8 +517,8 @@ s32 fm10k_get_host_state_generic(struct fm10k_hw *hw, bool *host_ready)
 		goto out;
 	}
 
-	/* verify Mailbox is still valid */
-	if (!mbx->ops.tx_ready(mbx, FM10K_VFMBX_MSG_MTU))
+	/* verify Mailbox is still open */
+	if (mbx->state != FM10K_STATE_OPEN)
 		goto out;
 
 	/* interface cannot receive traffic without logical ports */
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
index 5116fd043630..14df09e2d964 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
@@ -52,9 +52,9 @@ static void fm10k_dbg_desc_seq_stop(struct seq_file __always_unused *s,
 static void fm10k_dbg_desc_break(struct seq_file *s, int i)
 {
 	while (i--)
-		seq_puts(s, "-");
+		seq_putc(s, '-');
 
-	seq_puts(s, "\n");
+	seq_putc(s, '\n');
 }
 
 static int fm10k_dbg_tx_desc_seq_show(struct seq_file *s, void *v)
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
index 5f4dac0d36ef..ea3ab24265ee 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
@@ -1,5 +1,5 @@
 /* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
+ * Copyright(c) 2013 - 2017 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -35,10 +35,133 @@ static s32 fm10k_iov_msg_error(struct fm10k_hw *hw, u32 **results,
 	return fm10k_tlv_msg_error(hw, results, mbx);
 }
 
+/**
+ *  fm10k_iov_msg_queue_mac_vlan - Message handler for MAC/VLAN request from VF
+ *  @hw: Pointer to hardware structure
+ *  @results: Pointer array to message, results[0] is pointer to message
+ *  @mbx: Pointer to mailbox information structure
+ *
+ *  This function is a custom handler for MAC/VLAN requests from the VF. The
+ *  assumption is that it is acceptable to directly hand off the message from
+ *  the VF to the PF's switch manager. However, we use a MAC/VLAN message
+ *  queue to avoid overloading the mailbox when a large number of requests
+ *  come in.
+ **/
+static s32 fm10k_iov_msg_queue_mac_vlan(struct fm10k_hw *hw, u32 **results,
+					struct fm10k_mbx_info *mbx)
+{
+	struct fm10k_vf_info *vf_info = (struct fm10k_vf_info *)mbx;
+	struct fm10k_intfc *interface = hw->back;
+	u8 mac[ETH_ALEN];
+	u32 *result;
+	int err = 0;
+	bool set;
+	u16 vlan;
+	u32 vid;
+
+	/* we shouldn't be updating rules on a disabled interface */
+	if (!FM10K_VF_FLAG_ENABLED(vf_info))
+		err = FM10K_ERR_PARAM;
+
+	if (!err && !!results[FM10K_MAC_VLAN_MSG_VLAN]) {
+		result = results[FM10K_MAC_VLAN_MSG_VLAN];
+
+		/* record VLAN id requested */
+		err = fm10k_tlv_attr_get_u32(result, &vid);
+		if (err)
+			return err;
+
+		set = !(vid & FM10K_VLAN_CLEAR);
+		vid &= ~FM10K_VLAN_CLEAR;
+
+		/* if the length field has been set, this is a multi-bit
+		 * update request. For multi-bit requests, simply disallow
+		 * them when the pf_vid has been set. In this case, the PF
+		 * should have already cleared the VLAN_TABLE, and if we
+		 * allowed them, it could allow a rogue VF to receive traffic
+		 * on a VLAN it was not assigned. In the single-bit case, we
+		 * need to modify requests for VLAN 0 to use the default PF or
+		 * SW vid when assigned.
+		 */
+
+		if (vid >> 16) {
+			/* prevent multi-bit requests when PF has
+			 * administratively set the VLAN for this VF
+			 */
+			if (vf_info->pf_vid)
+				return FM10K_ERR_PARAM;
+		} else {
+			err = fm10k_iov_select_vid(vf_info, (u16)vid);
+			if (err < 0)
+				return err;
+
+			vid = err;
+		}
+
+		/* update VSI info for VF in regards to VLAN table */
+		err = hw->mac.ops.update_vlan(hw, vid, vf_info->vsi, set);
+	}
+
+	if (!err && !!results[FM10K_MAC_VLAN_MSG_MAC]) {
+		result = results[FM10K_MAC_VLAN_MSG_MAC];
+
+		/* record unicast MAC address requested */
+		err = fm10k_tlv_attr_get_mac_vlan(result, mac, &vlan);
+		if (err)
+			return err;
+
+		/* block attempts to set MAC for a locked device */
+		if (is_valid_ether_addr(vf_info->mac) &&
+		    !ether_addr_equal(mac, vf_info->mac))
+			return FM10K_ERR_PARAM;
+
+		set = !(vlan & FM10K_VLAN_CLEAR);
+		vlan &= ~FM10K_VLAN_CLEAR;
+
+		err = fm10k_iov_select_vid(vf_info, vlan);
+		if (err < 0)
+			return err;
+
+		vlan = (u16)err;
+
+		/* Add this request to the MAC/VLAN queue */
+		err = fm10k_queue_mac_request(interface, vf_info->glort,
+					      mac, vlan, set);
+	}
+
+	if (!err && !!results[FM10K_MAC_VLAN_MSG_MULTICAST]) {
+		result = results[FM10K_MAC_VLAN_MSG_MULTICAST];
+
+		/* record multicast MAC address requested */
+		err = fm10k_tlv_attr_get_mac_vlan(result, mac, &vlan);
+		if (err)
+			return err;
+
+		/* verify that the VF is allowed to request multicast */
+		if (!(vf_info->vf_flags & FM10K_VF_FLAG_MULTI_ENABLED))
+			return FM10K_ERR_PARAM;
+
+		set = !(vlan & FM10K_VLAN_CLEAR);
+		vlan &= ~FM10K_VLAN_CLEAR;
+
+		err = fm10k_iov_select_vid(vf_info, vlan);
+		if (err < 0)
+			return err;
+
+		vlan = (u16)err;
+
+		/* Add this request to the MAC/VLAN queue */
+		err = fm10k_queue_mac_request(interface, vf_info->glort,
+					      mac, vlan, set);
+	}
+
+	return err;
+}
+
 static const struct fm10k_msg_data iov_mbx_data[] = {
 	FM10K_TLV_MSG_TEST_HANDLER(fm10k_tlv_msg_test),
 	FM10K_VF_MSG_MSIX_HANDLER(fm10k_iov_msg_msix_pf),
-	FM10K_VF_MSG_MAC_VLAN_HANDLER(fm10k_iov_msg_mac_vlan_pf),
+	FM10K_VF_MSG_MAC_VLAN_HANDLER(fm10k_iov_msg_queue_mac_vlan),
 	FM10K_VF_MSG_LPORT_STATE_HANDLER(fm10k_iov_msg_lport_state_pf),
 	FM10K_TLV_MSG_ERROR_HANDLER(fm10k_iov_msg_error),
 };
@@ -66,25 +189,21 @@ s32 fm10k_iov_event(struct fm10k_intfc *interface)
 		goto read_unlock;
 
 	/* read VFLRE to determine if any VFs have been reset */
-	do {
-		vflre = fm10k_read_reg(hw, FM10K_PFVFLRE(0));
-		vflre <<= 32;
-		vflre |= fm10k_read_reg(hw, FM10K_PFVFLRE(1));
-		vflre = (vflre << 32) | (vflre >> 32);
-		vflre |= fm10k_read_reg(hw, FM10K_PFVFLRE(0));
+	vflre = fm10k_read_reg(hw, FM10K_PFVFLRE(1));
+	vflre <<= 32;
+	vflre |= fm10k_read_reg(hw, FM10K_PFVFLRE(0));
 
-		i = iov_data->num_vfs;
+	i = iov_data->num_vfs;
 
-		for (vflre <<= 64 - i; vflre && i--; vflre += vflre) {
-			struct fm10k_vf_info *vf_info = &iov_data->vf_info[i];
+	for (vflre <<= 64 - i; vflre && i--; vflre += vflre) {
+		struct fm10k_vf_info *vf_info = &iov_data->vf_info[i];
 
-			if (vflre >= 0)
-				continue;
+		if (vflre >= 0)
+			continue;
 
-			hw->iov.ops.reset_resources(hw, vf_info);
-			vf_info->mbx.ops.connect(hw, &vf_info->mbx);
-		}
-	} while (i != iov_data->num_vfs);
+		hw->iov.ops.reset_resources(hw, vf_info);
+		vf_info->mbx.ops.connect(hw, &vf_info->mbx);
+	}
 
 read_unlock:
 	rcu_read_unlock();
@@ -126,9 +245,14 @@ process_mbx:
 		struct fm10k_mbx_info *mbx = &vf_info->mbx;
 		u16 glort = vf_info->glort;
 
+		/* process the SM mailbox first to drain outgoing messages */
+		hw->mbx.ops.process(hw, &hw->mbx);
+
 		/* verify port mapping is valid, if not reset port */
-		if (vf_info->vf_flags && !fm10k_glort_valid_pf(hw, glort))
+		if (vf_info->vf_flags && !fm10k_glort_valid_pf(hw, glort)) {
 			hw->iov.ops.reset_lport(hw, vf_info);
+			fm10k_clear_macvlan_queue(interface, glort, false);
+		}
 
 		/* reset VFs that have mailbox timed out */
 		if (!mbx->timeout) {
@@ -140,6 +264,10 @@ process_mbx:
 		if (!hw->mbx.ops.tx_ready(&hw->mbx, FM10K_VFMBX_MSG_MTU)) {
 			/* keep track of how many times this occurs */
 			interface->hw_sm_mbx_full++;
+
+			/* make sure we try again momentarily */
+			fm10k_service_event_schedule(interface);
+
 			break;
 		}
 
@@ -187,6 +315,7 @@ void fm10k_iov_suspend(struct pci_dev *pdev)
 
 		hw->iov.ops.reset_resources(hw, vf_info);
 		hw->iov.ops.reset_lport(hw, vf_info);
+		fm10k_clear_macvlan_queue(interface, vf_info->glort, false);
 	}
 }
 
@@ -411,6 +540,8 @@ static inline void fm10k_reset_vf_info(struct fm10k_intfc *interface,
 	/* disable LPORT for this VF which clears switch rules */
 	hw->iov.ops.reset_lport(hw, vf_info);
 
+	fm10k_clear_macvlan_queue(interface, vf_info->glort, false);
+
 	/* assign new MAC+VLAN for this VF */
 	hw->iov.ops.assign_default_mac_vlan(hw, vf_info);
 
@@ -482,7 +613,7 @@ int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid,
 }
 
 int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx,
-			int __always_unused unused, int rate)
+			int __always_unused min_rate, int max_rate)
 {
 	struct fm10k_intfc *interface = netdev_priv(netdev);
 	struct fm10k_iov_data *iov_data = interface->iov_data;
@@ -493,14 +624,15 @@ int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx,
 		return -EINVAL;
 
 	/* rate limit cannot be less than 10Mbs or greater than link speed */
-	if (rate && ((rate < FM10K_VF_TC_MIN) || rate > FM10K_VF_TC_MAX))
+	if (max_rate &&
+	    (max_rate < FM10K_VF_TC_MIN || max_rate > FM10K_VF_TC_MAX))
 		return -EINVAL;
 
 	/* store values */
-	iov_data->vf_info[vf_idx].rate = rate;
+	iov_data->vf_info[vf_idx].rate = max_rate;
 
 	/* update hardware configuration */
-	hw->iov.ops.configure_tc(hw, vf_idx, rate);
+	hw->iov.ops.configure_tc(hw, vf_idx, max_rate);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index 9dffaba85ae6..dbd69310f263 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -28,7 +28,7 @@
 
 #include "fm10k.h"
 
-#define DRV_VERSION	"0.21.7-k"
+#define DRV_VERSION	"0.22.1-k"
 #define DRV_SUMMARY	"Intel(R) Ethernet Switch Host Interface Driver"
 const char fm10k_driver_version[] = DRV_VERSION;
 char fm10k_driver_name[] = "fm10k";
@@ -806,9 +806,10 @@ static int fm10k_tso(struct fm10k_ring *tx_ring,
 	tx_desc->mss = cpu_to_le16(skb_shinfo(skb)->gso_size);
 
 	return 1;
+
 err_vxlan:
 	tx_ring->netdev->features &= ~NETIF_F_GSO_UDP_TUNNEL;
-	if (!net_ratelimit())
+	if (net_ratelimit())
 		netdev_err(tx_ring->netdev,
 			   "TSO requested for unsupported tunnel, disabling offload\n");
 	return -1;
@@ -876,6 +877,7 @@ static void fm10k_tx_csum(struct fm10k_ring *tx_ring,
 	case IPPROTO_GRE:
 		if (skb->encapsulation)
 			break;
+		/* fall through */
 	default:
 		if (unlikely(net_ratelimit())) {
 			dev_warn(tx_ring->dev,
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
index 334088a101c3..244d3ad58ca7 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
@@ -1,5 +1,5 @@
 /* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
+ * Copyright(c) 2013 - 2017 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -1586,7 +1586,7 @@ s32 fm10k_pfvf_mbx_init(struct fm10k_hw *hw, struct fm10k_mbx_info *mbx,
 			mbx->mbmem_reg = FM10K_MBMEM_VF(id, 0);
 			break;
 		}
-		/* fallthough */
+		/* fall through */
 	default:
 		return FM10K_MBX_ERR_NO_MBX;
 	}
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index e69d49d91d67..adc62fb38c49 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -643,9 +643,13 @@ int fm10k_close(struct net_device *netdev)
 static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 {
 	struct fm10k_intfc *interface = netdev_priv(dev);
+	int num_tx_queues = READ_ONCE(interface->num_tx_queues);
 	unsigned int r_idx = skb->queue_mapping;
 	int err;
 
+	if (!num_tx_queues)
+		return NETDEV_TX_BUSY;
+
 	if ((skb->protocol == htons(ETH_P_8021Q)) &&
 	    !skb_vlan_tag_present(skb)) {
 		/* FM10K only supports hardware tagging, any tags in frame
@@ -698,8 +702,8 @@ static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 		__skb_put(skb, pad_len);
 	}
 
-	if (r_idx >= interface->num_tx_queues)
-		r_idx %= interface->num_tx_queues;
+	if (r_idx >= num_tx_queues)
+		r_idx %= num_tx_queues;
 
 	err = fm10k_xmit_frame_ring(skb, interface->tx_ring[r_idx]);
 
@@ -754,11 +758,132 @@ static bool fm10k_host_mbx_ready(struct fm10k_intfc *interface)
 	return (hw->mac.type == fm10k_mac_vf || interface->host_ready);
 }
 
+/**
+ * fm10k_queue_vlan_request - Queue a VLAN update request
+ * @interface: the fm10k interface structure
+ * @vid: the VLAN vid
+ * @vsi: VSI index number
+ * @set: whether to set or clear
+ *
+ * This function queues up a VLAN update. For VFs, this must be sent to the
+ * managing PF over the mailbox. For PFs, we'll use the same handling so that
+ * it's similar to the VF. This avoids storming the PF<->VF mailbox with too
+ * many VLAN updates during reset.
+ */
+int fm10k_queue_vlan_request(struct fm10k_intfc *interface,
+			     u32 vid, u8 vsi, bool set)
+{
+	struct fm10k_macvlan_request *request;
+	unsigned long flags;
+
+	/* This must be atomic since we may be called while the netdev
+	 * addr_list_lock is held
+	 */
+	request = kzalloc(sizeof(*request), GFP_ATOMIC);
+	if (!request)
+		return -ENOMEM;
+
+	request->type = FM10K_VLAN_REQUEST;
+	request->vlan.vid = vid;
+	request->vlan.vsi = vsi;
+	request->set = set;
+
+	spin_lock_irqsave(&interface->macvlan_lock, flags);
+	list_add_tail(&request->list, &interface->macvlan_requests);
+	spin_unlock_irqrestore(&interface->macvlan_lock, flags);
+
+	fm10k_macvlan_schedule(interface);
+
+	return 0;
+}
+
+/**
+ * fm10k_queue_mac_request - Queue a MAC update request
+ * @interface: the fm10k interface structure
+ * @glort: the target glort for this update
+ * @addr: the address to update
+ * @vid: the vid to update
+ * @sync: whether to add or remove
+ *
+ * This function queues up a MAC request for sending to the switch manager.
+ * A separate thread monitors the queue and sends updates to the switch
+ * manager. Return 0 on success, and negative error code on failure.
+ **/
+int fm10k_queue_mac_request(struct fm10k_intfc *interface, u16 glort,
+			    const unsigned char *addr, u16 vid, bool set)
+{
+	struct fm10k_macvlan_request *request;
+	unsigned long flags;
+
+	/* This must be atomic since we may be called while the netdev
+	 * addr_list_lock is held
+	 */
+	request = kzalloc(sizeof(*request), GFP_ATOMIC);
+	if (!request)
+		return -ENOMEM;
+
+	if (is_multicast_ether_addr(addr))
+		request->type = FM10K_MC_MAC_REQUEST;
+	else
+		request->type = FM10K_UC_MAC_REQUEST;
+
+	ether_addr_copy(request->mac.addr, addr);
+	request->mac.glort = glort;
+	request->mac.vid = vid;
+	request->set = set;
+
+	spin_lock_irqsave(&interface->macvlan_lock, flags);
+	list_add_tail(&request->list, &interface->macvlan_requests);
+	spin_unlock_irqrestore(&interface->macvlan_lock, flags);
+
+	fm10k_macvlan_schedule(interface);
+
+	return 0;
+}
+
+/**
+ * fm10k_clear_macvlan_queue - Cancel pending updates for a given glort
+ * @interface: the fm10k interface structure
+ * @glort: the target glort to clear
+ * @vlans: true to clear VLAN messages, false to ignore them
+ *
+ * Cancel any outstanding MAC/VLAN requests for a given glort. This is
+ * expected to be called when a logical port goes down.
+ **/
+void fm10k_clear_macvlan_queue(struct fm10k_intfc *interface,
+			       u16 glort, bool vlans)
+
+{
+	struct fm10k_macvlan_request *r, *tmp;
+	unsigned long flags;
+
+	spin_lock_irqsave(&interface->macvlan_lock, flags);
+
+	/* Free any outstanding MAC/VLAN requests for this interface */
+	list_for_each_entry_safe(r, tmp, &interface->macvlan_requests, list) {
+		switch (r->type) {
+		case FM10K_MC_MAC_REQUEST:
+		case FM10K_UC_MAC_REQUEST:
+			/* Don't free requests for other interfaces */
+			if (r->mac.glort != glort)
+				break;
+			/* fall through */
+		case FM10K_VLAN_REQUEST:
+			if (vlans) {
+				list_del(&r->list);
+				kfree(r);
+			}
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&interface->macvlan_lock, flags);
+}
+
 static int fm10k_uc_vlan_unsync(struct net_device *netdev,
 				const unsigned char *uc_addr)
 {
 	struct fm10k_intfc *interface = netdev_priv(netdev);
-	struct fm10k_hw *hw = &interface->hw;
 	u16 glort = interface->glort;
 	u16 vid = interface->vid;
 	bool set = !!(vid / VLAN_N_VID);
@@ -767,10 +892,7 @@ static int fm10k_uc_vlan_unsync(struct net_device *netdev,
 	/* drop any leading bits on the VLAN ID */
 	vid &= VLAN_N_VID - 1;
 
-	if (fm10k_host_mbx_ready(interface))
-		err = hw->mac.ops.update_uc_addr(hw, glort, uc_addr,
-						 vid, set, 0);
-
+	err = fm10k_queue_mac_request(interface, glort, uc_addr, vid, set);
 	if (err)
 		return err;
 
@@ -782,7 +904,6 @@ static int fm10k_mc_vlan_unsync(struct net_device *netdev,
 				const unsigned char *mc_addr)
 {
 	struct fm10k_intfc *interface = netdev_priv(netdev);
-	struct fm10k_hw *hw = &interface->hw;
 	u16 glort = interface->glort;
 	u16 vid = interface->vid;
 	bool set = !!(vid / VLAN_N_VID);
@@ -791,9 +912,7 @@ static int fm10k_mc_vlan_unsync(struct net_device *netdev,
 	/* drop any leading bits on the VLAN ID */
 	vid &= VLAN_N_VID - 1;
 
-	if (fm10k_host_mbx_ready(interface))
-		err = hw->mac.ops.update_mc_addr(hw, glort, mc_addr, vid, set);
-
+	err = fm10k_queue_mac_request(interface, glort, mc_addr, vid, set);
 	if (err)
 		return err;
 
@@ -851,18 +970,14 @@ static int fm10k_update_vid(struct net_device *netdev, u16 vid, bool set)
 
 	/* only need to update the VLAN if not in promiscuous mode */
 	if (!(netdev->flags & IFF_PROMISC)) {
-		err = hw->mac.ops.update_vlan(hw, vid, 0, set);
+		err = fm10k_queue_vlan_request(interface, vid, 0, set);
 		if (err)
 			goto err_out;
 	}
 
-	/* update our base MAC address if host's mailbox is ready */
-	if (fm10k_host_mbx_ready(interface))
-		err = hw->mac.ops.update_uc_addr(hw, interface->glort,
-						 hw->mac.addr, vid, set, 0);
-	else
-		err = -EHOSTDOWN;
-
+	/* Update our base MAC address */
+	err = fm10k_queue_mac_request(interface, interface->glort,
+				      hw->mac.addr, vid, set);
 	if (err)
 		goto err_out;
 
@@ -906,7 +1021,6 @@ static u16 fm10k_find_next_vlan(struct fm10k_intfc *interface, u16 vid)
 
 static void fm10k_clear_unused_vlans(struct fm10k_intfc *interface)
 {
-	struct fm10k_hw *hw = &interface->hw;
 	u32 vid, prev_vid;
 
 	/* loop through and find any gaps in the table */
@@ -918,7 +1032,7 @@ static void fm10k_clear_unused_vlans(struct fm10k_intfc *interface)
 
 		/* send request to clear multiple bits at a time */
 		prev_vid += (vid - prev_vid - 1) << FM10K_VLAN_LENGTH_SHIFT;
-		hw->mac.ops.update_vlan(hw, prev_vid, 0, false);
+		fm10k_queue_vlan_request(interface, prev_vid, 0, false);
 	}
 }
 
@@ -933,15 +1047,11 @@ static int __fm10k_uc_sync(struct net_device *dev,
 	if (!is_valid_ether_addr(addr))
 		return -EADDRNOTAVAIL;
 
-	/* update table with current entries if host's mailbox is ready */
-	if (!fm10k_host_mbx_ready(interface))
-		return -EHOSTDOWN;
-
 	for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 1;
 	     vid < VLAN_N_VID;
 	     vid = fm10k_find_next_vlan(interface, vid)) {
-		err = hw->mac.ops.update_uc_addr(hw, glort, addr,
-						 vid, sync, 0);
+		err = fm10k_queue_mac_request(interface, glort,
+					      addr, vid, sync);
 		if (err)
 			return err;
 	}
@@ -998,15 +1108,18 @@ static int __fm10k_mc_sync(struct net_device *dev,
 	struct fm10k_intfc *interface = netdev_priv(dev);
 	struct fm10k_hw *hw = &interface->hw;
 	u16 vid, glort = interface->glort;
+	s32 err;
 
-	/* update table with current entries if host's mailbox is ready */
-	if (!fm10k_host_mbx_ready(interface))
-		return 0;
+	if (!is_multicast_ether_addr(addr))
+		return -EADDRNOTAVAIL;
 
 	for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 1;
 	     vid < VLAN_N_VID;
 	     vid = fm10k_find_next_vlan(interface, vid)) {
-		hw->mac.ops.update_mc_addr(hw, glort, addr, vid, sync);
+		err = fm10k_queue_mac_request(interface, glort,
+					      addr, vid, sync);
+		if (err)
+			return err;
 	}
 
 	return 0;
@@ -1046,7 +1159,8 @@ static void fm10k_set_rx_mode(struct net_device *dev)
 	if (interface->xcast_mode != xcast_mode) {
 		/* update VLAN table */
 		if (xcast_mode == FM10K_XCAST_MODE_PROMISC)
-			hw->mac.ops.update_vlan(hw, FM10K_VLAN_ALL, 0, true);
+			fm10k_queue_vlan_request(interface, FM10K_VLAN_ALL,
+						 0, true);
 		if (interface->xcast_mode == FM10K_XCAST_MODE_PROMISC)
 			fm10k_clear_unused_vlans(interface);
 
@@ -1094,22 +1208,20 @@ void fm10k_restore_rx_state(struct fm10k_intfc *interface)
 					       interface->glort_count, true);
 
 	/* update VLAN table */
-	hw->mac.ops.update_vlan(hw, FM10K_VLAN_ALL, 0,
-				xcast_mode == FM10K_XCAST_MODE_PROMISC);
+	fm10k_queue_vlan_request(interface, FM10K_VLAN_ALL, 0,
+				 xcast_mode == FM10K_XCAST_MODE_PROMISC);
 
 	/* Add filter for VLAN 0 */
-	hw->mac.ops.update_vlan(hw, 0, 0, true);
+	fm10k_queue_vlan_request(interface, 0, 0, true);
 
 	/* update table with current entries */
 	for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 1;
 	     vid < VLAN_N_VID;
 	     vid = fm10k_find_next_vlan(interface, vid)) {
-		hw->mac.ops.update_vlan(hw, vid, 0, true);
+		fm10k_queue_vlan_request(interface, vid, 0, true);
 
-		/* Update unicast entries if host's mailbox is ready */
-		if (fm10k_host_mbx_ready(interface))
-			hw->mac.ops.update_uc_addr(hw, glort, hw->mac.addr,
-						   vid, true, 0);
+		fm10k_queue_mac_request(interface, glort,
+					hw->mac.addr, vid, true);
 	}
 
 	/* update xcast mode before synchronizing addresses if host's mailbox
@@ -1136,6 +1248,13 @@ void fm10k_reset_rx_state(struct fm10k_intfc *interface)
 	struct net_device *netdev = interface->netdev;
 	struct fm10k_hw *hw = &interface->hw;
 
+	/* Wait for MAC/VLAN work to finish */
+	while (test_bit(__FM10K_MACVLAN_SCHED, interface->state))
+		usleep_range(1000, 2000);
+
+	/* Cancel pending MAC/VLAN requests */
+	fm10k_clear_macvlan_queue(interface, interface->glort, true);
+
 	fm10k_mbx_lock(interface);
 
 	/* clear the logical port state on lower device if host's mailbox is
@@ -1270,7 +1389,7 @@ static int __fm10k_setup_tc(struct net_device *dev, enum tc_setup_type type,
 {
 	struct tc_mqprio_qopt *mqprio = type_data;
 
-	if (type != TC_SETUP_MQPRIO)
+	if (type != TC_SETUP_QDISC_MQPRIO)
 		return -EOPNOTSUPP;
 
 	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
@@ -1370,8 +1489,8 @@ static void *fm10k_dfwd_add_station(struct net_device *dev,
 	if (fm10k_host_mbx_ready(interface)) {
 		hw->mac.ops.update_xcast_mode(hw, glort,
 					      FM10K_XCAST_MODE_MULTI);
-		hw->mac.ops.update_uc_addr(hw, glort, sdev->dev_addr,
-					   0, true, 0);
+		fm10k_queue_mac_request(interface, glort, sdev->dev_addr,
+					0, true);
 	}
 
 	fm10k_mbx_unlock(interface);
@@ -1410,8 +1529,8 @@ static void fm10k_dfwd_del_station(struct net_device *dev, void *priv)
 	if (fm10k_host_mbx_ready(interface)) {
 		hw->mac.ops.update_xcast_mode(hw, glort,
 					      FM10K_XCAST_MODE_NONE);
-		hw->mac.ops.update_uc_addr(hw, glort, sdev->dev_addr,
-					   0, false, 0);
+		fm10k_queue_mac_request(interface, glort, sdev->dev_addr,
+					0, false);
 	}
 
 	fm10k_mbx_unlock(interface);
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index 63784576ae8b..7f605221a686 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -91,6 +91,76 @@ static int fm10k_hw_ready(struct fm10k_intfc *interface)
 	return FM10K_REMOVED(hw->hw_addr) ? -ENODEV : 0;
 }
 
+/**
+ * fm10k_macvlan_schedule - Schedule MAC/VLAN queue task
+ * @interface: fm10k private interface structure
+ *
+ * Schedule the MAC/VLAN queue monitor task. If the MAC/VLAN task cannot be
+ * started immediately, request that it be restarted when possible.
+ */
+void fm10k_macvlan_schedule(struct fm10k_intfc *interface)
+{
+	/* Avoid processing the MAC/VLAN queue when the service task is
+	 * disabled, or when we're resetting the device.
+	 */
+	if (!test_bit(__FM10K_MACVLAN_DISABLE, interface->state) &&
+	    !test_and_set_bit(__FM10K_MACVLAN_SCHED, interface->state)) {
+		clear_bit(__FM10K_MACVLAN_REQUEST, interface->state);
+		/* We delay the actual start of execution in order to allow
+		 * multiple MAC/VLAN updates to accumulate before handling
+		 * them, and to allow some time to let the mailbox drain
+		 * between runs.
+		 */
+		queue_delayed_work(fm10k_workqueue,
+				   &interface->macvlan_task, 10);
+	} else {
+		set_bit(__FM10K_MACVLAN_REQUEST, interface->state);
+	}
+}
+
+/**
+ * fm10k_stop_macvlan_task - Stop the MAC/VLAN queue monitor
+ * @interface: fm10k private interface structure
+ *
+ * Wait until the MAC/VLAN queue task has stopped, and cancel any future
+ * requests.
+ */
+static void fm10k_stop_macvlan_task(struct fm10k_intfc *interface)
+{
+	/* Disable the MAC/VLAN work item */
+	set_bit(__FM10K_MACVLAN_DISABLE, interface->state);
+
+	/* Make sure we waited until any current invocations have stopped */
+	cancel_delayed_work_sync(&interface->macvlan_task);
+
+	/* We set the __FM10K_MACVLAN_SCHED bit when we schedule the task.
+	 * However, it may not be unset of the MAC/VLAN task never actually
+	 * got a chance to run. Since we've canceled the task here, and it
+	 * cannot be rescheuled right now, we need to ensure the scheduled bit
+	 * gets unset.
+	 */
+	clear_bit(__FM10K_MACVLAN_SCHED, interface->state);
+}
+
+/**
+ * fm10k_resume_macvlan_task - Restart the MAC/VLAN queue monitor
+ * @interface: fm10k private interface structure
+ *
+ * Clear the __FM10K_MACVLAN_DISABLE bit and, if a request occurred, schedule
+ * the MAC/VLAN work monitor.
+ */
+static void fm10k_resume_macvlan_task(struct fm10k_intfc *interface)
+{
+	/* Re-enable the MAC/VLAN work item */
+	clear_bit(__FM10K_MACVLAN_DISABLE, interface->state);
+
+	/* We might have received a MAC/VLAN request while disabled. If so,
+	 * kick off the queue now.
+	 */
+	if (test_bit(__FM10K_MACVLAN_REQUEST, interface->state))
+		fm10k_macvlan_schedule(interface);
+}
+
 void fm10k_service_event_schedule(struct fm10k_intfc *interface)
 {
 	if (!test_bit(__FM10K_SERVICE_DISABLE, interface->state) &&
@@ -118,13 +188,35 @@ static void fm10k_service_event_complete(struct fm10k_intfc *interface)
 		fm10k_service_event_schedule(interface);
 }
 
+static void fm10k_stop_service_event(struct fm10k_intfc *interface)
+{
+	set_bit(__FM10K_SERVICE_DISABLE, interface->state);
+	cancel_work_sync(&interface->service_task);
+
+	/* It's possible that cancel_work_sync stopped the service task from
+	 * running before it could actually start. In this case the
+	 * __FM10K_SERVICE_SCHED bit will never be cleared. Since we know that
+	 * the service task cannot be running at this point, we need to clear
+	 * the scheduled bit, as otherwise the service task may never be
+	 * restarted.
+	 */
+	clear_bit(__FM10K_SERVICE_SCHED, interface->state);
+}
+
+static void fm10k_start_service_event(struct fm10k_intfc *interface)
+{
+	clear_bit(__FM10K_SERVICE_DISABLE, interface->state);
+	fm10k_service_event_schedule(interface);
+}
+
 /**
  * fm10k_service_timer - Timer Call-back
  * @data: pointer to interface cast into an unsigned long
  **/
-static void fm10k_service_timer(unsigned long data)
+static void fm10k_service_timer(struct timer_list *t)
 {
-	struct fm10k_intfc *interface = (struct fm10k_intfc *)data;
+	struct fm10k_intfc *interface = from_timer(interface, t,
+						   service_timer);
 
 	/* Reset the timer */
 	mod_timer(&interface->service_timer, (HZ * 2) + jiffies);
@@ -132,36 +224,15 @@ static void fm10k_service_timer(unsigned long data)
 	fm10k_service_event_schedule(interface);
 }
 
-static void fm10k_detach_subtask(struct fm10k_intfc *interface)
-{
-	struct net_device *netdev = interface->netdev;
-	u32 __iomem *hw_addr;
-	u32 value;
-
-	/* do nothing if device is still present or hw_addr is set */
-	if (netif_device_present(netdev) || interface->hw.hw_addr)
-		return;
-
-	/* check the real address space to see if we've recovered */
-	hw_addr = READ_ONCE(interface->uc_addr);
-	value = readl(hw_addr);
-	if (~value) {
-		interface->hw.hw_addr = interface->uc_addr;
-		netif_device_attach(netdev);
-		set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags);
-		netdev_warn(netdev, "PCIe link restored, device now attached\n");
-		return;
-	}
-
-	rtnl_lock();
-
-	if (netif_running(netdev))
-		dev_close(netdev);
-
-	rtnl_unlock();
-}
-
-static void fm10k_prepare_for_reset(struct fm10k_intfc *interface)
+/**
+ * fm10k_prepare_for_reset - Prepare the driver and device for a pending reset
+ * @interface: fm10k private data structure
+ *
+ * This function prepares for a device reset by shutting as much down as we
+ * can. It does nothing and returns false if __FM10K_RESETTING was already set
+ * prior to calling this function. It returns true if it actually did work.
+ */
+static bool fm10k_prepare_for_reset(struct fm10k_intfc *interface)
 {
 	struct net_device *netdev = interface->netdev;
 
@@ -170,8 +241,15 @@ static void fm10k_prepare_for_reset(struct fm10k_intfc *interface)
 	/* put off any impending NetWatchDogTimeout */
 	netif_trans_update(netdev);
 
-	while (test_and_set_bit(__FM10K_RESETTING, interface->state))
-		usleep_range(1000, 2000);
+	/* Nothing to do if a reset is already in progress */
+	if (test_and_set_bit(__FM10K_RESETTING, interface->state))
+		return false;
+
+	/* As the MAC/VLAN task will be accessing registers it must not be
+	 * running while we reset. Although the task will not be scheduled
+	 * once we start resetting it may already be running
+	 */
+	fm10k_stop_macvlan_task(interface);
 
 	rtnl_lock();
 
@@ -189,6 +267,8 @@ static void fm10k_prepare_for_reset(struct fm10k_intfc *interface)
 	interface->last_reset = jiffies + (10 * HZ);
 
 	rtnl_unlock();
+
+	return true;
 }
 
 static int fm10k_handle_reset(struct fm10k_intfc *interface)
@@ -197,6 +277,8 @@ static int fm10k_handle_reset(struct fm10k_intfc *interface)
 	struct fm10k_hw *hw = &interface->hw;
 	int err;
 
+	WARN_ON(!test_bit(__FM10K_RESETTING, interface->state));
+
 	rtnl_lock();
 
 	pci_set_master(interface->pdev);
@@ -253,6 +335,8 @@ static int fm10k_handle_reset(struct fm10k_intfc *interface)
 
 	rtnl_unlock();
 
+	fm10k_resume_macvlan_task(interface);
+
 	clear_bit(__FM10K_RESETTING, interface->state);
 
 	return err;
@@ -270,27 +354,80 @@ reinit_err:
 	return err;
 }
 
-static void fm10k_reinit(struct fm10k_intfc *interface)
+static void fm10k_detach_subtask(struct fm10k_intfc *interface)
 {
+	struct net_device *netdev = interface->netdev;
+	u32 __iomem *hw_addr;
+	u32 value;
 	int err;
 
-	fm10k_prepare_for_reset(interface);
+	/* do nothing if netdev is still present or hw_addr is set */
+	if (netif_device_present(netdev) || interface->hw.hw_addr)
+		return;
 
-	err = fm10k_handle_reset(interface);
-	if (err)
-		dev_err(&interface->pdev->dev,
-			"fm10k_handle_reset failed: %d\n", err);
+	/* We've lost the PCIe register space, and can no longer access the
+	 * device. Shut everything except the detach subtask down and prepare
+	 * to reset the device in case we recover. If we actually prepare for
+	 * reset, indicate that we're detached.
+	 */
+	if (fm10k_prepare_for_reset(interface))
+		set_bit(__FM10K_RESET_DETACHED, interface->state);
+
+	/* check the real address space to see if we've recovered */
+	hw_addr = READ_ONCE(interface->uc_addr);
+	value = readl(hw_addr);
+	if (~value) {
+		/* Make sure the reset was initiated because we detached,
+		 * otherwise we might race with a different reset flow.
+		 */
+		if (!test_and_clear_bit(__FM10K_RESET_DETACHED,
+					interface->state))
+			return;
+
+		/* Restore the hardware address */
+		interface->hw.hw_addr = interface->uc_addr;
+
+		/* PCIe link has been restored, and the device is active
+		 * again. Restore everything and reset the device.
+		 */
+		err = fm10k_handle_reset(interface);
+		if (err) {
+			netdev_err(netdev, "Unable to reset device: %d\n", err);
+			interface->hw.hw_addr = NULL;
+			return;
+		}
+
+		/* Re-attach the netdev */
+		netif_device_attach(netdev);
+		netdev_warn(netdev, "PCIe link restored, device now attached\n");
+		return;
+	}
 }
 
 static void fm10k_reset_subtask(struct fm10k_intfc *interface)
 {
+	int err;
+
 	if (!test_and_clear_bit(FM10K_FLAG_RESET_REQUESTED,
 				interface->flags))
 		return;
 
+	/* If another thread has already prepared to reset the device, we
+	 * should not attempt to handle a reset here, since we'd race with
+	 * that thread. This may happen if we suspend the device or if the
+	 * PCIe link is lost. In this case, we'll just ignore the RESET
+	 * request, as it will (eventually) be taken care of when the thread
+	 * which actually started the reset is finished.
+	 */
+	if (!fm10k_prepare_for_reset(interface))
+		return;
+
 	netdev_err(interface->netdev, "Reset interface\n");
 
-	fm10k_reinit(interface);
+	err = fm10k_handle_reset(interface);
+	if (err)
+		dev_err(&interface->pdev->dev,
+			"fm10k_handle_reset failed: %d\n", err);
 }
 
 /**
@@ -360,6 +497,10 @@ static void fm10k_watchdog_update_host_state(struct fm10k_intfc *interface)
  **/
 static void fm10k_mbx_subtask(struct fm10k_intfc *interface)
 {
+	/* If we're resetting, bail out */
+	if (test_bit(__FM10K_RESETTING, interface->state))
+		return;
+
 	/* process upstream mailbox and update device state */
 	fm10k_watchdog_update_host_state(interface);
 
@@ -609,9 +750,11 @@ static void fm10k_service_task(struct work_struct *work)
 
 	interface = container_of(work, struct fm10k_intfc, service_task);
 
+	/* Check whether we're detached first */
+	fm10k_detach_subtask(interface);
+
 	/* tasks run even when interface is down */
 	fm10k_mbx_subtask(interface);
-	fm10k_detach_subtask(interface);
 	fm10k_reset_subtask(interface);
 
 	/* tasks only run when interface is up */
@@ -623,6 +766,112 @@ static void fm10k_service_task(struct work_struct *work)
 }
 
 /**
+ * fm10k_macvlan_task - send queued MAC/VLAN requests to switch manager
+ * @work: pointer to work_struct containing our data
+ *
+ * This work item handles sending MAC/VLAN updates to the switch manager. When
+ * the interface is up, it will attempt to queue mailbox messages to the
+ * switch manager requesting updates for MAC/VLAN pairs. If the Tx fifo of the
+ * mailbox is full, it will reschedule itself to try again in a short while.
+ * This ensures that the driver does not overload the switch mailbox with too
+ * many simultaneous requests, causing an unnecessary reset.
+ **/
+static void fm10k_macvlan_task(struct work_struct *work)
+{
+	struct fm10k_macvlan_request *item;
+	struct fm10k_intfc *interface;
+	struct delayed_work *dwork;
+	struct list_head *requests;
+	struct fm10k_hw *hw;
+	unsigned long flags;
+
+	dwork = to_delayed_work(work);
+	interface = container_of(dwork, struct fm10k_intfc, macvlan_task);
+	hw = &interface->hw;
+	requests = &interface->macvlan_requests;
+
+	do {
+		/* Pop the first item off the list */
+		spin_lock_irqsave(&interface->macvlan_lock, flags);
+		item = list_first_entry_or_null(requests,
+						struct fm10k_macvlan_request,
+						list);
+		if (item)
+			list_del_init(&item->list);
+
+		spin_unlock_irqrestore(&interface->macvlan_lock, flags);
+
+		/* We have no more items to process */
+		if (!item)
+			goto done;
+
+		fm10k_mbx_lock(interface);
+
+		/* Check that we have plenty of space to send the message. We
+		 * want to ensure that the mailbox stays low enough to avoid a
+		 * change in the host state, otherwise we may see spurious
+		 * link up / link down notifications.
+		 */
+		if (!hw->mbx.ops.tx_ready(&hw->mbx, FM10K_VFMBX_MSG_MTU + 5)) {
+			hw->mbx.ops.process(hw, &hw->mbx);
+			set_bit(__FM10K_MACVLAN_REQUEST, interface->state);
+			fm10k_mbx_unlock(interface);
+
+			/* Put the request back on the list */
+			spin_lock_irqsave(&interface->macvlan_lock, flags);
+			list_add(&item->list, requests);
+			spin_unlock_irqrestore(&interface->macvlan_lock, flags);
+			break;
+		}
+
+		switch (item->type) {
+		case FM10K_MC_MAC_REQUEST:
+			hw->mac.ops.update_mc_addr(hw,
+						   item->mac.glort,
+						   item->mac.addr,
+						   item->mac.vid,
+						   item->set);
+			break;
+		case FM10K_UC_MAC_REQUEST:
+			hw->mac.ops.update_uc_addr(hw,
+						   item->mac.glort,
+						   item->mac.addr,
+						   item->mac.vid,
+						   item->set,
+						   0);
+			break;
+		case FM10K_VLAN_REQUEST:
+			hw->mac.ops.update_vlan(hw,
+						item->vlan.vid,
+						item->vlan.vsi,
+						item->set);
+			break;
+		default:
+			break;
+		}
+
+		fm10k_mbx_unlock(interface);
+
+		/* Free the item now that we've sent the update */
+		kfree(item);
+	} while (true);
+
+done:
+	WARN_ON(!test_bit(__FM10K_MACVLAN_SCHED, interface->state));
+
+	/* flush memory to make sure state is correct */
+	smp_mb__before_atomic();
+	clear_bit(__FM10K_MACVLAN_SCHED, interface->state);
+
+	/* If a MAC/VLAN request was scheduled since we started, we should
+	 * re-schedule. However, there is no reason to re-schedule if there is
+	 * no work to do.
+	 */
+	if (test_bit(__FM10K_MACVLAN_REQUEST, interface->state))
+		fm10k_macvlan_schedule(interface);
+}
+
+/**
  * fm10k_configure_tx_ring - Configure Tx ring after Reset
  * @interface: board private structure
  * @ring: structure containing ring specific data
@@ -1544,7 +1793,7 @@ int fm10k_qv_request_irq(struct fm10k_intfc *interface)
 	struct net_device *dev = interface->netdev;
 	struct fm10k_hw *hw = &interface->hw;
 	struct msix_entry *entry;
-	int ri = 0, ti = 0;
+	unsigned int ri = 0, ti = 0;
 	int vector, err;
 
 	entry = &interface->msix_entries[NON_Q_VECTORS(hw)];
@@ -1554,15 +1803,15 @@ int fm10k_qv_request_irq(struct fm10k_intfc *interface)
 
 		/* name the vector */
 		if (q_vector->tx.count && q_vector->rx.count) {
-			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
-				 "%s-TxRx-%d", dev->name, ri++);
+			snprintf(q_vector->name, sizeof(q_vector->name),
+				 "%s-TxRx-%u", dev->name, ri++);
 			ti++;
 		} else if (q_vector->rx.count) {
-			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
-				 "%s-rx-%d", dev->name, ri++);
+			snprintf(q_vector->name, sizeof(q_vector->name),
+				 "%s-rx-%u", dev->name, ri++);
 		} else if (q_vector->tx.count) {
-			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
-				 "%s-tx-%d", dev->name, ti++);
+			snprintf(q_vector->name, sizeof(q_vector->name),
+				 "%s-tx-%u", dev->name, ti++);
 		} else {
 			/* skip this unused q_vector */
 			continue;
@@ -1800,9 +2049,6 @@ static int fm10k_sw_init(struct fm10k_intfc *interface,
 		netdev->vlan_features |= NETIF_F_HIGHDMA;
 	}
 
-	/* delay any future reset requests */
-	interface->last_reset = jiffies + (10 * HZ);
-
 	/* reset and initialize the hardware so it is in a known state */
 	err = hw->mac.ops.reset_hw(hw);
 	if (err) {
@@ -1857,9 +2103,16 @@ static int fm10k_sw_init(struct fm10k_intfc *interface,
 	INIT_LIST_HEAD(&interface->vxlan_port);
 	INIT_LIST_HEAD(&interface->geneve_port);
 
+	/* Initialize the MAC/VLAN queue */
+	INIT_LIST_HEAD(&interface->macvlan_requests);
+
 	netdev_rss_key_fill(rss_key, sizeof(rss_key));
 	memcpy(interface->rssrk, rss_key, sizeof(rss_key));
 
+	/* Initialize the mailbox lock */
+	spin_lock_init(&interface->mbx_lock);
+	spin_lock_init(&interface->macvlan_lock);
+
 	/* Start off interface as being down */
 	set_bit(__FM10K_DOWN, interface->state);
 	set_bit(__FM10K_UPDATING_STATS, interface->state);
@@ -2063,10 +2316,12 @@ static int fm10k_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	/* Initialize service timer and service task late in order to avoid
 	 * cleanup issues.
 	 */
-	setup_timer(&interface->service_timer, &fm10k_service_timer,
-		    (unsigned long)interface);
+	timer_setup(&interface->service_timer, fm10k_service_timer, 0);
 	INIT_WORK(&interface->service_task, fm10k_service_task);
 
+	/* Setup the MAC/VLAN queue */
+	INIT_DELAYED_WORK(&interface->macvlan_task, fm10k_macvlan_task);
+
 	/* kick off service timer now, even when interface is down */
 	mod_timer(&interface->service_timer, (HZ * 2) + jiffies);
 
@@ -2079,8 +2334,9 @@ static int fm10k_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	/* enable SR-IOV after registering netdev to enforce PF/VF ordering */
 	fm10k_iov_configure(pdev, 0);
 
-	/* clear the service task disable bit to allow service task to start */
+	/* clear the service task disable bit and kick off service task */
 	clear_bit(__FM10K_SERVICE_DISABLE, interface->state);
+	fm10k_service_event_schedule(interface);
 
 	return 0;
 
@@ -2118,8 +2374,11 @@ static void fm10k_remove(struct pci_dev *pdev)
 
 	del_timer_sync(&interface->service_timer);
 
-	set_bit(__FM10K_SERVICE_DISABLE, interface->state);
-	cancel_work_sync(&interface->service_task);
+	fm10k_stop_service_event(interface);
+	fm10k_stop_macvlan_task(interface);
+
+	/* Remove all pending MAC/VLAN requests */
+	fm10k_clear_macvlan_queue(interface, interface->glort, true);
 
 	/* free netdev, this may bounce the interrupts due to setup_tc */
 	if (netdev->reg_state == NETREG_REGISTERED)
@@ -2156,11 +2415,14 @@ static void fm10k_prepare_suspend(struct fm10k_intfc *interface)
 	 * a surprise remove if the PCIe device is disabled while we're
 	 * stopped. We stop the watchdog task until after we resume software
 	 * activity.
+	 *
+	 * Note that the MAC/VLAN task will be stopped as part of preparing
+	 * for reset so we don't need to handle it here.
 	 */
-	set_bit(__FM10K_SERVICE_DISABLE, interface->state);
-	cancel_work_sync(&interface->service_task);
+	fm10k_stop_service_event(interface);
 
-	fm10k_prepare_for_reset(interface);
+	if (fm10k_prepare_for_reset(interface))
+		set_bit(__FM10K_RESET_SUSPENDED, interface->state);
 }
 
 static int fm10k_handle_resume(struct fm10k_intfc *interface)
@@ -2168,6 +2430,13 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface)
 	struct fm10k_hw *hw = &interface->hw;
 	int err;
 
+	/* Even if we didn't properly prepare for reset in
+	 * fm10k_prepare_suspend, we'll attempt to resume anyways.
+	 */
+	if (!test_and_clear_bit(__FM10K_RESET_SUSPENDED, interface->state))
+		dev_warn(&interface->pdev->dev,
+			 "Device was shut down as part of suspend... Attempting to recover\n");
+
 	/* reset statistics starting values */
 	hw->mac.ops.rebind_hw_stats(hw, &interface->stats);
 
@@ -2185,45 +2454,30 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface)
 	interface->link_down_event = jiffies + (HZ);
 	set_bit(__FM10K_LINK_DOWN, interface->state);
 
-	/* clear the service task disable bit to allow service task to start */
-	clear_bit(__FM10K_SERVICE_DISABLE, interface->state);
-	fm10k_service_event_schedule(interface);
+	/* restart the service task */
+	fm10k_start_service_event(interface);
+
+	/* Restart the MAC/VLAN request queue in-case of outstanding events */
+	fm10k_macvlan_schedule(interface);
 
 	return err;
 }
 
 #ifdef CONFIG_PM
 /**
- * fm10k_resume - Restore device to pre-sleep state
- * @pdev: PCI device information struct
+ * fm10k_resume - Generic PM resume hook
+ * @dev: generic device structure
  *
- * fm10k_resume is called after the system has powered back up from a sleep
- * state and is ready to resume operation.  This function is meant to restore
- * the device back to its pre-sleep state.
+ * Generic PM hook used when waking the device from a low power state after
+ * suspend or hibernation. This function does not need to handle lower PCIe
+ * device state as the stack takes care of that for us.
  **/
-static int fm10k_resume(struct pci_dev *pdev)
+static int fm10k_resume(struct device *dev)
 {
-	struct fm10k_intfc *interface = pci_get_drvdata(pdev);
+	struct fm10k_intfc *interface = pci_get_drvdata(to_pci_dev(dev));
 	struct net_device *netdev = interface->netdev;
 	struct fm10k_hw *hw = &interface->hw;
-	u32 err;
-
-	pci_set_power_state(pdev, PCI_D0);
-	pci_restore_state(pdev);
-
-	/* pci_restore_state clears dev->state_saved so call
-	 * pci_save_state to restore it.
-	 */
-	pci_save_state(pdev);
-
-	err = pci_enable_device_mem(pdev);
-	if (err) {
-		dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n");
-		return err;
-	}
-	pci_set_master(pdev);
-
-	pci_wake_from_d3(pdev, false);
+	int err;
 
 	/* refresh hw_addr in case it was dropped */
 	hw->hw_addr = interface->uc_addr;
@@ -2238,36 +2492,27 @@ static int fm10k_resume(struct pci_dev *pdev)
 }
 
 /**
- * fm10k_suspend - Prepare the device for a system sleep state
- * @pdev: PCI device information struct
+ * fm10k_suspend - Generic PM suspend hook
+ * @dev: generic device structure
  *
- * fm10k_suspend is meant to shutdown the device prior to the system entering
- * a sleep state.  The fm10k hardware does not support wake on lan so the
- * driver simply needs to shut down the device so it is in a low power state.
+ * Generic PM hook used when setting the device into a low power state for
+ * system suspend or hibernation. This function does not need to handle lower
+ * PCIe device state as the stack takes care of that for us.
  **/
-static int fm10k_suspend(struct pci_dev *pdev,
-			 pm_message_t __always_unused state)
+static int fm10k_suspend(struct device *dev)
 {
-	struct fm10k_intfc *interface = pci_get_drvdata(pdev);
+	struct fm10k_intfc *interface = pci_get_drvdata(to_pci_dev(dev));
 	struct net_device *netdev = interface->netdev;
-	int err = 0;
 
 	netif_device_detach(netdev);
 
 	fm10k_prepare_suspend(interface);
 
-	err = pci_save_state(pdev);
-	if (err)
-		return err;
-
-	pci_disable_device(pdev);
-	pci_wake_from_d3(pdev, false);
-	pci_set_power_state(pdev, PCI_D3hot);
-
 	return 0;
 }
 
 #endif /* CONFIG_PM */
+
 /**
  * fm10k_io_error_detected - called when PCI error is detected
  * @pdev: Pointer to PCI device
@@ -2343,11 +2588,18 @@ static void fm10k_io_resume(struct pci_dev *pdev)
 
 	if (err)
 		dev_warn(&pdev->dev,
-			 "fm10k_io_resume failed: %d\n", err);
+			 "%s failed: %d\n", __func__, err);
 	else
 		netif_device_attach(netdev);
 }
 
+/**
+ * fm10k_io_reset_prepare - called when PCI function is about to be reset
+ * @pdev: Pointer to PCI device
+ *
+ * This callback is called when the PCI function is about to be reset,
+ * allowing the device driver to prepare for it.
+ */
 static void fm10k_io_reset_prepare(struct pci_dev *pdev)
 {
 	/* warn incase we have any active VF devices */
@@ -2357,6 +2609,13 @@ static void fm10k_io_reset_prepare(struct pci_dev *pdev)
 	fm10k_prepare_suspend(pci_get_drvdata(pdev));
 }
 
+/**
+ * fm10k_io_reset_done - called when PCI function has finished resetting
+ * @pdev: Pointer to PCI device
+ *
+ * This callback is called just after the PCI function is reset, such as via
+ * /sys/class/net/<enpX>/device/reset or similar.
+ */
 static void fm10k_io_reset_done(struct pci_dev *pdev)
 {
 	struct fm10k_intfc *interface = pci_get_drvdata(pdev);
@@ -2364,7 +2623,7 @@ static void fm10k_io_reset_done(struct pci_dev *pdev)
 
 	if (err) {
 		dev_warn(&pdev->dev,
-			 "fm10k_io_reset_notify failed: %d\n", err);
+			 "%s failed: %d\n", __func__, err);
 		netif_device_detach(interface->netdev);
 	}
 }
@@ -2377,15 +2636,18 @@ static const struct pci_error_handlers fm10k_err_handler = {
 	.reset_done = fm10k_io_reset_done,
 };
 
+static SIMPLE_DEV_PM_OPS(fm10k_pm_ops, fm10k_suspend, fm10k_resume);
+
 static struct pci_driver fm10k_driver = {
 	.name			= fm10k_driver_name,
 	.id_table		= fm10k_pci_tbl,
 	.probe			= fm10k_probe,
 	.remove			= fm10k_remove,
 #ifdef CONFIG_PM
-	.suspend		= fm10k_suspend,
-	.resume			= fm10k_resume,
-#endif
+	.driver = {
+		.pm		= &fm10k_pm_ops,
+	},
+#endif /* CONFIG_PM */
 	.sriov_configure	= fm10k_iov_configure,
 	.err_handler		= &fm10k_err_handler
 };
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
index 40ee0242a80a..425d814aed4d 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
@@ -1,5 +1,5 @@
 /* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
+ * Copyright(c) 2013 - 2017 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -1186,7 +1186,7 @@ s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *hw, u32 **results,
  * Will report an error if the VLAN ID is out of range. For VID = 0, it will
  * return either the pf_vid or sw_vid depending on which one is set.
  */
-static s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid)
+s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid)
 {
 	if (!vid)
 		return vf_info->pf_vid ? vf_info->pf_vid : vf_info->sw_vid;
@@ -1334,19 +1334,19 @@ static u8 fm10k_iov_supported_xcast_mode_pf(struct fm10k_vf_info *vf_info,
 	case FM10K_XCAST_MODE_PROMISC:
 		if (vf_flags & FM10K_VF_FLAG_PROMISC_CAPABLE)
 			return FM10K_XCAST_MODE_PROMISC;
-		/* fallthough */
+		/* fall through */
 	case FM10K_XCAST_MODE_ALLMULTI:
 		if (vf_flags & FM10K_VF_FLAG_ALLMULTI_CAPABLE)
 			return FM10K_XCAST_MODE_ALLMULTI;
-		/* fallthough */
+		/* fall through */
 	case FM10K_XCAST_MODE_MULTI:
 		if (vf_flags & FM10K_VF_FLAG_MULTI_CAPABLE)
 			return FM10K_XCAST_MODE_MULTI;
-		/* fallthough */
+		/* fall through */
 	case FM10K_XCAST_MODE_NONE:
 		if (vf_flags & FM10K_VF_FLAG_NONE_CAPABLE)
 			return FM10K_XCAST_MODE_NONE;
-		/* fallthough */
+		/* fall through */
 	default:
 		break;
 	}
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
index 3336d3c10760..e04d41f1a532 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
@@ -1,5 +1,5 @@
 /* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
+ * Copyright(c) 2013 - 2017 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -114,6 +114,7 @@ extern const struct fm10k_tlv_attr fm10k_err_msg_attr[];
 #define FM10K_PF_MSG_ERR_HANDLER(msg, func) \
 	FM10K_MSG_HANDLER(FM10K_PF_MSG_ID_##msg, fm10k_err_msg_attr, func)
 
+s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid);
 s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *, u32 **, struct fm10k_mbx_info *);
 s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *, u32 **,
 			      struct fm10k_mbx_info *);
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index d0c1bf5441d8..5829715fa342 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -54,6 +54,9 @@
 #include <linux/clocksource.h>
 #include <linux/net_tstamp.h>
 #include <linux/ptp_clock_kernel.h>
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_mirred.h>
 #include "i40e_type.h"
 #include "i40e_prototype.h"
 #include "i40e_client.h"
@@ -77,6 +80,7 @@
 #define i40e_default_queues_per_vmdq(pf) \
 		(((pf)->hw_features & I40E_HW_RSS_AQ_CAPABLE) ? 4 : 1)
 #define I40E_DEFAULT_QUEUES_PER_VF	4
+#define I40E_MAX_VF_QUEUES		16
 #define I40E_DEFAULT_QUEUES_PER_TC	1 /* should be a power of 2 */
 #define i40e_pf_get_max_q_per_tc(pf) \
 		(((pf)->hw_features & I40E_HW_128_QP_RSS_CAPABLE) ? 128 : 64)
@@ -86,6 +90,7 @@
 #define I40E_AQ_LEN			256
 #define I40E_AQ_WORK_LIMIT		66 /* max number of VFs + a little */
 #define I40E_MAX_USER_PRIORITY		8
+#define I40E_MAX_QUEUES_PER_CH		64
 #define I40E_DEFAULT_TRAFFIC_CLASS	BIT(0)
 #define I40E_DEFAULT_MSG_ENABLE		4
 #define I40E_QUEUE_WAIT_RETRY_LIMIT	10
@@ -125,6 +130,11 @@
 /* default to trying for four seconds */
 #define I40E_TRY_LINK_TIMEOUT	(4 * HZ)
 
+/* BW rate limiting */
+#define I40E_BW_CREDIT_DIVISOR		50 /* 50Mbps per BW credit */
+#define I40E_BW_MBPS_DIVISOR		125000 /* rate / (1000000 / 8) Mbps */
+#define I40E_MAX_BW_INACTIVE_ACCUM	4 /* accumulate 4 credits max */
+
 /* driver state flags */
 enum i40e_state_t {
 	__I40E_TESTING,
@@ -136,6 +146,7 @@ enum i40e_state_t {
 	__I40E_MDD_EVENT_PENDING,
 	__I40E_VFLR_EVENT_PENDING,
 	__I40E_RESET_RECOVERY_PENDING,
+	__I40E_MISC_IRQ_REQUESTED,
 	__I40E_RESET_INTR_RECEIVED,
 	__I40E_REINIT_REQUESTED,
 	__I40E_PF_RESET_REQUESTED,
@@ -155,6 +166,8 @@ enum i40e_state_t {
 	__I40E_STATE_SIZE__,
 };
 
+#define I40E_PF_RESET_FLAG	BIT_ULL(__I40E_PF_RESET_REQUESTED)
+
 /* VSI state flags */
 enum i40e_vsi_state_t {
 	__I40E_VSI_DOWN,
@@ -242,6 +255,58 @@ struct i40e_fdir_filter {
 	u32 fd_id;
 };
 
+#define I40E_CLOUD_FIELD_OMAC	0x01
+#define I40E_CLOUD_FIELD_IMAC	0x02
+#define I40E_CLOUD_FIELD_IVLAN	0x04
+#define I40E_CLOUD_FIELD_TEN_ID	0x08
+#define I40E_CLOUD_FIELD_IIP	0x10
+
+#define I40E_CLOUD_FILTER_FLAGS_OMAC	I40E_CLOUD_FIELD_OMAC
+#define I40E_CLOUD_FILTER_FLAGS_IMAC	I40E_CLOUD_FIELD_IMAC
+#define I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN	(I40E_CLOUD_FIELD_IMAC | \
+						 I40E_CLOUD_FIELD_IVLAN)
+#define I40E_CLOUD_FILTER_FLAGS_IMAC_TEN_ID	(I40E_CLOUD_FIELD_IMAC | \
+						 I40E_CLOUD_FIELD_TEN_ID)
+#define I40E_CLOUD_FILTER_FLAGS_OMAC_TEN_ID_IMAC (I40E_CLOUD_FIELD_OMAC | \
+						  I40E_CLOUD_FIELD_IMAC | \
+						  I40E_CLOUD_FIELD_TEN_ID)
+#define I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN_TEN_ID (I40E_CLOUD_FIELD_IMAC | \
+						   I40E_CLOUD_FIELD_IVLAN | \
+						   I40E_CLOUD_FIELD_TEN_ID)
+#define I40E_CLOUD_FILTER_FLAGS_IIP	I40E_CLOUD_FIELD_IIP
+
+struct i40e_cloud_filter {
+	struct hlist_node cloud_node;
+	unsigned long cookie;
+	/* cloud filter input set follows */
+	u8 dst_mac[ETH_ALEN];
+	u8 src_mac[ETH_ALEN];
+	__be16 vlan_id;
+	u16 seid;       /* filter control */
+	__be16 dst_port;
+	__be16 src_port;
+	u32 tenant_id;
+	union {
+		struct {
+			struct in_addr dst_ip;
+			struct in_addr src_ip;
+		} v4;
+		struct {
+			struct in6_addr dst_ip6;
+			struct in6_addr src_ip6;
+		} v6;
+	} ip;
+#define dst_ipv6	ip.v6.dst_ip6.s6_addr32
+#define src_ipv6	ip.v6.src_ip6.s6_addr32
+#define dst_ipv4	ip.v4.dst_ip.s_addr
+#define src_ipv4	ip.v4.src_ip.s_addr
+	u16 n_proto;    /* Ethernet Protocol */
+	u8 ip_proto;    /* IPPROTO value */
+	u8 flags;
+#define I40E_CLOUD_TNL_TYPE_NONE        0xff
+	u8 tunnel_type;
+};
+
 #define I40E_ETH_P_LLDP			0x88cc
 
 #define I40E_DCB_PRIO_TYPE_STRICT	0
@@ -336,6 +401,25 @@ struct i40e_flex_pit {
 	u8 pit_index;
 };
 
+struct i40e_channel {
+	struct list_head list;
+	bool initialized;
+	u8 type;
+	u16 vsi_number; /* Assigned VSI number from AQ 'Add VSI' response */
+	u16 stat_counter_idx;
+	u16 base_queue;
+	u16 num_queue_pairs; /* Requested by user */
+	u16 seid;
+
+	u8 enabled_tc;
+	struct i40e_aqc_vsi_properties_data info;
+
+	u64 max_tx_rate;
+
+	/* track this channel belongs to which VSI */
+	struct i40e_vsi *parent_vsi;
+};
+
 /* struct that defines the Ethernet device */
 struct i40e_pf {
 	struct pci_dev *pdev;
@@ -348,7 +432,7 @@ struct i40e_pf {
 	u16 num_vmdq_vsis;         /* num vmdq vsis this PF has set up */
 	u16 num_vmdq_qps;          /* num queue pairs per vmdq pool */
 	u16 num_vmdq_msix;         /* num queue vectors per vmdq pool */
-	u16 num_req_vfs;           /* num VFs requested for this VF */
+	u16 num_req_vfs;           /* num VFs requested for this PF */
 	u16 num_vf_qps;            /* num queue pairs per VF */
 	u16 num_lan_qps;           /* num lan queues this PF has set up */
 	u16 num_lan_msix;          /* num queue vectors for the base PF vsi */
@@ -390,6 +474,9 @@ struct i40e_pf {
 	struct i40e_udp_port_config udp_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS];
 	u16 pending_udp_bitmap;
 
+	struct hlist_head cloud_filter_list;
+	u16 num_cloud_filters;
+
 	enum i40e_interrupt_policy int_policy;
 	u16 rx_itr_default;
 	u16 tx_itr_default;
@@ -401,55 +488,60 @@ struct i40e_pf {
 	struct timer_list service_timer;
 	struct work_struct service_task;
 
-	u64 hw_features;
-#define I40E_HW_RSS_AQ_CAPABLE			BIT_ULL(0)
-#define I40E_HW_128_QP_RSS_CAPABLE		BIT_ULL(1)
-#define I40E_HW_ATR_EVICT_CAPABLE		BIT_ULL(2)
-#define I40E_HW_WB_ON_ITR_CAPABLE		BIT_ULL(3)
-#define I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE	BIT_ULL(4)
-#define I40E_HW_NO_PCI_LINK_CHECK		BIT_ULL(5)
-#define I40E_HW_100M_SGMII_CAPABLE		BIT_ULL(6)
-#define I40E_HW_NO_DCB_SUPPORT			BIT_ULL(7)
-#define I40E_HW_USE_SET_LLDP_MIB		BIT_ULL(8)
-#define I40E_HW_GENEVE_OFFLOAD_CAPABLE		BIT_ULL(9)
-#define I40E_HW_PTP_L4_CAPABLE			BIT_ULL(10)
-#define I40E_HW_WOL_MC_MAGIC_PKT_WAKE		BIT_ULL(11)
-#define I40E_HW_MPLS_HDR_OFFLOAD_CAPABLE	BIT_ULL(12)
-#define I40E_HW_HAVE_CRT_RETIMER		BIT_ULL(13)
-#define I40E_HW_OUTER_UDP_CSUM_CAPABLE		BIT_ULL(14)
-#define I40E_HW_PHY_CONTROLS_LEDS		BIT_ULL(15)
-#define I40E_HW_STOP_FW_LLDP			BIT_ULL(16)
-#define I40E_HW_PORT_ID_VALID			BIT_ULL(17)
-#define I40E_HW_RESTART_AUTONEG			BIT_ULL(18)
-
-	u64 flags;
-#define I40E_FLAG_RX_CSUM_ENABLED		BIT_ULL(1)
-#define I40E_FLAG_MSI_ENABLED			BIT_ULL(2)
-#define I40E_FLAG_MSIX_ENABLED			BIT_ULL(3)
-#define I40E_FLAG_HW_ATR_EVICT_ENABLED		BIT_ULL(4)
-#define I40E_FLAG_RSS_ENABLED			BIT_ULL(6)
-#define I40E_FLAG_VMDQ_ENABLED			BIT_ULL(7)
-#define I40E_FLAG_IWARP_ENABLED			BIT_ULL(10)
-#define I40E_FLAG_FILTER_SYNC			BIT_ULL(15)
-#define I40E_FLAG_SERVICE_CLIENT_REQUESTED	BIT_ULL(16)
-#define I40E_FLAG_SRIOV_ENABLED			BIT_ULL(19)
-#define I40E_FLAG_DCB_ENABLED			BIT_ULL(20)
-#define I40E_FLAG_FD_SB_ENABLED			BIT_ULL(21)
-#define I40E_FLAG_FD_ATR_ENABLED		BIT_ULL(22)
-#define I40E_FLAG_FD_SB_AUTO_DISABLED		BIT_ULL(23)
-#define I40E_FLAG_FD_ATR_AUTO_DISABLED		BIT_ULL(24)
-#define I40E_FLAG_PTP				BIT_ULL(25)
-#define I40E_FLAG_MFP_ENABLED			BIT_ULL(26)
-#define I40E_FLAG_UDP_FILTER_SYNC		BIT_ULL(27)
-#define I40E_FLAG_DCB_CAPABLE			BIT_ULL(29)
-#define I40E_FLAG_VEB_STATS_ENABLED		BIT_ULL(37)
-#define I40E_FLAG_LINK_POLLING_ENABLED		BIT_ULL(39)
-#define I40E_FLAG_VEB_MODE_ENABLED		BIT_ULL(40)
-#define I40E_FLAG_TRUE_PROMISC_SUPPORT		BIT_ULL(51)
-#define I40E_FLAG_CLIENT_RESET			BIT_ULL(54)
-#define I40E_FLAG_TEMP_LINK_POLLING		BIT_ULL(55)
-#define I40E_FLAG_CLIENT_L2_CHANGE		BIT_ULL(56)
-#define I40E_FLAG_LEGACY_RX			BIT_ULL(58)
+	u32 hw_features;
+#define I40E_HW_RSS_AQ_CAPABLE			BIT(0)
+#define I40E_HW_128_QP_RSS_CAPABLE		BIT(1)
+#define I40E_HW_ATR_EVICT_CAPABLE		BIT(2)
+#define I40E_HW_WB_ON_ITR_CAPABLE		BIT(3)
+#define I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE	BIT(4)
+#define I40E_HW_NO_PCI_LINK_CHECK		BIT(5)
+#define I40E_HW_100M_SGMII_CAPABLE		BIT(6)
+#define I40E_HW_NO_DCB_SUPPORT			BIT(7)
+#define I40E_HW_USE_SET_LLDP_MIB		BIT(8)
+#define I40E_HW_GENEVE_OFFLOAD_CAPABLE		BIT(9)
+#define I40E_HW_PTP_L4_CAPABLE			BIT(10)
+#define I40E_HW_WOL_MC_MAGIC_PKT_WAKE		BIT(11)
+#define I40E_HW_MPLS_HDR_OFFLOAD_CAPABLE	BIT(12)
+#define I40E_HW_HAVE_CRT_RETIMER		BIT(13)
+#define I40E_HW_OUTER_UDP_CSUM_CAPABLE		BIT(14)
+#define I40E_HW_PHY_CONTROLS_LEDS		BIT(15)
+#define I40E_HW_STOP_FW_LLDP			BIT(16)
+#define I40E_HW_PORT_ID_VALID			BIT(17)
+#define I40E_HW_RESTART_AUTONEG			BIT(18)
+
+	u32 flags;
+#define I40E_FLAG_RX_CSUM_ENABLED		BIT(0)
+#define I40E_FLAG_MSI_ENABLED			BIT(1)
+#define I40E_FLAG_MSIX_ENABLED			BIT(2)
+#define I40E_FLAG_RSS_ENABLED			BIT(3)
+#define I40E_FLAG_VMDQ_ENABLED			BIT(4)
+#define I40E_FLAG_FILTER_SYNC			BIT(5)
+#define I40E_FLAG_SRIOV_ENABLED			BIT(6)
+#define I40E_FLAG_DCB_CAPABLE			BIT(7)
+#define I40E_FLAG_DCB_ENABLED			BIT(8)
+#define I40E_FLAG_FD_SB_ENABLED			BIT(9)
+#define I40E_FLAG_FD_ATR_ENABLED		BIT(10)
+#define I40E_FLAG_FD_SB_AUTO_DISABLED		BIT(11)
+#define I40E_FLAG_FD_ATR_AUTO_DISABLED		BIT(12)
+#define I40E_FLAG_MFP_ENABLED			BIT(13)
+#define I40E_FLAG_UDP_FILTER_SYNC		BIT(14)
+#define I40E_FLAG_HW_ATR_EVICT_ENABLED		BIT(15)
+#define I40E_FLAG_VEB_MODE_ENABLED		BIT(16)
+#define I40E_FLAG_VEB_STATS_ENABLED		BIT(17)
+#define I40E_FLAG_LINK_POLLING_ENABLED		BIT(18)
+#define I40E_FLAG_TRUE_PROMISC_SUPPORT		BIT(19)
+#define I40E_FLAG_TEMP_LINK_POLLING		BIT(20)
+#define I40E_FLAG_LEGACY_RX			BIT(21)
+#define I40E_FLAG_PTP				BIT(22)
+#define I40E_FLAG_IWARP_ENABLED			BIT(23)
+#define I40E_FLAG_SERVICE_CLIENT_REQUESTED	BIT(24)
+#define I40E_FLAG_CLIENT_L2_CHANGE		BIT(25)
+#define I40E_FLAG_CLIENT_RESET			BIT(26)
+#define I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED	BIT(27)
+#define I40E_FLAG_SOURCE_PRUNING_DISABLED	BIT(28)
+#define I40E_FLAG_TC_MQPRIO			BIT(29)
+#define I40E_FLAG_FD_SB_INACTIVE		BIT(30)
+#define I40E_FLAG_FD_SB_TO_CLOUD_FILTER		BIT(31)
 
 	struct i40e_client_instance *cinst;
 	bool stat_offsets_loaded;
@@ -530,6 +622,10 @@ struct i40e_pf {
 	u32 ioremap_len;
 	u32 fd_inv;
 	u16 phy_led_val;
+
+	u16 override_q_count;
+	u16 last_sw_conf_flags;
+	u16 last_sw_conf_valid_flags;
 };
 
 /**
@@ -673,6 +769,7 @@ struct i40e_vsi {
 	enum i40e_vsi_type type;  /* VSI type, e.g., LAN, FCoE, etc */
 	s16 vf_id;		/* Virtual function ID for SRIOV VSIs */
 
+	struct tc_mqprio_qopt_offload mqprio_qopt; /* queue parameters */
 	struct i40e_tc_configuration tc_config;
 	struct i40e_aqc_vsi_properties_data info;
 
@@ -694,6 +791,17 @@ struct i40e_vsi {
 	bool current_isup;	/* Sync 'link up' logging */
 	enum i40e_aq_link_speed current_speed;	/* Sync link speed logging */
 
+	/* channel specific fields */
+	u16 cnt_q_avail;	/* num of queues available for channel usage */
+	u16 orig_rss_size;
+	u16 current_rss_size;
+	bool reconfig_rss;
+
+	u16 next_base_queue;	/* next queue to be used for channel setup */
+
+	struct list_head ch_list;
+	u16 tc_seid_map[I40E_MAX_TRAFFIC_CLASS];
+
 	void *priv;	/* client driver data reference. */
 
 	/* VSI specific handlers */
@@ -945,9 +1053,6 @@ static inline void i40e_irq_dynamic_enable(struct i40e_vsi *vsi, int vector)
 	struct i40e_hw *hw = &pf->hw;
 	u32 val;
 
-	/* definitely clear the PBA here, as this function is meant to
-	 * clean out all previous interrupts AND enable the interrupt
-	 */
 	val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
 	      I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
 	      (I40E_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
@@ -956,7 +1061,7 @@ static inline void i40e_irq_dynamic_enable(struct i40e_vsi *vsi, int vector)
 }
 
 void i40e_irq_dynamic_disable_icr0(struct i40e_pf *pf);
-void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf, bool clearpba);
+void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf);
 int i40e_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd);
 int i40e_open(struct net_device *netdev);
 int i40e_close(struct net_device *netdev);
@@ -1001,4 +1106,7 @@ static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi)
 {
 	return !!vsi->xdp_prog;
 }
+
+int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch);
+int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate);
 #endif /* _I40E_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
index ba04988e0598..9dcb2a961197 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
@@ -607,6 +607,18 @@ i40e_status i40e_init_adminq(struct i40e_hw *hw)
 			   &oem_lo);
 	hw->nvm.oem_ver = ((u32)oem_hi << 16) | oem_lo;
 
+	if (hw->mac.type == I40E_MAC_XL710 &&
+	    hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
+	    hw->aq.api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710) {
+		hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE;
+	}
+
+	/* The ability to RX (not drop) 802.1ad frames was added in API 1.7 */
+	if (hw->aq.api_maj_ver > 1 ||
+	    (hw->aq.api_maj_ver == 1 &&
+	     hw->aq.api_min_ver >= 7))
+		hw->flags |= I40E_HW_FLAG_802_1AD_CAPABLE;
+
 	if (hw->aq.api_maj_ver > I40E_FW_API_VERSION_MAJOR) {
 		ret_code = I40E_ERR_FIRMWARE_API_VERSION;
 		goto init_adminq_free_arq;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index 5d5f422cbae5..b0188b8f91ba 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -34,7 +34,15 @@
  */
 
 #define I40E_FW_API_VERSION_MAJOR	0x0001
-#define I40E_FW_API_VERSION_MINOR	0x0005
+#define I40E_FW_API_VERSION_MINOR_X722	0x0005
+#define I40E_FW_API_VERSION_MINOR_X710	0x0007
+
+#define I40E_FW_MINOR_VERSION(_h) ((_h)->mac.type == I40E_MAC_XL710 ? \
+					I40E_FW_API_VERSION_MINOR_X710 : \
+					I40E_FW_API_VERSION_MINOR_X722)
+
+/* API version 1.7 implements additional link and PHY-specific APIs  */
+#define I40E_MINOR_VER_GET_LINK_INFO_XL710 0x0007
 
 struct i40e_aq_desc {
 	__le16 flags;
@@ -236,6 +244,8 @@ enum i40e_admin_queue_opc {
 	i40e_aqc_opc_set_phy_debug		= 0x0622,
 	i40e_aqc_opc_upload_ext_phy_fm		= 0x0625,
 	i40e_aqc_opc_run_phy_activity		= 0x0626,
+	i40e_aqc_opc_set_phy_register		= 0x0628,
+	i40e_aqc_opc_get_phy_register		= 0x0629,
 
 	/* NVM commands */
 	i40e_aqc_opc_nvm_read			= 0x0701,
@@ -765,7 +775,50 @@ struct i40e_aqc_set_switch_config {
 #define I40E_AQ_SET_SWITCH_CFG_PROMISC		0x0001
 #define I40E_AQ_SET_SWITCH_CFG_L2_FILTER	0x0002
 	__le16	valid_flags;
-	u8	reserved[12];
+	/* The ethertype in switch_tag is dropped on ingress and used
+	 * internally by the switch. Set this to zero for the default
+	 * of 0x88a8 (802.1ad). Should be zero for firmware API
+	 * versions lower than 1.7.
+	 */
+	__le16	switch_tag;
+	/* The ethertypes in first_tag and second_tag are used to
+	 * match the outer and inner VLAN tags (respectively) when HW
+	 * double VLAN tagging is enabled via the set port parameters
+	 * AQ command. Otherwise these are both ignored. Set them to
+	 * zero for their defaults of 0x8100 (802.1Q). Should be zero
+	 * for firmware API versions lower than 1.7.
+	 */
+	__le16	first_tag;
+	__le16	second_tag;
+	/* Next byte is split into following:
+	 * Bit 7    : 0 : No action, 1: Switch to mode defined by bits 6:0
+	 * Bit 6    : 0 : Destination Port, 1: source port
+	 * Bit 5..4 : L4 type
+	 * 0: rsvd
+	 * 1: TCP
+	 * 2: UDP
+	 * 3: Both TCP and UDP
+	 * Bits 3:0 Mode
+	 * 0: default mode
+	 * 1: L4 port only mode
+	 * 2: non-tunneled mode
+	 * 3: tunneled mode
+	 */
+#define I40E_AQ_SET_SWITCH_BIT7_VALID		0x80
+
+#define I40E_AQ_SET_SWITCH_L4_SRC_PORT		0x40
+
+#define I40E_AQ_SET_SWITCH_L4_TYPE_RSVD		0x00
+#define I40E_AQ_SET_SWITCH_L4_TYPE_TCP		0x10
+#define I40E_AQ_SET_SWITCH_L4_TYPE_UDP		0x20
+#define I40E_AQ_SET_SWITCH_L4_TYPE_BOTH		0x30
+
+#define I40E_AQ_SET_SWITCH_MODE_DEFAULT		0x00
+#define I40E_AQ_SET_SWITCH_MODE_L4_PORT		0x01
+#define I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL	0x02
+#define I40E_AQ_SET_SWITCH_MODE_TUNNEL		0x03
+	u8	mode;
+	u8	rsvd5[5];
 };
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_set_switch_config);
@@ -1318,14 +1371,16 @@ struct i40e_aqc_add_remove_cloud_filters {
 #define I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_SHIFT	0
 #define I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_MASK	(0x3FF << \
 					I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_SHIFT)
-	u8	reserved2[4];
+	u8	big_buffer_flag;
+#define I40E_AQC_ADD_CLOUD_CMD_BB	1
+	u8	reserved2[3];
 	__le32	addr_high;
 	__le32	addr_low;
 };
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_cloud_filters);
 
-struct i40e_aqc_add_remove_cloud_filters_element_data {
+struct i40e_aqc_cloud_filters_element_data {
 	u8	outer_mac[6];
 	u8	inner_mac[6];
 	__le16	inner_vlan;
@@ -1337,6 +1392,9 @@ struct i40e_aqc_add_remove_cloud_filters_element_data {
 		struct {
 			u8 data[16];
 		} v6;
+		struct {
+			__le16 data[8];
+		} raw_v6;
 	} ipaddr;
 	__le16	flags;
 #define I40E_AQC_ADD_CLOUD_FILTER_SHIFT			0
@@ -1355,6 +1413,10 @@ struct i40e_aqc_add_remove_cloud_filters_element_data {
 #define I40E_AQC_ADD_CLOUD_FILTER_IMAC			0x000A
 #define I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC	0x000B
 #define I40E_AQC_ADD_CLOUD_FILTER_IIP			0x000C
+/* 0x0010 to 0x0017 is for custom filters */
+#define I40E_AQC_ADD_CLOUD_FILTER_IP_PORT		0x0010 /* Dest IP + L4 Port */
+#define I40E_AQC_ADD_CLOUD_FILTER_MAC_PORT		0x0011 /* Dest MAC + L4 Port */
+#define I40E_AQC_ADD_CLOUD_FILTER_MAC_VLAN_PORT		0x0012 /* Dest MAC + VLAN + L4 Port */
 
 #define I40E_AQC_ADD_CLOUD_FLAGS_TO_QUEUE		0x0080
 #define I40E_AQC_ADD_CLOUD_VNK_SHIFT			6
@@ -1389,6 +1451,49 @@ struct i40e_aqc_add_remove_cloud_filters_element_data {
 	u8	response_reserved[7];
 };
 
+I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_cloud_filters_element_data);
+
+/* i40e_aqc_cloud_filters_element_bb is used when
+ * I40E_AQC_CLOUD_CMD_BB flag is set.
+ */
+struct i40e_aqc_cloud_filters_element_bb {
+	struct i40e_aqc_cloud_filters_element_data element;
+	u16     general_fields[32];
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD0	0
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD1	1
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD2	2
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD0	3
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD1	4
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD2	5
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD0	6
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD1	7
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD2	8
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD0	9
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD1	10
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD2	11
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X14_WORD0	12
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X14_WORD1	13
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X14_WORD2	14
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD0	15
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD1	16
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD2	17
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD3	18
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD4	19
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD5	20
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD6	21
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD7	22
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD0	23
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD1	24
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD2	25
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD3	26
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD4	27
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD5	28
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD6	29
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD7	30
+};
+
+I40E_CHECK_STRUCT_LEN(0x80, i40e_aqc_cloud_filters_element_bb);
+
 struct i40e_aqc_remove_cloud_filters_completion {
 	__le16 perfect_ovlan_used;
 	__le16 perfect_ovlan_free;
@@ -1400,6 +1505,60 @@ struct i40e_aqc_remove_cloud_filters_completion {
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_remove_cloud_filters_completion);
 
+/* Replace filter Command 0x025F
+ * uses the i40e_aqc_replace_cloud_filters,
+ * and the generic indirect completion structure
+ */
+struct i40e_filter_data {
+	u8 filter_type;
+	u8 input[3];
+};
+
+I40E_CHECK_STRUCT_LEN(4, i40e_filter_data);
+
+struct i40e_aqc_replace_cloud_filters_cmd {
+	u8      valid_flags;
+#define I40E_AQC_REPLACE_L1_FILTER		0x0
+#define I40E_AQC_REPLACE_CLOUD_FILTER		0x1
+#define I40E_AQC_GET_CLOUD_FILTERS		0x2
+#define I40E_AQC_MIRROR_CLOUD_FILTER		0x4
+#define I40E_AQC_HIGH_PRIORITY_CLOUD_FILTER	0x8
+	u8      old_filter_type;
+	u8      new_filter_type;
+	u8      tr_bit;
+	u8      reserved[4];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_replace_cloud_filters_cmd);
+
+struct i40e_aqc_replace_cloud_filters_cmd_buf {
+	u8      data[32];
+/* Filter type INPUT codes*/
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_ENTRIES_MAX	3
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED	BIT(7)
+
+/* Field Vector offsets */
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_MAC_DA	0
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG_ETH	6
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG	7
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_VLAN	8
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG_OVLAN	9
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG_IVLAN	10
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_TUNNLE_KEY	11
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_IMAC	12
+/* big FLU */
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_IP_DA	14
+/* big FLU */
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_OIP_DA	15
+
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_INNER_VLAN	37
+	struct i40e_filter_data filters[8];
+};
+
+I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_replace_cloud_filters_cmd_buf);
+
 /* Add Mirror Rule (indirect or direct 0x0260)
  * Delete Mirror Rule (indirect or direct 0x0261)
  * note: some rule types (4,5) do not use an external buffer.
@@ -1726,6 +1885,8 @@ enum i40e_aq_phy_type {
 	I40E_PHY_TYPE_10GBASE_CR1_CU		= 0xB,
 	I40E_PHY_TYPE_10GBASE_AOC		= 0xC,
 	I40E_PHY_TYPE_40GBASE_AOC		= 0xD,
+	I40E_PHY_TYPE_UNRECOGNIZED		= 0xE,
+	I40E_PHY_TYPE_UNSUPPORTED		= 0xF,
 	I40E_PHY_TYPE_100BASE_TX		= 0x11,
 	I40E_PHY_TYPE_1000BASE_T		= 0x12,
 	I40E_PHY_TYPE_10GBASE_T			= 0x13,
@@ -1744,7 +1905,12 @@ enum i40e_aq_phy_type {
 	I40E_PHY_TYPE_25GBASE_CR		= 0x20,
 	I40E_PHY_TYPE_25GBASE_SR		= 0x21,
 	I40E_PHY_TYPE_25GBASE_LR		= 0x22,
-	I40E_PHY_TYPE_MAX
+	I40E_PHY_TYPE_25GBASE_AOC		= 0x23,
+	I40E_PHY_TYPE_25GBASE_ACC		= 0x24,
+	I40E_PHY_TYPE_MAX,
+	I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP	= 0xFD,
+	I40E_PHY_TYPE_EMPTY			= 0xFE,
+	I40E_PHY_TYPE_DEFAULT			= 0xFF,
 };
 
 #define I40E_LINK_SPEED_100MB_SHIFT	0x1
@@ -1801,6 +1967,8 @@ struct i40e_aq_get_phy_abilities_resp {
 #define I40E_AQ_PHY_TYPE_EXT_25G_CR	0X02
 #define I40E_AQ_PHY_TYPE_EXT_25G_SR	0x04
 #define I40E_AQ_PHY_TYPE_EXT_25G_LR	0x08
+#define I40E_AQ_PHY_TYPE_EXT_25G_AOC	0x10
+#define I40E_AQ_PHY_TYPE_EXT_25G_ACC	0x20
 	u8	fec_cfg_curr_mod_ext_info;
 #define I40E_AQ_ENABLE_FEC_KR		0x01
 #define I40E_AQ_ENABLE_FEC_RS		0x02
@@ -1934,19 +2102,31 @@ struct i40e_aqc_get_link_status {
 #define I40E_AQ_25G_SERDES_UCODE_ERR	0X04
 #define I40E_AQ_25G_NIMB_UCODE_ERR	0X05
 	u8	loopback; /* use defines from i40e_aqc_set_lb_mode */
+/* Since firmware API 1.7 loopback field keeps power class info as well */
+#define I40E_AQ_LOOPBACK_MASK		0x07
+#define I40E_AQ_PWR_CLASS_SHIFT_LB	6
+#define I40E_AQ_PWR_CLASS_MASK_LB	(0x03 << I40E_AQ_PWR_CLASS_SHIFT_LB)
 	__le16	max_frame_size;
 	u8	config;
 #define I40E_AQ_CONFIG_FEC_KR_ENA	0x01
 #define I40E_AQ_CONFIG_FEC_RS_ENA	0x02
 #define I40E_AQ_CONFIG_CRC_ENA		0x04
 #define I40E_AQ_CONFIG_PACING_MASK	0x78
-	u8	power_desc;
+	union {
+		struct {
+			u8	power_desc;
 #define I40E_AQ_LINK_POWER_CLASS_1	0x00
 #define I40E_AQ_LINK_POWER_CLASS_2	0x01
 #define I40E_AQ_LINK_POWER_CLASS_3	0x02
 #define I40E_AQ_LINK_POWER_CLASS_4	0x03
 #define I40E_AQ_PWR_CLASS_MASK		0x03
-	u8	reserved[4];
+			u8	reserved[4];
+		};
+		struct {
+			u8	link_type[4];
+			u8	link_type_ext;
+		};
+	};
 };
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_get_link_status);
@@ -2029,6 +2209,22 @@ struct i40e_aqc_run_phy_activity {
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_run_phy_activity);
 
+/* Set PHY Register command (0x0628) */
+/* Get PHY Register command (0x0629) */
+struct i40e_aqc_phy_register_access {
+	u8	phy_interface;
+#define I40E_AQ_PHY_REG_ACCESS_INTERNAL	0
+#define I40E_AQ_PHY_REG_ACCESS_EXTERNAL	1
+#define I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE	2
+	u8	dev_address;
+	u8	reserved1[2];
+	__le32	reg_address;
+	__le32	reg_value;
+	u8	reserved2[4];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_phy_register_access);
+
 /* NVM Read command (indirect 0x0701)
  * NVM Erase commands (direct 0x0702)
  * NVM Update commands (indirect 0x0703)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index 111426ba5fbc..0203665cb53c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -1180,6 +1180,8 @@ static enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw)
 	case I40E_PHY_TYPE_40GBASE_AOC:
 	case I40E_PHY_TYPE_10GBASE_AOC:
 	case I40E_PHY_TYPE_25GBASE_CR:
+	case I40E_PHY_TYPE_25GBASE_AOC:
+	case I40E_PHY_TYPE_25GBASE_ACC:
 		media = I40E_MEDIA_TYPE_DA;
 		break;
 	case I40E_PHY_TYPE_1000BASE_KX:
@@ -1567,34 +1569,57 @@ i40e_status i40e_aq_get_phy_capabilities(struct i40e_hw *hw,
 	struct i40e_aq_desc desc;
 	i40e_status status;
 	u16 abilities_size = sizeof(struct i40e_aq_get_phy_abilities_resp);
+	u16 max_delay = I40E_MAX_PHY_TIMEOUT, total_delay = 0;
 
 	if (!abilities)
 		return I40E_ERR_PARAM;
 
-	i40e_fill_default_direct_cmd_desc(&desc,
-					  i40e_aqc_opc_get_phy_abilities);
+	do {
+		i40e_fill_default_direct_cmd_desc(&desc,
+					       i40e_aqc_opc_get_phy_abilities);
 
-	desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
-	if (abilities_size > I40E_AQ_LARGE_BUF)
-		desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+		desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
+		if (abilities_size > I40E_AQ_LARGE_BUF)
+			desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
 
-	if (qualified_modules)
-		desc.params.external.param0 |=
+		if (qualified_modules)
+			desc.params.external.param0 |=
 			cpu_to_le32(I40E_AQ_PHY_REPORT_QUALIFIED_MODULES);
 
-	if (report_init)
-		desc.params.external.param0 |=
+		if (report_init)
+			desc.params.external.param0 |=
 			cpu_to_le32(I40E_AQ_PHY_REPORT_INITIAL_VALUES);
 
-	status = i40e_asq_send_command(hw, &desc, abilities, abilities_size,
-				       cmd_details);
+		status = i40e_asq_send_command(hw, &desc, abilities,
+					       abilities_size, cmd_details);
 
-	if (hw->aq.asq_last_status == I40E_AQ_RC_EIO)
-		status = I40E_ERR_UNKNOWN_PHY;
+		if (status)
+			break;
+
+		if (hw->aq.asq_last_status == I40E_AQ_RC_EIO) {
+			status = I40E_ERR_UNKNOWN_PHY;
+			break;
+		} else if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN) {
+			usleep_range(1000, 2000);
+			total_delay++;
+			status = I40E_ERR_TIMEOUT;
+		}
+	} while ((hw->aq.asq_last_status != I40E_AQ_RC_OK) &&
+		 (total_delay < max_delay));
+
+	if (status)
+		return status;
 
 	if (report_init) {
-		hw->phy.phy_types = le32_to_cpu(abilities->phy_type);
-		hw->phy.phy_types |= ((u64)abilities->phy_type_ext << 32);
+		if (hw->mac.type ==  I40E_MAC_XL710 &&
+		    hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
+		    hw->aq.api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710) {
+			status = i40e_aq_get_link_info(hw, true, NULL, NULL);
+		} else {
+			hw->phy.phy_types = le32_to_cpu(abilities->phy_type);
+			hw->phy.phy_types |=
+					((u64)abilities->phy_type_ext << 32);
+		}
 	}
 
 	return status;
@@ -1819,7 +1844,7 @@ i40e_status i40e_aq_get_link_info(struct i40e_hw *hw,
 	hw_link_info->fec_info = resp->config & (I40E_AQ_CONFIG_FEC_KR_ENA |
 						 I40E_AQ_CONFIG_FEC_RS_ENA);
 	hw_link_info->ext_info = resp->ext_info;
-	hw_link_info->loopback = resp->loopback;
+	hw_link_info->loopback = resp->loopback & I40E_AQ_LOOPBACK_MASK;
 	hw_link_info->max_frame_size = le16_to_cpu(resp->max_frame_size);
 	hw_link_info->pacing = resp->config & I40E_AQ_CONFIG_PACING_MASK;
 
@@ -1850,6 +1875,15 @@ i40e_status i40e_aq_get_link_info(struct i40e_hw *hw,
 	     hw->aq.fw_min_ver < 40)) && hw_link_info->phy_type == 0xE)
 		hw_link_info->phy_type = I40E_PHY_TYPE_10GBASE_SFPP_CU;
 
+	if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
+	    hw->aq.api_min_ver >= 7) {
+		__le32 tmp;
+
+		memcpy(&tmp, resp->link_type, sizeof(tmp));
+		hw->phy.phy_types = le32_to_cpu(tmp);
+		hw->phy.phy_types |= ((u64)resp->link_type_ext << 32);
+	}
+
 	/* save link status information */
 	if (link)
 		*link = *hw_link_info;
@@ -2373,13 +2407,14 @@ i40e_status i40e_aq_get_switch_config(struct i40e_hw *hw,
  * @hw: pointer to the hardware structure
  * @flags: bit flag values to set
  * @valid_flags: which bit flags to set
+ * @mode: cloud filter mode
  * @cmd_details: pointer to command details structure or NULL
  *
  * Set switch configuration bits
  **/
 enum i40e_status_code i40e_aq_set_switch_config(struct i40e_hw *hw,
 						u16 flags,
-						u16 valid_flags,
+						u16 valid_flags, u8 mode,
 				struct i40e_asq_cmd_details *cmd_details)
 {
 	struct i40e_aq_desc desc;
@@ -2391,7 +2426,12 @@ enum i40e_status_code i40e_aq_set_switch_config(struct i40e_hw *hw,
 					  i40e_aqc_opc_set_switch_config);
 	scfg->flags = cpu_to_le16(flags);
 	scfg->valid_flags = cpu_to_le16(valid_flags);
-
+	scfg->mode = mode;
+	if (hw->flags & I40E_HW_FLAG_802_1AD_CAPABLE) {
+		scfg->switch_tag = cpu_to_le16(hw->switch_tag);
+		scfg->first_tag = cpu_to_le16(hw->first_tag);
+		scfg->second_tag = cpu_to_le16(hw->second_tag);
+	}
 	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
 
 	return status;
@@ -4826,6 +4866,74 @@ phy_blinking_end:
 }
 
 /**
+ * i40e_led_get_reg - read LED register
+ * @hw: pointer to the HW structure
+ * @led_addr: LED register address
+ * @reg_val: read register value
+ **/
+static enum i40e_status_code i40e_led_get_reg(struct i40e_hw *hw, u16 led_addr,
+					      u32 *reg_val)
+{
+	enum i40e_status_code status;
+	u8 phy_addr = 0;
+	u8 port_num;
+	u32 i;
+
+	*reg_val = 0;
+	if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) {
+		status =
+		       i40e_aq_get_phy_register(hw,
+						I40E_AQ_PHY_REG_ACCESS_EXTERNAL,
+						I40E_PHY_COM_REG_PAGE,
+						I40E_PHY_LED_PROV_REG_1,
+						reg_val, NULL);
+	} else {
+		i = rd32(hw, I40E_PFGEN_PORTNUM);
+		port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK);
+		phy_addr = i40e_get_phy_address(hw, port_num);
+		status = i40e_read_phy_register_clause45(hw,
+							 I40E_PHY_COM_REG_PAGE,
+							 led_addr, phy_addr,
+							 (u16 *)reg_val);
+	}
+	return status;
+}
+
+/**
+ * i40e_led_set_reg - write LED register
+ * @hw: pointer to the HW structure
+ * @led_addr: LED register address
+ * @reg_val: register value to write
+ **/
+static enum i40e_status_code i40e_led_set_reg(struct i40e_hw *hw, u16 led_addr,
+					      u32 reg_val)
+{
+	enum i40e_status_code status;
+	u8 phy_addr = 0;
+	u8 port_num;
+	u32 i;
+
+	if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) {
+		status =
+		       i40e_aq_set_phy_register(hw,
+						I40E_AQ_PHY_REG_ACCESS_EXTERNAL,
+						I40E_PHY_COM_REG_PAGE,
+						I40E_PHY_LED_PROV_REG_1,
+						reg_val, NULL);
+	} else {
+		i = rd32(hw, I40E_PFGEN_PORTNUM);
+		port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK);
+		phy_addr = i40e_get_phy_address(hw, port_num);
+		status = i40e_write_phy_register_clause45(hw,
+							  I40E_PHY_COM_REG_PAGE,
+							  led_addr, phy_addr,
+							  (u16)reg_val);
+	}
+
+	return status;
+}
+
+/**
  * i40e_led_get_phy - return current on/off mode
  * @hw: pointer to the hw struct
  * @led_addr: address of led register to use
@@ -4842,7 +4950,19 @@ i40e_status i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr,
 	u16 temp_addr;
 	u8 port_num;
 	u32 i;
-
+	u32 reg_val_aq;
+
+	if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) {
+		status =
+		      i40e_aq_get_phy_register(hw,
+					       I40E_AQ_PHY_REG_ACCESS_EXTERNAL,
+					       I40E_PHY_COM_REG_PAGE,
+					       I40E_PHY_LED_PROV_REG_1,
+					       &reg_val_aq, NULL);
+		if (status == I40E_SUCCESS)
+			*val = (u16)reg_val_aq;
+		return status;
+	}
 	temp_addr = I40E_PHY_LED_PROV_REG_1;
 	i = rd32(hw, I40E_PFGEN_PORTNUM);
 	port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK);
@@ -4877,51 +4997,38 @@ i40e_status i40e_led_set_phy(struct i40e_hw *hw, bool on,
 			     u16 led_addr, u32 mode)
 {
 	i40e_status status = 0;
-	u16 led_ctl = 0;
-	u16 led_reg = 0;
-	u8 phy_addr = 0;
-	u8 port_num;
-	u32 i;
+	u32 led_ctl = 0;
+	u32 led_reg = 0;
 
-	i = rd32(hw, I40E_PFGEN_PORTNUM);
-	port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK);
-	phy_addr = i40e_get_phy_address(hw, port_num);
-	status = i40e_read_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE,
-						 led_addr, phy_addr, &led_reg);
+	status = i40e_led_get_reg(hw, led_addr, &led_reg);
 	if (status)
 		return status;
 	led_ctl = led_reg;
 	if (led_reg & I40E_PHY_LED_LINK_MODE_MASK) {
 		led_reg = 0;
-		status = i40e_write_phy_register_clause45(hw,
-							  I40E_PHY_COM_REG_PAGE,
-							  led_addr, phy_addr,
-							  led_reg);
+		status = i40e_led_set_reg(hw, led_addr, led_reg);
 		if (status)
 			return status;
 	}
-	status = i40e_read_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE,
-						 led_addr, phy_addr, &led_reg);
+	status = i40e_led_get_reg(hw, led_addr, &led_reg);
 	if (status)
 		goto restore_config;
 	if (on)
 		led_reg = I40E_PHY_LED_MANUAL_ON;
 	else
 		led_reg = 0;
-	status = i40e_write_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE,
-						  led_addr, phy_addr, led_reg);
+
+	status = i40e_led_set_reg(hw, led_addr, led_reg);
 	if (status)
 		goto restore_config;
 	if (mode & I40E_PHY_LED_MODE_ORIG) {
 		led_ctl = (mode & I40E_PHY_LED_MODE_MASK);
-		status = i40e_write_phy_register_clause45(hw,
-						 I40E_PHY_COM_REG_PAGE,
-						 led_addr, phy_addr, led_ctl);
+		status = i40e_led_set_reg(hw, led_addr, led_ctl);
 	}
 	return status;
+
 restore_config:
-	status = i40e_write_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE,
-						  led_addr, phy_addr, led_ctl);
+	status = i40e_led_set_reg(hw, led_addr, led_ctl);
 	return status;
 }
 
@@ -5052,6 +5159,75 @@ do_retry:
 }
 
 /**
+ * i40e_aq_set_phy_register
+ * @hw: pointer to the hw struct
+ * @phy_select: select which phy should be accessed
+ * @dev_addr: PHY device address
+ * @reg_addr: PHY register address
+ * @reg_val: new register value
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Write the external PHY register.
+ **/
+i40e_status i40e_aq_set_phy_register(struct i40e_hw *hw,
+				     u8 phy_select, u8 dev_addr,
+				     u32 reg_addr, u32 reg_val,
+				     struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_phy_register_access *cmd =
+		(struct i40e_aqc_phy_register_access *)&desc.params.raw;
+	i40e_status status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_set_phy_register);
+
+	cmd->phy_interface = phy_select;
+	cmd->dev_address = dev_addr;
+	cmd->reg_address = cpu_to_le32(reg_addr);
+	cmd->reg_value = cpu_to_le32(reg_val);
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_get_phy_register
+ * @hw: pointer to the hw struct
+ * @phy_select: select which phy should be accessed
+ * @dev_addr: PHY device address
+ * @reg_addr: PHY register address
+ * @reg_val: read register value
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Read the external PHY register.
+ **/
+i40e_status i40e_aq_get_phy_register(struct i40e_hw *hw,
+				     u8 phy_select, u8 dev_addr,
+				     u32 reg_addr, u32 *reg_val,
+				     struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_phy_register_access *cmd =
+		(struct i40e_aqc_phy_register_access *)&desc.params.raw;
+	i40e_status status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_get_phy_register);
+
+	cmd->phy_interface = phy_select;
+	cmd->dev_address = dev_addr;
+	cmd->reg_address = cpu_to_le32(reg_addr);
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+	if (!status)
+		*reg_val = le32_to_cpu(cmd->reg_value);
+
+	return status;
+}
+
+/**
  * i40e_aq_write_ppp - Write pipeline personalization profile (ppp)
  * @hw: pointer to the hw struct
  * @buff: command buffer (size in bytes = buff_size)
@@ -5260,5 +5436,194 @@ i40e_add_pinfo_to_list(struct i40e_hw *hw,
 
 	status = i40e_aq_write_ppp(hw, (void *)sec, sec->data_end,
 				   track_id, &offset, &info, NULL);
+
+	return status;
+}
+
+/**
+ * i40e_aq_add_cloud_filters
+ * @hw: pointer to the hardware structure
+ * @seid: VSI seid to add cloud filters from
+ * @filters: Buffer which contains the filters to be added
+ * @filter_count: number of filters contained in the buffer
+ *
+ * Set the cloud filters for a given VSI.  The contents of the
+ * i40e_aqc_cloud_filters_element_data are filled in by the caller
+ * of the function.
+ *
+ **/
+enum i40e_status_code
+i40e_aq_add_cloud_filters(struct i40e_hw *hw, u16 seid,
+			  struct i40e_aqc_cloud_filters_element_data *filters,
+			  u8 filter_count)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_add_remove_cloud_filters *cmd =
+	(struct i40e_aqc_add_remove_cloud_filters *)&desc.params.raw;
+	enum i40e_status_code status;
+	u16 buff_len;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_add_cloud_filters);
+
+	buff_len = filter_count * sizeof(*filters);
+	desc.datalen = cpu_to_le16(buff_len);
+	desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+	cmd->num_filters = filter_count;
+	cmd->seid = cpu_to_le16(seid);
+
+	status = i40e_asq_send_command(hw, &desc, filters, buff_len, NULL);
+
+	return status;
+}
+
+/**
+ * i40e_aq_add_cloud_filters_bb
+ * @hw: pointer to the hardware structure
+ * @seid: VSI seid to add cloud filters from
+ * @filters: Buffer which contains the filters in big buffer to be added
+ * @filter_count: number of filters contained in the buffer
+ *
+ * Set the big buffer cloud filters for a given VSI.  The contents of the
+ * i40e_aqc_cloud_filters_element_bb are filled in by the caller of the
+ * function.
+ *
+ **/
+i40e_status
+i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
+			     struct i40e_aqc_cloud_filters_element_bb *filters,
+			     u8 filter_count)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_add_remove_cloud_filters *cmd =
+	(struct i40e_aqc_add_remove_cloud_filters *)&desc.params.raw;
+	i40e_status status;
+	u16 buff_len;
+	int i;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_add_cloud_filters);
+
+	buff_len = filter_count * sizeof(*filters);
+	desc.datalen = cpu_to_le16(buff_len);
+	desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+	cmd->num_filters = filter_count;
+	cmd->seid = cpu_to_le16(seid);
+	cmd->big_buffer_flag = I40E_AQC_ADD_CLOUD_CMD_BB;
+
+	for (i = 0; i < filter_count; i++) {
+		u16 tnl_type;
+		u32 ti;
+
+		tnl_type = (le16_to_cpu(filters[i].element.flags) &
+			   I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK) >>
+			   I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT;
+
+		/* Due to hardware eccentricities, the VNI for Geneve is shifted
+		 * one more byte further than normally used for Tenant ID in
+		 * other tunnel types.
+		 */
+		if (tnl_type == I40E_AQC_ADD_CLOUD_TNL_TYPE_GENEVE) {
+			ti = le32_to_cpu(filters[i].element.tenant_id);
+			filters[i].element.tenant_id = cpu_to_le32(ti << 8);
+		}
+	}
+
+	status = i40e_asq_send_command(hw, &desc, filters, buff_len, NULL);
+
+	return status;
+}
+
+/**
+ * i40e_aq_rem_cloud_filters
+ * @hw: pointer to the hardware structure
+ * @seid: VSI seid to remove cloud filters from
+ * @filters: Buffer which contains the filters to be removed
+ * @filter_count: number of filters contained in the buffer
+ *
+ * Remove the cloud filters for a given VSI.  The contents of the
+ * i40e_aqc_cloud_filters_element_data are filled in by the caller
+ * of the function.
+ *
+ **/
+enum i40e_status_code
+i40e_aq_rem_cloud_filters(struct i40e_hw *hw, u16 seid,
+			  struct i40e_aqc_cloud_filters_element_data *filters,
+			  u8 filter_count)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_add_remove_cloud_filters *cmd =
+	(struct i40e_aqc_add_remove_cloud_filters *)&desc.params.raw;
+	enum i40e_status_code status;
+	u16 buff_len;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_remove_cloud_filters);
+
+	buff_len = filter_count * sizeof(*filters);
+	desc.datalen = cpu_to_le16(buff_len);
+	desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+	cmd->num_filters = filter_count;
+	cmd->seid = cpu_to_le16(seid);
+
+	status = i40e_asq_send_command(hw, &desc, filters, buff_len, NULL);
+
+	return status;
+}
+
+/**
+ * i40e_aq_rem_cloud_filters_bb
+ * @hw: pointer to the hardware structure
+ * @seid: VSI seid to remove cloud filters from
+ * @filters: Buffer which contains the filters in big buffer to be removed
+ * @filter_count: number of filters contained in the buffer
+ *
+ * Remove the big buffer cloud filters for a given VSI.  The contents of the
+ * i40e_aqc_cloud_filters_element_bb are filled in by the caller of the
+ * function.
+ *
+ **/
+i40e_status
+i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
+			     struct i40e_aqc_cloud_filters_element_bb *filters,
+			     u8 filter_count)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_add_remove_cloud_filters *cmd =
+	(struct i40e_aqc_add_remove_cloud_filters *)&desc.params.raw;
+	i40e_status status;
+	u16 buff_len;
+	int i;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_remove_cloud_filters);
+
+	buff_len = filter_count * sizeof(*filters);
+	desc.datalen = cpu_to_le16(buff_len);
+	desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+	cmd->num_filters = filter_count;
+	cmd->seid = cpu_to_le16(seid);
+	cmd->big_buffer_flag = I40E_AQC_ADD_CLOUD_CMD_BB;
+
+	for (i = 0; i < filter_count; i++) {
+		u16 tnl_type;
+		u32 ti;
+
+		tnl_type = (le16_to_cpu(filters[i].element.flags) &
+			   I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK) >>
+			   I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT;
+
+		/* Due to hardware eccentricities, the VNI for Geneve is shifted
+		 * one more byte further than normally used for Tenant ID in
+		 * other tunnel types.
+		 */
+		if (tnl_type == I40E_AQC_ADD_CLOUD_TNL_TYPE_GENEVE) {
+			ti = le32_to_cpu(filters[i].element.tenant_id);
+			filters[i].element.tenant_id = cpu_to_le32(ti << 8);
+		}
+	}
+
+	status = i40e_asq_send_command(hw, &desc, filters, buff_len, NULL);
+
 	return status;
 }
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 2cb9539c931e..4c3b4243cf65 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -278,8 +278,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
 			 rx_ring->netdev,
 			 rx_ring->rx_bi);
 		dev_info(&pf->pdev->dev,
-			 "    rx_rings[%i]: state = %li, queue_index = %d, reg_idx = %d\n",
-			 i, rx_ring->state,
+			 "    rx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n",
+			 i, *rx_ring->state,
 			 rx_ring->queue_index,
 			 rx_ring->reg_idx);
 		dev_info(&pf->pdev->dev,
@@ -334,8 +334,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
 			 tx_ring->netdev,
 			 tx_ring->tx_bi);
 		dev_info(&pf->pdev->dev,
-			 "    tx_rings[%i]: state = %li, queue_index = %d, reg_idx = %d\n",
-			 i, tx_ring->state,
+			 "    tx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n",
+			 i, *tx_ring->state,
 			 tx_ring->queue_index,
 			 tx_ring->reg_idx);
 		dev_info(&pf->pdev->dev,
@@ -798,8 +798,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
 		 */
 		if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
 			pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
-			i40e_do_reset_safe(pf,
-					   BIT_ULL(__I40E_PF_RESET_REQUESTED));
+			i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG);
 		}
 
 		vsi = i40e_vsi_setup(pf, I40E_VSI_VMDQ2, vsi_seid, 0);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.c b/drivers/net/ethernet/intel/i40e/i40e_diag.c
index f141e78d409e..76ed56641864 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_diag.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_diag.c
@@ -36,7 +36,9 @@
 static i40e_status i40e_diag_reg_pattern_test(struct i40e_hw *hw,
 							u32 reg, u32 mask)
 {
-	const u32 patterns[] = {0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF};
+	static const u32 patterns[] = {
+		0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF
+	};
 	u32 pat, val, orig_val;
 	int i;
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index e9e04a485e0a..5f6cf7212d4f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -227,6 +227,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
 	I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0),
 	I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0),
 	I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0),
+	I40E_PRIV_FLAG("disable-source-pruning",
+		       I40E_FLAG_SOURCE_PRUNING_DISABLED, 0),
 };
 
 #define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags)
@@ -251,428 +253,557 @@ static void i40e_partition_setting_complaint(struct i40e_pf *pf)
 
 /**
  * i40e_phy_type_to_ethtool - convert the phy_types to ethtool link modes
- * @phy_types: PHY types to convert
- * @supported: pointer to the ethtool supported variable to fill in
- * @advertising: pointer to the ethtool advertising variable to fill in
+ * @pf: PF struct with phy_types
+ * @ks: ethtool link ksettings struct to fill out
  *
  **/
-static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, u32 *supported,
-				     u32 *advertising)
+static void i40e_phy_type_to_ethtool(struct i40e_pf *pf,
+				     struct ethtool_link_ksettings *ks)
 {
 	struct i40e_link_status *hw_link_info = &pf->hw.phy.link_info;
 	u64 phy_types = pf->hw.phy.phy_types;
 
-	*supported = 0x0;
-	*advertising = 0x0;
+	ethtool_link_ksettings_zero_link_mode(ks, supported);
+	ethtool_link_ksettings_zero_link_mode(ks, advertising);
 
 	if (phy_types & I40E_CAP_PHY_TYPE_SGMII) {
-		*supported |= SUPPORTED_Autoneg |
-			      SUPPORTED_1000baseT_Full;
-		*advertising |= ADVERTISED_Autoneg;
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseT_Full);
 		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
-			*advertising |= ADVERTISED_1000baseT_Full;
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     1000baseT_Full);
 		if (pf->hw_features & I40E_HW_100M_SGMII_CAPABLE) {
-			*supported |= SUPPORTED_100baseT_Full;
-			*advertising |= ADVERTISED_100baseT_Full;
+			ethtool_link_ksettings_add_link_mode(ks, supported,
+							     100baseT_Full);
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     100baseT_Full);
 		}
 	}
 	if (phy_types & I40E_CAP_PHY_TYPE_XAUI ||
 	    phy_types & I40E_CAP_PHY_TYPE_XFI ||
 	    phy_types & I40E_CAP_PHY_TYPE_SFI ||
 	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU ||
-	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_AOC)
-		*supported |= SUPPORTED_10000baseT_Full;
-	if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU ||
-	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 ||
-	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_T ||
-	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR ||
-	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR) {
-		*supported |= SUPPORTED_Autoneg |
-			      SUPPORTED_10000baseT_Full;
-		*advertising |= ADVERTISED_Autoneg;
+	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_AOC) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseT_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     10000baseT_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_T) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseT_Full);
 		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
-			*advertising |= ADVERTISED_10000baseT_Full;
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     10000baseT_Full);
 	}
 	if (phy_types & I40E_CAP_PHY_TYPE_XLAUI ||
 	    phy_types & I40E_CAP_PHY_TYPE_XLPPI ||
 	    phy_types & I40E_CAP_PHY_TYPE_40GBASE_AOC)
-		*supported |= SUPPORTED_40000baseCR4_Full;
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseCR4_Full);
 	if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4_CU ||
 	    phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4) {
-		*supported |= SUPPORTED_Autoneg |
-			      SUPPORTED_40000baseCR4_Full;
-		*advertising |= ADVERTISED_Autoneg;
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseCR4_Full);
 		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_40GB)
-			*advertising |= ADVERTISED_40000baseCR4_Full;
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     40000baseCR4_Full);
 	}
 	if (phy_types & I40E_CAP_PHY_TYPE_100BASE_TX) {
-		*supported |= SUPPORTED_Autoneg |
-			      SUPPORTED_100baseT_Full;
-		*advertising |= ADVERTISED_Autoneg;
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100baseT_Full);
 		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB)
-			*advertising |= ADVERTISED_100baseT_Full;
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     100baseT_Full);
 	}
-	if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_T ||
-	    phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX ||
-	    phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX ||
-	    phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL) {
-		*supported |= SUPPORTED_Autoneg |
-			      SUPPORTED_1000baseT_Full;
-		*advertising |= ADVERTISED_Autoneg;
+	if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_T) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseT_Full);
 		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
-			*advertising |= ADVERTISED_1000baseT_Full;
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     1000baseT_Full);
 	}
 	if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_SR4)
-		*supported |= SUPPORTED_40000baseSR4_Full;
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseSR4_Full);
 	if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_LR4)
-		*supported |= SUPPORTED_40000baseLR4_Full;
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseLR4_Full);
 	if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_KR4) {
-		*supported |= SUPPORTED_40000baseKR4_Full |
-			      SUPPORTED_Autoneg;
-		*advertising |= ADVERTISED_40000baseKR4_Full |
-				ADVERTISED_Autoneg;
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseLR4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     40000baseLR4_Full);
 	}
 	if (phy_types & I40E_CAP_PHY_TYPE_20GBASE_KR2) {
-		*supported |= SUPPORTED_20000baseKR2_Full |
-			      SUPPORTED_Autoneg;
-		*advertising |= ADVERTISED_Autoneg;
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     20000baseKR2_Full);
 		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_20GB)
-			*advertising |= ADVERTISED_20000baseKR2_Full;
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     20000baseKR2_Full);
 	}
-	if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR) {
-		if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER))
-			*supported |= SUPPORTED_10000baseKR_Full |
-				      SUPPORTED_Autoneg;
-		*advertising |= ADVERTISED_Autoneg;
+	if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseKX4_Full);
 		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
-			if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER))
-				*advertising |= ADVERTISED_10000baseKR_Full;
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     10000baseKX4_Full);
 	}
-	if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4) {
-		*supported |= SUPPORTED_10000baseKX4_Full |
-			      SUPPORTED_Autoneg;
-		*advertising |= ADVERTISED_Autoneg;
+	if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR &&
+	    !(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseKR_Full);
 		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
-			*advertising |= ADVERTISED_10000baseKX4_Full;
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     10000baseKR_Full);
 	}
-	if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX) {
-		if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER))
-			*supported |= SUPPORTED_1000baseKX_Full |
-				      SUPPORTED_Autoneg;
-		*advertising |= ADVERTISED_Autoneg;
+	if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX &&
+	    !(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseKX_Full);
 		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
-			if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER))
-				*advertising |= ADVERTISED_1000baseKX_Full;
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     1000baseKX_Full);
 	}
-	if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR ||
-	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR ||
-	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_SR ||
+	/* need to add 25G PHY types */
+	if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseKR_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     25000baseKR_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseCR_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     25000baseCR_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_SR ||
 	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_LR) {
-		*supported |= SUPPORTED_Autoneg;
-		*advertising |= ADVERTISED_Autoneg;
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseSR_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     25000baseSR_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_AOC ||
+	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_ACC) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseCR_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     25000baseCR_Full);
+	}
+	/* need to add new 10G PHY types */
+	if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 ||
+	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseCR_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     10000baseCR_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseSR_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     10000baseSR_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseLR_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     10000baseLR_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX ||
+	    phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX ||
+	    phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseX_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     1000baseX_Full);
+	}
+	/* Autoneg PHY types */
+	if (phy_types & I40E_CAP_PHY_TYPE_SGMII ||
+	    phy_types & I40E_CAP_PHY_TYPE_40GBASE_KR4 ||
+	    phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4_CU ||
+	    phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4 ||
+	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_SR ||
+	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_LR ||
+	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR ||
+	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR ||
+	    phy_types & I40E_CAP_PHY_TYPE_20GBASE_KR2 ||
+	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_T ||
+	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR ||
+	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR ||
+	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4 ||
+	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR ||
+	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU ||
+	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 ||
+	    phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL ||
+	    phy_types & I40E_CAP_PHY_TYPE_1000BASE_T ||
+	    phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX ||
+	    phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX ||
+	    phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX ||
+	    phy_types & I40E_CAP_PHY_TYPE_100BASE_TX) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     Autoneg);
 	}
 }
 
 /**
  * i40e_get_settings_link_up - Get the Link settings for when link is up
  * @hw: hw structure
- * @ecmd: ethtool command to fill in
+ * @ks: ethtool ksettings to fill in
  * @netdev: network interface device structure
- *
+ * @pf: pointer to physical function struct
  **/
 static void i40e_get_settings_link_up(struct i40e_hw *hw,
-				      struct ethtool_link_ksettings *cmd,
+				      struct ethtool_link_ksettings *ks,
 				      struct net_device *netdev,
 				      struct i40e_pf *pf)
 {
 	struct i40e_link_status *hw_link_info = &hw->phy.link_info;
+	struct ethtool_link_ksettings cap_ksettings;
 	u32 link_speed = hw_link_info->link_speed;
-	u32 e_advertising = 0x0;
-	u32 e_supported = 0x0;
-	u32 supported, advertising;
-
-	ethtool_convert_link_mode_to_legacy_u32(&supported,
-						cmd->link_modes.supported);
-	ethtool_convert_link_mode_to_legacy_u32(&advertising,
-						cmd->link_modes.advertising);
 
 	/* Initialize supported and advertised settings based on phy settings */
 	switch (hw_link_info->phy_type) {
 	case I40E_PHY_TYPE_40GBASE_CR4:
 	case I40E_PHY_TYPE_40GBASE_CR4_CU:
-		supported = SUPPORTED_Autoneg |
-			    SUPPORTED_40000baseCR4_Full;
-		advertising = ADVERTISED_Autoneg |
-			      ADVERTISED_40000baseCR4_Full;
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseCR4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     40000baseCR4_Full);
 		break;
 	case I40E_PHY_TYPE_XLAUI:
 	case I40E_PHY_TYPE_XLPPI:
 	case I40E_PHY_TYPE_40GBASE_AOC:
-		supported = SUPPORTED_40000baseCR4_Full;
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseCR4_Full);
 		break;
 	case I40E_PHY_TYPE_40GBASE_SR4:
-		supported = SUPPORTED_40000baseSR4_Full;
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseSR4_Full);
 		break;
 	case I40E_PHY_TYPE_40GBASE_LR4:
-		supported = SUPPORTED_40000baseLR4_Full;
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseLR4_Full);
 		break;
+	case I40E_PHY_TYPE_25GBASE_SR:
+	case I40E_PHY_TYPE_25GBASE_LR:
 	case I40E_PHY_TYPE_10GBASE_SR:
 	case I40E_PHY_TYPE_10GBASE_LR:
 	case I40E_PHY_TYPE_1000BASE_SX:
 	case I40E_PHY_TYPE_1000BASE_LX:
-		supported = SUPPORTED_10000baseT_Full;
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseSR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     25000baseSR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseSR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     10000baseSR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseLR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     10000baseLR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseX_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     1000baseX_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseT_Full);
 		if (hw_link_info->module_type[2] &
 		    I40E_MODULE_TYPE_1000BASE_SX ||
 		    hw_link_info->module_type[2] &
 		    I40E_MODULE_TYPE_1000BASE_LX) {
-			supported |= SUPPORTED_1000baseT_Full;
+			ethtool_link_ksettings_add_link_mode(ks, supported,
+							     1000baseT_Full);
 			if (hw_link_info->requested_speeds &
 			    I40E_LINK_SPEED_1GB)
-				advertising |= ADVERTISED_1000baseT_Full;
+				ethtool_link_ksettings_add_link_mode(
+				     ks, advertising, 1000baseT_Full);
 		}
 		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
-			advertising |= ADVERTISED_10000baseT_Full;
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     10000baseT_Full);
 		break;
 	case I40E_PHY_TYPE_10GBASE_T:
 	case I40E_PHY_TYPE_1000BASE_T:
 	case I40E_PHY_TYPE_100BASE_TX:
-		supported = SUPPORTED_Autoneg |
-			    SUPPORTED_10000baseT_Full |
-			    SUPPORTED_1000baseT_Full |
-			    SUPPORTED_100baseT_Full;
-		advertising = ADVERTISED_Autoneg;
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseT_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseT_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100baseT_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
 		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
-			advertising |= ADVERTISED_10000baseT_Full;
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     10000baseT_Full);
 		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
-			advertising |= ADVERTISED_1000baseT_Full;
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     1000baseT_Full);
 		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB)
-			advertising |= ADVERTISED_100baseT_Full;
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     100baseT_Full);
 		break;
 	case I40E_PHY_TYPE_1000BASE_T_OPTICAL:
-		supported = SUPPORTED_Autoneg |
-			    SUPPORTED_1000baseT_Full;
-		advertising = ADVERTISED_Autoneg |
-			      ADVERTISED_1000baseT_Full;
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseT_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     1000baseT_Full);
 		break;
 	case I40E_PHY_TYPE_10GBASE_CR1_CU:
 	case I40E_PHY_TYPE_10GBASE_CR1:
-		supported = SUPPORTED_Autoneg |
-			    SUPPORTED_10000baseT_Full;
-		advertising = ADVERTISED_Autoneg |
-			      ADVERTISED_10000baseT_Full;
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseT_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     10000baseT_Full);
 		break;
 	case I40E_PHY_TYPE_XAUI:
 	case I40E_PHY_TYPE_XFI:
 	case I40E_PHY_TYPE_SFI:
 	case I40E_PHY_TYPE_10GBASE_SFPP_CU:
 	case I40E_PHY_TYPE_10GBASE_AOC:
-		supported = SUPPORTED_10000baseT_Full;
-		advertising = SUPPORTED_10000baseT_Full;
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseT_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     10000baseT_Full);
 		break;
 	case I40E_PHY_TYPE_SGMII:
-		supported = SUPPORTED_Autoneg |
-			    SUPPORTED_1000baseT_Full;
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseT_Full);
 		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
-			advertising |= ADVERTISED_1000baseT_Full;
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     1000baseT_Full);
 		if (pf->hw_features & I40E_HW_100M_SGMII_CAPABLE) {
-			supported |= SUPPORTED_100baseT_Full;
+			ethtool_link_ksettings_add_link_mode(ks, supported,
+							     100baseT_Full);
 			if (hw_link_info->requested_speeds &
 			    I40E_LINK_SPEED_100MB)
-				advertising |= ADVERTISED_100baseT_Full;
+				ethtool_link_ksettings_add_link_mode(
+				      ks, advertising, 100baseT_Full);
 		}
 		break;
 	case I40E_PHY_TYPE_40GBASE_KR4:
+	case I40E_PHY_TYPE_25GBASE_KR:
 	case I40E_PHY_TYPE_20GBASE_KR2:
 	case I40E_PHY_TYPE_10GBASE_KR:
 	case I40E_PHY_TYPE_10GBASE_KX4:
 	case I40E_PHY_TYPE_1000BASE_KX:
-		supported |= SUPPORTED_40000baseKR4_Full |
-			     SUPPORTED_20000baseKR2_Full |
-			     SUPPORTED_10000baseKR_Full |
-			     SUPPORTED_10000baseKX4_Full |
-			     SUPPORTED_1000baseKX_Full |
-			     SUPPORTED_Autoneg;
-		advertising |= ADVERTISED_40000baseKR4_Full |
-			       ADVERTISED_20000baseKR2_Full |
-			       ADVERTISED_10000baseKR_Full |
-			       ADVERTISED_10000baseKX4_Full |
-			       ADVERTISED_1000baseKX_Full |
-			       ADVERTISED_Autoneg;
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseKR4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseKR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     20000baseKR2_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseKR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseKX4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseKX_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     40000baseKR4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     25000baseKR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     20000baseKR2_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     10000baseKR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     10000baseKX4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     1000baseKX_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
 		break;
-	case I40E_PHY_TYPE_25GBASE_KR:
 	case I40E_PHY_TYPE_25GBASE_CR:
-	case I40E_PHY_TYPE_25GBASE_SR:
-	case I40E_PHY_TYPE_25GBASE_LR:
-		supported = SUPPORTED_Autoneg;
-		advertising = ADVERTISED_Autoneg;
-		/* TODO: add speeds when ethtool is ready to support*/
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseCR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     25000baseCR_Full);
+		break;
+	case I40E_PHY_TYPE_25GBASE_AOC:
+	case I40E_PHY_TYPE_25GBASE_ACC:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseCR_Full);
+
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     25000baseCR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseCR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     10000baseCR_Full);
 		break;
 	default:
 		/* if we got here and link is up something bad is afoot */
-		netdev_info(netdev, "WARNING: Link is up but PHY type 0x%x is not recognized.\n",
+		netdev_info(netdev,
+			    "WARNING: Link is up but PHY type 0x%x is not recognized.\n",
 			    hw_link_info->phy_type);
 	}
 
 	/* Now that we've worked out everything that could be supported by the
-	 * current PHY type, get what is supported by the NVM and them to
-	 * get what is truly supported
+	 * current PHY type, get what is supported by the NVM and intersect
+	 * them to get what is truly supported
 	 */
-	i40e_phy_type_to_ethtool(pf, &e_supported,
-				 &e_advertising);
-
-	supported = supported & e_supported;
-	advertising = advertising & e_advertising;
+	memset(&cap_ksettings, 0, sizeof(struct ethtool_link_ksettings));
+	i40e_phy_type_to_ethtool(pf, &cap_ksettings);
+	ethtool_intersect_link_masks(ks, &cap_ksettings);
 
 	/* Set speed and duplex */
 	switch (link_speed) {
 	case I40E_LINK_SPEED_40GB:
-		cmd->base.speed = SPEED_40000;
+		ks->base.speed = SPEED_40000;
 		break;
 	case I40E_LINK_SPEED_25GB:
-#ifdef SPEED_25000
-		cmd->base.speed = SPEED_25000;
-#else
-		netdev_info(netdev,
-			    "Speed is 25G, display not supported by this version of ethtool.\n");
-#endif
+		ks->base.speed = SPEED_25000;
 		break;
 	case I40E_LINK_SPEED_20GB:
-		cmd->base.speed = SPEED_20000;
+		ks->base.speed = SPEED_20000;
 		break;
 	case I40E_LINK_SPEED_10GB:
-		cmd->base.speed = SPEED_10000;
+		ks->base.speed = SPEED_10000;
 		break;
 	case I40E_LINK_SPEED_1GB:
-		cmd->base.speed = SPEED_1000;
+		ks->base.speed = SPEED_1000;
 		break;
 	case I40E_LINK_SPEED_100MB:
-		cmd->base.speed = SPEED_100;
+		ks->base.speed = SPEED_100;
 		break;
 	default:
 		break;
 	}
-	cmd->base.duplex = DUPLEX_FULL;
-
-	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
-						supported);
-	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
-						advertising);
+	ks->base.duplex = DUPLEX_FULL;
 }
 
 /**
  * i40e_get_settings_link_down - Get the Link settings for when link is down
  * @hw: hw structure
- * @ecmd: ethtool command to fill in
+ * @ks: ethtool ksettings to fill in
+ * @pf: pointer to physical function struct
  *
  * Reports link settings that can be determined when link is down
  **/
 static void i40e_get_settings_link_down(struct i40e_hw *hw,
-					struct ethtool_link_ksettings *cmd,
+					struct ethtool_link_ksettings *ks,
 					struct i40e_pf *pf)
 {
-	u32 supported, advertising;
-
 	/* link is down and the driver needs to fall back on
 	 * supported phy types to figure out what info to display
 	 */
-	i40e_phy_type_to_ethtool(pf, &supported, &advertising);
-
-	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
-						supported);
-	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
-						advertising);
+	i40e_phy_type_to_ethtool(pf, ks);
 
 	/* With no link speed and duplex are unknown */
-	cmd->base.speed = SPEED_UNKNOWN;
-	cmd->base.duplex = DUPLEX_UNKNOWN;
+	ks->base.speed = SPEED_UNKNOWN;
+	ks->base.duplex = DUPLEX_UNKNOWN;
 }
 
 /**
- * i40e_get_settings - Get Link Speed and Duplex settings
+ * i40e_get_link_ksettings - Get Link Speed and Duplex settings
  * @netdev: network interface device structure
- * @ecmd: ethtool command
+ * @ks: ethtool ksettings
  *
  * Reports speed/duplex settings based on media_type
  **/
 static int i40e_get_link_ksettings(struct net_device *netdev,
-				   struct ethtool_link_ksettings *cmd)
+				   struct ethtool_link_ksettings *ks)
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_pf *pf = np->vsi->back;
 	struct i40e_hw *hw = &pf->hw;
 	struct i40e_link_status *hw_link_info = &hw->phy.link_info;
 	bool link_up = hw_link_info->link_info & I40E_AQ_LINK_UP;
-	u32 advertising;
+
+	ethtool_link_ksettings_zero_link_mode(ks, supported);
+	ethtool_link_ksettings_zero_link_mode(ks, advertising);
 
 	if (link_up)
-		i40e_get_settings_link_up(hw, cmd, netdev, pf);
+		i40e_get_settings_link_up(hw, ks, netdev, pf);
 	else
-		i40e_get_settings_link_down(hw, cmd, pf);
+		i40e_get_settings_link_down(hw, ks, pf);
 
 	/* Now set the settings that don't rely on link being up/down */
 	/* Set autoneg settings */
-	cmd->base.autoneg = ((hw_link_info->an_info & I40E_AQ_AN_COMPLETED) ?
-			  AUTONEG_ENABLE : AUTONEG_DISABLE);
+	ks->base.autoneg = ((hw_link_info->an_info & I40E_AQ_AN_COMPLETED) ?
+			    AUTONEG_ENABLE : AUTONEG_DISABLE);
 
+	/* Set media type settings */
 	switch (hw->phy.media_type) {
 	case I40E_MEDIA_TYPE_BACKPLANE:
-		ethtool_link_ksettings_add_link_mode(cmd, supported,
-						     Autoneg);
-		ethtool_link_ksettings_add_link_mode(cmd, supported,
-						     Backplane);
-		ethtool_link_ksettings_add_link_mode(cmd, advertising,
-						     Autoneg);
-		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported, Backplane);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
 						     Backplane);
-		cmd->base.port = PORT_NONE;
+		ks->base.port = PORT_NONE;
 		break;
 	case I40E_MEDIA_TYPE_BASET:
-		ethtool_link_ksettings_add_link_mode(cmd, supported, TP);
-		ethtool_link_ksettings_add_link_mode(cmd, advertising, TP);
-		cmd->base.port = PORT_TP;
+		ethtool_link_ksettings_add_link_mode(ks, supported, TP);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, TP);
+		ks->base.port = PORT_TP;
 		break;
 	case I40E_MEDIA_TYPE_DA:
 	case I40E_MEDIA_TYPE_CX4:
-		ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
-		ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE);
-		cmd->base.port = PORT_DA;
+		ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, FIBRE);
+		ks->base.port = PORT_DA;
 		break;
 	case I40E_MEDIA_TYPE_FIBER:
-		ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
-		cmd->base.port = PORT_FIBRE;
+		ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
+		ks->base.port = PORT_FIBRE;
 		break;
 	case I40E_MEDIA_TYPE_UNKNOWN:
 	default:
-		cmd->base.port = PORT_OTHER;
+		ks->base.port = PORT_OTHER;
 		break;
 	}
 
 	/* Set flow control settings */
-	ethtool_link_ksettings_add_link_mode(cmd, supported, Pause);
+	ethtool_link_ksettings_add_link_mode(ks, supported, Pause);
 
 	switch (hw->fc.requested_mode) {
 	case I40E_FC_FULL:
-		ethtool_link_ksettings_add_link_mode(cmd, advertising,
-						     Pause);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
 		break;
 	case I40E_FC_TX_PAUSE:
-		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
 						     Asym_Pause);
 		break;
 	case I40E_FC_RX_PAUSE:
-		ethtool_link_ksettings_add_link_mode(cmd, advertising,
-						     Pause);
-		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
 						     Asym_Pause);
 		break;
 	default:
-		ethtool_convert_link_mode_to_legacy_u32(
-			&advertising, cmd->link_modes.advertising);
-
-		advertising &= ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
-
-		ethtool_convert_legacy_u32_to_link_mode(
-			cmd->link_modes.advertising, advertising);
+		ethtool_link_ksettings_del_link_mode(ks, advertising, Pause);
+		ethtool_link_ksettings_del_link_mode(ks, advertising,
+						     Asym_Pause);
 		break;
 	}
 
@@ -680,30 +811,28 @@ static int i40e_get_link_ksettings(struct net_device *netdev,
 }
 
 /**
- * i40e_set_settings - Set Speed and Duplex
+ * i40e_set_link_ksettings - Set Speed and Duplex
  * @netdev: network interface device structure
- * @ecmd: ethtool command
+ * @ks: ethtool ksettings
  *
  * Set speed/duplex per media_types advertised/forced
  **/
 static int i40e_set_link_ksettings(struct net_device *netdev,
-				   const struct ethtool_link_ksettings *cmd)
+				   const struct ethtool_link_ksettings *ks)
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_aq_get_phy_abilities_resp abilities;
+	struct ethtool_link_ksettings safe_ks;
+	struct ethtool_link_ksettings copy_ks;
 	struct i40e_aq_set_phy_config config;
 	struct i40e_pf *pf = np->vsi->back;
 	struct i40e_vsi *vsi = np->vsi;
 	struct i40e_hw *hw = &pf->hw;
-	struct ethtool_link_ksettings safe_cmd;
-	struct ethtool_link_ksettings copy_cmd;
+	bool autoneg_changed = false;
 	i40e_status status = 0;
-	bool change = false;
 	int timeout = 50;
 	int err = 0;
-	u32 autoneg;
-	u32 advertise;
-	u32 tmp;
+	u8 autoneg;
 
 	/* Changing port settings is not supported if this isn't the
 	 * port's controlling PF
@@ -712,17 +841,14 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
 		i40e_partition_setting_complaint(pf);
 		return -EOPNOTSUPP;
 	}
-
 	if (vsi != pf->vsi[pf->lan_vsi])
 		return -EOPNOTSUPP;
-
 	if (hw->phy.media_type != I40E_MEDIA_TYPE_BASET &&
 	    hw->phy.media_type != I40E_MEDIA_TYPE_FIBER &&
 	    hw->phy.media_type != I40E_MEDIA_TYPE_BACKPLANE &&
 	    hw->phy.media_type != I40E_MEDIA_TYPE_DA &&
 	    hw->phy.link_info.link_info & I40E_AQ_LINK_UP)
 		return -EOPNOTSUPP;
-
 	if (hw->device_id == I40E_DEV_ID_KX_B ||
 	    hw->device_id == I40E_DEV_ID_KX_C ||
 	    hw->device_id == I40E_DEV_ID_20G_KR2 ||
@@ -731,31 +857,37 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
 		return -EOPNOTSUPP;
 	}
 
-	/* copy the cmd to copy_cmd to avoid modifying the origin */
-	memcpy(&copy_cmd, cmd, sizeof(struct ethtool_link_ksettings));
+	/* copy the ksettings to copy_ks to avoid modifying the origin */
+	memcpy(&copy_ks, ks, sizeof(struct ethtool_link_ksettings));
 
-	/* get our own copy of the bits to check against */
-	memset(&safe_cmd, 0, sizeof(struct ethtool_link_ksettings));
-	i40e_get_link_ksettings(netdev, &safe_cmd);
+	/* save autoneg out of ksettings */
+	autoneg = copy_ks.base.autoneg;
 
-	/* save autoneg and speed out of cmd */
-	autoneg = cmd->base.autoneg;
-	ethtool_convert_link_mode_to_legacy_u32(&advertise,
-						cmd->link_modes.advertising);
+	memset(&safe_ks, 0, sizeof(safe_ks));
+	/* Get link modes supported by hardware and check against modes
+	 * requested by the user.  Return an error if unsupported mode was set.
+	 */
+	i40e_phy_type_to_ethtool(pf, &safe_ks);
+	if (!bitmap_subset(copy_ks.link_modes.advertising,
+			   safe_ks.link_modes.supported,
+			   __ETHTOOL_LINK_MODE_MASK_NBITS))
+		return -EINVAL;
 
-	/* set autoneg and speed back to what they currently are */
-	copy_cmd.base.autoneg = safe_cmd.base.autoneg;
-	ethtool_convert_link_mode_to_legacy_u32(
-		&tmp, safe_cmd.link_modes.advertising);
-	ethtool_convert_legacy_u32_to_link_mode(
-		copy_cmd.link_modes.advertising, tmp);
+	/* get our own copy of the bits to check against */
+	memset(&safe_ks, 0, sizeof(struct ethtool_link_ksettings));
+	safe_ks.base.cmd = copy_ks.base.cmd;
+	safe_ks.base.link_mode_masks_nwords =
+		copy_ks.base.link_mode_masks_nwords;
+	i40e_get_link_ksettings(netdev, &safe_ks);
 
-	copy_cmd.base.cmd = safe_cmd.base.cmd;
+	/* set autoneg back to what it currently is */
+	copy_ks.base.autoneg = safe_ks.base.autoneg;
 
-	/* If copy_cmd and safe_cmd are not the same now, then they are
-	 * trying to set something that we do not support
+	/* If copy_ks.base and safe_ks.base are not the same now, then they are
+	 * trying to set something that we do not support.
 	 */
-	if (memcmp(&copy_cmd, &safe_cmd, sizeof(struct ethtool_link_ksettings)))
+	if (memcmp(&copy_ks.base, &safe_ks.base,
+		   sizeof(struct ethtool_link_settings)))
 		return -EOPNOTSUPP;
 
 	while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) {
@@ -784,8 +916,9 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
 		/* If autoneg was not already enabled */
 		if (!(hw->phy.link_info.an_info & I40E_AQ_AN_COMPLETED)) {
 			/* If autoneg is not supported, return error */
-			if (!ethtool_link_ksettings_test_link_mode(
-				    &safe_cmd, supported, Autoneg)) {
+			if (!ethtool_link_ksettings_test_link_mode(&safe_ks,
+								   supported,
+								   Autoneg)) {
 				netdev_info(netdev, "Autoneg not supported on this phy\n");
 				err = -EINVAL;
 				goto done;
@@ -793,7 +926,7 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
 			/* Autoneg is allowed to change */
 			config.abilities = abilities.abilities |
 					   I40E_AQ_PHY_ENABLE_AN;
-			change = true;
+			autoneg_changed = true;
 		}
 	} else {
 		/* If autoneg is currently enabled */
@@ -801,8 +934,9 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
 			/* If autoneg is supported 10GBASE_T is the only PHY
 			 * that can disable it, so otherwise return error
 			 */
-			if (ethtool_link_ksettings_test_link_mode(
-				    &safe_cmd, supported, Autoneg) &&
+			if (ethtool_link_ksettings_test_link_mode(&safe_ks,
+								  supported,
+								  Autoneg) &&
 			    hw->phy.link_info.phy_type !=
 			    I40E_PHY_TYPE_10GBASE_T) {
 				netdev_info(netdev, "Autoneg cannot be disabled on this phy\n");
@@ -812,32 +946,49 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
 			/* Autoneg is allowed to change */
 			config.abilities = abilities.abilities &
 					   ~I40E_AQ_PHY_ENABLE_AN;
-			change = true;
+			autoneg_changed = true;
 		}
 	}
 
-	ethtool_convert_link_mode_to_legacy_u32(&tmp,
-						safe_cmd.link_modes.supported);
-	if (advertise & ~tmp) {
-		err = -EINVAL;
-		goto done;
-	}
-
-	if (advertise & ADVERTISED_100baseT_Full)
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  100baseT_Full))
 		config.link_speed |= I40E_LINK_SPEED_100MB;
-	if (advertise & ADVERTISED_1000baseT_Full ||
-	    advertise & ADVERTISED_1000baseKX_Full)
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  1000baseT_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  1000baseX_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  1000baseKX_Full))
 		config.link_speed |= I40E_LINK_SPEED_1GB;
-	if (advertise & ADVERTISED_10000baseT_Full ||
-	    advertise & ADVERTISED_10000baseKX4_Full ||
-	    advertise & ADVERTISED_10000baseKR_Full)
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  10000baseT_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  10000baseKX4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  10000baseKR_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  10000baseCR_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  10000baseSR_Full))
 		config.link_speed |= I40E_LINK_SPEED_10GB;
-	if (advertise & ADVERTISED_20000baseKR2_Full)
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  20000baseKR2_Full))
 		config.link_speed |= I40E_LINK_SPEED_20GB;
-	if (advertise & ADVERTISED_40000baseKR4_Full ||
-	    advertise & ADVERTISED_40000baseCR4_Full ||
-	    advertise & ADVERTISED_40000baseSR4_Full ||
-	    advertise & ADVERTISED_40000baseLR4_Full)
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  25000baseCR_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  25000baseKR_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  25000baseSR_Full))
+		config.link_speed |= I40E_LINK_SPEED_25GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  40000baseKR4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  40000baseCR4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  40000baseSR4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  40000baseLR4_Full))
 		config.link_speed |= I40E_LINK_SPEED_40GB;
 
 	/* If speed didn't get set, set it to what it currently is.
@@ -846,8 +997,7 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
 	 */
 	if (!config.link_speed)
 		config.link_speed = abilities.link_speed;
-
-	if (change || (abilities.link_speed != config.link_speed)) {
+	if (autoneg_changed || abilities.link_speed != config.link_speed) {
 		/* copy over the rest of the abilities */
 		config.phy_type = abilities.phy_type;
 		config.phy_type_ext = abilities.phy_type_ext;
@@ -874,7 +1024,8 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
 		/* make the aq call */
 		status = i40e_aq_set_phy_config(hw, &config, NULL);
 		if (status) {
-			netdev_info(netdev, "Set phy config failed, err %s aq_err %s\n",
+			netdev_info(netdev,
+				    "Set phy config failed, err %s aq_err %s\n",
 				    i40e_stat_str(hw, status),
 				    i40e_aq_str(hw, hw->aq.asq_last_status));
 			err = -EAGAIN;
@@ -883,7 +1034,8 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
 
 		status = i40e_update_link_info(hw);
 		if (status)
-			netdev_dbg(netdev, "Updating link info failed with err %s aq_err %s\n",
+			netdev_dbg(netdev,
+				   "Updating link info failed with err %s aq_err %s\n",
 				   i40e_stat_str(hw, status),
 				   i40e_aq_str(hw, hw->aq.asq_last_status));
 
@@ -2008,7 +2160,9 @@ static int i40e_set_phys_id(struct net_device *netdev,
 		if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS)) {
 			pf->led_status = i40e_led_get(hw);
 		} else {
-			i40e_aq_set_phy_debug(hw, I40E_PHY_DEBUG_ALL, NULL);
+			if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE))
+				i40e_aq_set_phy_debug(hw, I40E_PHY_DEBUG_ALL,
+						      NULL);
 			ret = i40e_led_get_phy(hw, &temp_status,
 					       &pf->phy_led_val);
 			pf->led_status = temp_status;
@@ -2033,7 +2187,8 @@ static int i40e_set_phys_id(struct net_device *netdev,
 			ret = i40e_led_set_phy(hw, false, pf->led_status,
 					       (pf->phy_led_val |
 					       I40E_PHY_LED_MODE_ORIG));
-			i40e_aq_set_phy_debug(hw, 0, NULL);
+			if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE))
+				i40e_aq_set_phy_debug(hw, 0, NULL);
 		}
 		break;
 	default:
@@ -2071,14 +2226,13 @@ static int __i40e_get_coalesce(struct net_device *netdev,
 	ec->tx_max_coalesced_frames_irq = vsi->work_limit;
 	ec->rx_max_coalesced_frames_irq = vsi->work_limit;
 
-	/* rx and tx usecs has per queue value. If user doesn't specify the queue,
-	 * return queue 0's value to represent.
+	/* rx and tx usecs has per queue value. If user doesn't specify the
+	 * queue, return queue 0's value to represent.
 	 */
-	if (queue < 0) {
+	if (queue < 0)
 		queue = 0;
-	} else if (queue >= vsi->num_queue_pairs) {
+	else if (queue >= vsi->num_queue_pairs)
 		return -EINVAL;
-	}
 
 	rx_ring = vsi->rx_rings[queue];
 	tx_ring = vsi->tx_rings[queue];
@@ -2092,7 +2246,6 @@ static int __i40e_get_coalesce(struct net_device *netdev,
 	ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC;
 	ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC;
 
-
 	/* we use the _usecs_high to store/set the interrupt rate limit
 	 * that the hardware supports, that almost but not quite
 	 * fits the original intent of the ethtool variable,
@@ -2142,7 +2295,6 @@ static int i40e_get_per_queue_coalesce(struct net_device *netdev, u32 queue,
  *
  * Change the ITR settings for a specific queue.
  **/
-
 static void i40e_set_itr_per_queue(struct i40e_vsi *vsi,
 				   struct ethtool_coalesce *ec,
 				   int queue)
@@ -2264,8 +2416,8 @@ static int __i40e_set_coalesce(struct net_device *netdev,
 			   vsi->int_rate_limit);
 	}
 
-	/* rx and tx usecs has per queue value. If user doesn't specify the queue,
-	 * apply to all queues.
+	/* rx and tx usecs has per queue value. If user doesn't specify the
+	 * queue, apply to all queues.
 	 */
 	if (queue < 0) {
 		for (i = 0; i < vsi->num_queue_pairs; i++)
@@ -2647,7 +2799,7 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
 
 	switch (cmd->cmd) {
 	case ETHTOOL_GRXRINGS:
-		cmd->data = vsi->num_queue_pairs;
+		cmd->data = vsi->rss_size;
 		ret = 0;
 		break;
 	case ETHTOOL_GRXFH:
@@ -3892,6 +4044,12 @@ static int i40e_set_channels(struct net_device *dev,
 	if (vsi->type != I40E_VSI_MAIN)
 		return -EINVAL;
 
+	/* We do not support setting channels via ethtool when TCs are
+	 * configured through mqprio
+	 */
+	if (pf->flags & I40E_FLAG_TC_MQPRIO)
+		return -EINVAL;
+
 	/* verify they are not requesting separate vectors */
 	if (!count || ch->rx_count || ch->tx_count)
 		return -EINVAL;
@@ -3959,6 +4117,16 @@ static u32 i40e_get_rxfh_indir_size(struct net_device *netdev)
 	return I40E_HLUT_ARRAY_SIZE;
 }
 
+/**
+ * i40e_get_rxfh - get the rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key
+ * @hfunc: hash function
+ *
+ * Reads the indirection table directly from the hardware. Returns 0 on
+ * success.
+ **/
 static int i40e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
 			 u8 *hfunc)
 {
@@ -4090,7 +4258,7 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
 	struct i40e_netdev_priv *np = netdev_priv(dev);
 	struct i40e_vsi *vsi = np->vsi;
 	struct i40e_pf *pf = vsi->back;
-	u64 orig_flags, new_flags, changed_flags;
+	u32 orig_flags, new_flags, changed_flags;
 	u32 i, j;
 
 	orig_flags = READ_ONCE(pf->flags);
@@ -4142,12 +4310,12 @@ flags_complete:
 		return -EOPNOTSUPP;
 
 	/* Compare and exchange the new flags into place. If we failed, that
-	 * is if cmpxchg64 returns anything but the old value, this means that
+	 * is if cmpxchg returns anything but the old value, this means that
 	 * something else has modified the flags variable since we copied it
 	 * originally. We'll just punt with an error and log something in the
 	 * message buffer.
 	 */
-	if (cmpxchg64(&pf->flags, orig_flags, new_flags) != orig_flags) {
+	if (cmpxchg(&pf->flags, orig_flags, new_flags) != orig_flags) {
 		dev_warn(&pf->pdev->dev,
 			 "Unable to update pf->flags as it was modified by another thread...\n");
 		return -EAGAIN;
@@ -4175,7 +4343,7 @@ flags_complete:
 			sw_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
 		valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
 		ret = i40e_aq_set_switch_config(&pf->hw, sw_flags, valid_flags,
-						NULL);
+						0, NULL);
 		if (ret && pf->hw.aq.asq_last_status != I40E_AQ_RC_ESRCH) {
 			dev_info(&pf->pdev->dev,
 				 "couldn't set switch config bits, err %s aq_err %s\n",
@@ -4189,13 +4357,166 @@ flags_complete:
 	/* Issue reset to cause things to take effect, as additional bits
 	 * are added we will need to create a mask of bits requiring reset
 	 */
-	if ((changed_flags & I40E_FLAG_VEB_STATS_ENABLED) ||
-	    ((changed_flags & I40E_FLAG_LEGACY_RX) && netif_running(dev)))
+	if (changed_flags & (I40E_FLAG_VEB_STATS_ENABLED |
+			     I40E_FLAG_LEGACY_RX |
+			     I40E_FLAG_SOURCE_PRUNING_DISABLED))
 		i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED), true);
 
 	return 0;
 }
 
+/**
+ * i40e_get_module_info - get (Q)SFP+ module type info
+ * @netdev: network interface device structure
+ * @modinfo: module EEPROM size and layout information structure
+ **/
+static int i40e_get_module_info(struct net_device *netdev,
+				struct ethtool_modinfo *modinfo)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	u32 sff8472_comp = 0;
+	u32 sff8472_swap = 0;
+	u32 sff8636_rev = 0;
+	i40e_status status;
+	u32 type = 0;
+
+	/* Check if firmware supports reading module EEPROM. */
+	if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE)) {
+		netdev_err(vsi->netdev, "Module EEPROM memory read not supported. Please update the NVM image.\n");
+		return -EINVAL;
+	}
+
+	status = i40e_update_link_info(hw);
+	if (status)
+		return -EIO;
+
+	if (hw->phy.link_info.phy_type == I40E_PHY_TYPE_EMPTY) {
+		netdev_err(vsi->netdev, "Cannot read module EEPROM memory. No module connected.\n");
+		return -EINVAL;
+	}
+
+	type = hw->phy.link_info.module_type[0];
+
+	switch (type) {
+	case I40E_MODULE_TYPE_SFP:
+		status = i40e_aq_get_phy_register(hw,
+				I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
+				I40E_I2C_EEPROM_DEV_ADDR,
+				I40E_MODULE_SFF_8472_COMP,
+				&sff8472_comp, NULL);
+		if (status)
+			return -EIO;
+
+		status = i40e_aq_get_phy_register(hw,
+				I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
+				I40E_I2C_EEPROM_DEV_ADDR,
+				I40E_MODULE_SFF_8472_SWAP,
+				&sff8472_swap, NULL);
+		if (status)
+			return -EIO;
+
+		/* Check if the module requires address swap to access
+		 * the other EEPROM memory page.
+		 */
+		if (sff8472_swap & I40E_MODULE_SFF_ADDR_MODE) {
+			netdev_warn(vsi->netdev, "Module address swap to access page 0xA2 is not supported.\n");
+			modinfo->type = ETH_MODULE_SFF_8079;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+		} else if (sff8472_comp == 0x00) {
+			/* Module is not SFF-8472 compliant */
+			modinfo->type = ETH_MODULE_SFF_8079;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+		} else {
+			modinfo->type = ETH_MODULE_SFF_8472;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+		}
+		break;
+	case I40E_MODULE_TYPE_QSFP_PLUS:
+		/* Read from memory page 0. */
+		status = i40e_aq_get_phy_register(hw,
+				I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
+				0,
+				I40E_MODULE_REVISION_ADDR,
+				&sff8636_rev, NULL);
+		if (status)
+			return -EIO;
+		/* Determine revision compliance byte */
+		if (sff8636_rev > 0x02) {
+			/* Module is SFF-8636 compliant */
+			modinfo->type = ETH_MODULE_SFF_8636;
+			modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN;
+		} else {
+			modinfo->type = ETH_MODULE_SFF_8436;
+			modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN;
+		}
+		break;
+	case I40E_MODULE_TYPE_QSFP28:
+		modinfo->type = ETH_MODULE_SFF_8636;
+		modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN;
+		break;
+	default:
+		netdev_err(vsi->netdev, "Module type unrecognized\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * i40e_get_module_eeprom - fills buffer with (Q)SFP+ module memory contents
+ * @netdev: network interface device structure
+ * @ee: EEPROM dump request structure
+ * @data: buffer to be filled with EEPROM contents
+ **/
+static int i40e_get_module_eeprom(struct net_device *netdev,
+				  struct ethtool_eeprom *ee,
+				  u8 *data)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	bool is_sfp = false;
+	i40e_status status;
+	u32 value = 0;
+	int i;
+
+	if (!ee || !ee->len || !data)
+		return -EINVAL;
+
+	if (hw->phy.link_info.module_type[0] == I40E_MODULE_TYPE_SFP)
+		is_sfp = true;
+
+	for (i = 0; i < ee->len; i++) {
+		u32 offset = i + ee->offset;
+		u32 addr = is_sfp ? I40E_I2C_EEPROM_DEV_ADDR : 0;
+
+		/* Check if we need to access the other memory page */
+		if (is_sfp) {
+			if (offset >= ETH_MODULE_SFF_8079_LEN) {
+				offset -= ETH_MODULE_SFF_8079_LEN;
+				addr = I40E_I2C_EEPROM_DEV_ADDR2;
+			}
+		} else {
+			while (offset >= ETH_MODULE_SFF_8436_LEN) {
+				/* Compute memory page number and offset. */
+				offset -= ETH_MODULE_SFF_8436_LEN / 2;
+				addr++;
+			}
+		}
+
+		status = i40e_aq_get_phy_register(hw,
+				I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
+				addr, offset, &value, NULL);
+		if (status)
+			return -EIO;
+		data[i] = value;
+	}
+	return 0;
+}
+
 static const struct ethtool_ops i40e_ethtool_ops = {
 	.get_drvinfo		= i40e_get_drvinfo,
 	.get_regs_len		= i40e_get_regs_len,
@@ -4228,6 +4549,8 @@ static const struct ethtool_ops i40e_ethtool_ops = {
 	.set_rxfh		= i40e_set_rxfh,
 	.get_channels		= i40e_get_channels,
 	.set_channels		= i40e_set_channels,
+	.get_module_info	= i40e_get_module_info,
+	.get_module_eeprom	= i40e_get_module_eeprom,
 	.get_ts_info		= i40e_get_ts_info,
 	.get_priv_flags		= i40e_get_priv_flags,
 	.set_priv_flags		= i40e_set_priv_flags,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index de1fcac7834d..4a964d6e4a9e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -69,6 +69,15 @@ static int i40e_reset(struct i40e_pf *pf);
 static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired);
 static void i40e_fdir_sb_setup(struct i40e_pf *pf);
 static int i40e_veb_get_bw_info(struct i40e_veb *veb);
+static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
+				     struct i40e_cloud_filter *filter,
+				     bool add);
+static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
+					     struct i40e_cloud_filter *filter,
+					     bool add);
+static int i40e_get_capabilities(struct i40e_pf *pf,
+				 enum i40e_admin_queue_opc list_type);
+
 
 /* i40e_pci_tbl - PCI Device ID Table
  *
@@ -600,6 +609,20 @@ static void i40e_stat_update32(struct i40e_hw *hw, u32 reg,
 }
 
 /**
+ * i40e_stat_update_and_clear32 - read and clear hw reg, update a 32 bit stat
+ * @hw: ptr to the hardware info
+ * @reg: the hw reg to read and clear
+ * @stat: ptr to the stat
+ **/
+static void i40e_stat_update_and_clear32(struct i40e_hw *hw, u32 reg, u64 *stat)
+{
+	u32 new_data = rd32(hw, reg);
+
+	wr32(hw, reg, 1); /* must write a nonzero value to clear register */
+	*stat += new_data;
+}
+
+/**
  * i40e_update_eth_stats - Update VSI-specific ethernet statistics counters.
  * @vsi: the VSI to be updated
  **/
@@ -1040,18 +1063,15 @@ static void i40e_update_pf_stats(struct i40e_pf *pf)
 			   &osd->rx_jabber, &nsd->rx_jabber);
 
 	/* FDIR stats */
-	i40e_stat_update32(hw,
-			   I40E_GLQF_PCNT(I40E_FD_ATR_STAT_IDX(pf->hw.pf_id)),
-			   pf->stat_offsets_loaded,
-			   &osd->fd_atr_match, &nsd->fd_atr_match);
-	i40e_stat_update32(hw,
-			   I40E_GLQF_PCNT(I40E_FD_SB_STAT_IDX(pf->hw.pf_id)),
-			   pf->stat_offsets_loaded,
-			   &osd->fd_sb_match, &nsd->fd_sb_match);
-	i40e_stat_update32(hw,
-		      I40E_GLQF_PCNT(I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id)),
-		      pf->stat_offsets_loaded,
-		      &osd->fd_atr_tunnel_match, &nsd->fd_atr_tunnel_match);
+	i40e_stat_update_and_clear32(hw,
+			I40E_GLQF_PCNT(I40E_FD_ATR_STAT_IDX(hw->pf_id)),
+			&nsd->fd_atr_match);
+	i40e_stat_update_and_clear32(hw,
+			I40E_GLQF_PCNT(I40E_FD_SB_STAT_IDX(hw->pf_id)),
+			&nsd->fd_sb_match);
+	i40e_stat_update_and_clear32(hw,
+			I40E_GLQF_PCNT(I40E_FD_ATR_TUNNEL_STAT_IDX(hw->pf_id)),
+			&nsd->fd_atr_tunnel_match);
 
 	val = rd32(hw, I40E_PRTPM_EEE_STAT);
 	nsd->tx_lpi_status =
@@ -1578,6 +1598,170 @@ static int i40e_set_mac(struct net_device *netdev, void *p)
 }
 
 /**
+ * i40e_config_rss_aq - Prepare for RSS using AQ commands
+ * @vsi: vsi structure
+ * @seed: RSS hash seed
+ **/
+static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
+			      u8 *lut, u16 lut_size)
+{
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	int ret = 0;
+
+	if (seed) {
+		struct i40e_aqc_get_set_rss_key_data *seed_dw =
+			(struct i40e_aqc_get_set_rss_key_data *)seed;
+		ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw);
+		if (ret) {
+			dev_info(&pf->pdev->dev,
+				 "Cannot set RSS key, err %s aq_err %s\n",
+				 i40e_stat_str(hw, ret),
+				 i40e_aq_str(hw, hw->aq.asq_last_status));
+			return ret;
+		}
+	}
+	if (lut) {
+		bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false;
+
+		ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size);
+		if (ret) {
+			dev_info(&pf->pdev->dev,
+				 "Cannot set RSS lut, err %s aq_err %s\n",
+				 i40e_stat_str(hw, ret),
+				 i40e_aq_str(hw, hw->aq.asq_last_status));
+			return ret;
+		}
+	}
+	return ret;
+}
+
+/**
+ * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used
+ * @vsi: VSI structure
+ **/
+static int i40e_vsi_config_rss(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+	u8 seed[I40E_HKEY_ARRAY_SIZE];
+	u8 *lut;
+	int ret;
+
+	if (!(pf->hw_features & I40E_HW_RSS_AQ_CAPABLE))
+		return 0;
+	if (!vsi->rss_size)
+		vsi->rss_size = min_t(int, pf->alloc_rss_size,
+				      vsi->num_queue_pairs);
+	if (!vsi->rss_size)
+		return -EINVAL;
+	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+	if (!lut)
+		return -ENOMEM;
+
+	/* Use the user configured hash keys and lookup table if there is one,
+	 * otherwise use default
+	 */
+	if (vsi->rss_lut_user)
+		memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
+	else
+		i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
+	if (vsi->rss_hkey_user)
+		memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
+	else
+		netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
+	ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size);
+	kfree(lut);
+	return ret;
+}
+
+/**
+ * i40e_vsi_setup_queue_map_mqprio - Prepares mqprio based tc_config
+ * @vsi: the VSI being configured,
+ * @ctxt: VSI context structure
+ * @enabled_tc: number of traffic classes to enable
+ *
+ * Prepares VSI tc_config to have queue configurations based on MQPRIO options.
+ **/
+static int i40e_vsi_setup_queue_map_mqprio(struct i40e_vsi *vsi,
+					   struct i40e_vsi_context *ctxt,
+					   u8 enabled_tc)
+{
+	u16 qcount = 0, max_qcount, qmap, sections = 0;
+	int i, override_q, pow, num_qps, ret;
+	u8 netdev_tc = 0, offset = 0;
+
+	if (vsi->type != I40E_VSI_MAIN)
+		return -EINVAL;
+	sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
+	sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
+	vsi->tc_config.numtc = vsi->mqprio_qopt.qopt.num_tc;
+	vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1;
+	num_qps = vsi->mqprio_qopt.qopt.count[0];
+
+	/* find the next higher power-of-2 of num queue pairs */
+	pow = ilog2(num_qps);
+	if (!is_power_of_2(num_qps))
+		pow++;
+	qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
+		(pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
+
+	/* Setup queue offset/count for all TCs for given VSI */
+	max_qcount = vsi->mqprio_qopt.qopt.count[0];
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		/* See if the given TC is enabled for the given VSI */
+		if (vsi->tc_config.enabled_tc & BIT(i)) {
+			offset = vsi->mqprio_qopt.qopt.offset[i];
+			qcount = vsi->mqprio_qopt.qopt.count[i];
+			if (qcount > max_qcount)
+				max_qcount = qcount;
+			vsi->tc_config.tc_info[i].qoffset = offset;
+			vsi->tc_config.tc_info[i].qcount = qcount;
+			vsi->tc_config.tc_info[i].netdev_tc = netdev_tc++;
+		} else {
+			/* TC is not enabled so set the offset to
+			 * default queue and allocate one queue
+			 * for the given TC.
+			 */
+			vsi->tc_config.tc_info[i].qoffset = 0;
+			vsi->tc_config.tc_info[i].qcount = 1;
+			vsi->tc_config.tc_info[i].netdev_tc = 0;
+		}
+	}
+
+	/* Set actual Tx/Rx queue pairs */
+	vsi->num_queue_pairs = offset + qcount;
+
+	/* Setup queue TC[0].qmap for given VSI context */
+	ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
+	ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
+	ctxt->info.queue_mapping[0] = cpu_to_le16(vsi->base_queue);
+	ctxt->info.valid_sections |= cpu_to_le16(sections);
+
+	/* Reconfigure RSS for main VSI with max queue count */
+	vsi->rss_size = max_qcount;
+	ret = i40e_vsi_config_rss(vsi);
+	if (ret) {
+		dev_info(&vsi->back->pdev->dev,
+			 "Failed to reconfig rss for num_queues (%u)\n",
+			 max_qcount);
+		return ret;
+	}
+	vsi->reconfig_rss = true;
+	dev_dbg(&vsi->back->pdev->dev,
+		"Reconfigured rss with num_queues (%u)\n", max_qcount);
+
+	/* Find queue count available for channel VSIs and starting offset
+	 * for channel VSIs
+	 */
+	override_q = vsi->mqprio_qopt.qopt.count[0];
+	if (override_q && override_q < vsi->num_queue_pairs) {
+		vsi->cnt_q_avail = vsi->num_queue_pairs - override_q;
+		vsi->next_base_queue = override_q;
+	}
+	return 0;
+}
+
+/**
  * i40e_vsi_setup_queue_map - Setup a VSI queue map based on enabled_tc
  * @vsi: the VSI being setup
  * @ctxt: VSI context structure
@@ -1615,7 +1799,7 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
 			numtc = 1;
 		}
 	} else {
-		/* At least TC0 is enabled in case of non-DCB case */
+		/* At least TC0 is enabled in non-DCB, non-MQPRIO case */
 		numtc = 1;
 	}
 
@@ -1765,11 +1949,6 @@ static void i40e_set_rx_mode(struct net_device *netdev)
 		vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
 		vsi->back->flags |= I40E_FLAG_FILTER_SYNC;
 	}
-
-	/* schedule our worker thread which will take care of
-	 * applying the new filter changes
-	 */
-	i40e_service_event_schedule(vsi->back);
 }
 
 /**
@@ -2873,22 +3052,18 @@ static void i40e_vsi_free_rx_resources(struct i40e_vsi *vsi)
  **/
 static void i40e_config_xps_tx_ring(struct i40e_ring *ring)
 {
-	struct i40e_vsi *vsi = ring->vsi;
+	int cpu;
 
-	if (!ring->q_vector || !ring->netdev)
+	if (!ring->q_vector || !ring->netdev || ring->ch)
 		return;
 
-	if ((vsi->tc_config.numtc <= 1) &&
-	    !test_and_set_bit(__I40E_TX_XPS_INIT_DONE, &ring->state)) {
-		netif_set_xps_queue(ring->netdev,
-				    get_cpu_mask(ring->q_vector->v_idx),
-				    ring->queue_index);
-	}
+	/* We only initialize XPS once, so as not to overwrite user settings */
+	if (test_and_set_bit(__I40E_TX_XPS_INIT_DONE, ring->state))
+		return;
 
-	/* schedule our worker thread which will take care of
-	 * applying the new filter changes
-	 */
-	i40e_service_event_schedule(vsi->back);
+	cpu = cpumask_local_spread(ring->q_vector->v_idx, -1);
+	netif_set_xps_queue(ring->netdev, get_cpu_mask(cpu),
+			    ring->queue_index);
 }
 
 /**
@@ -2942,7 +3117,14 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring)
 	 * initialization. This has to be done regardless of
 	 * DCB as by default everything is mapped to TC0.
 	 */
-	tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[ring->dcb_tc]);
+
+	if (ring->ch)
+		tx_ctx.rdylist =
+			le16_to_cpu(ring->ch->info.qs_handle[ring->dcb_tc]);
+
+	else
+		tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[ring->dcb_tc]);
+
 	tx_ctx.rdylist_act = 0;
 
 	/* clear the context in the HMC */
@@ -2964,12 +3146,23 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring)
 	}
 
 	/* Now associate this queue with this PCI function */
-	if (vsi->type == I40E_VSI_VMDQ2) {
-		qtx_ctl = I40E_QTX_CTL_VM_QUEUE;
-		qtx_ctl |= ((vsi->id) << I40E_QTX_CTL_VFVM_INDX_SHIFT) &
-			   I40E_QTX_CTL_VFVM_INDX_MASK;
+	if (ring->ch) {
+		if (ring->ch->type == I40E_VSI_VMDQ2)
+			qtx_ctl = I40E_QTX_CTL_VM_QUEUE;
+		else
+			return -EINVAL;
+
+		qtx_ctl |= (ring->ch->vsi_number <<
+			    I40E_QTX_CTL_VFVM_INDX_SHIFT) &
+			    I40E_QTX_CTL_VFVM_INDX_MASK;
 	} else {
-		qtx_ctl = I40E_QTX_CTL_PF_QUEUE;
+		if (vsi->type == I40E_VSI_VMDQ2) {
+			qtx_ctl = I40E_QTX_CTL_VM_QUEUE;
+			qtx_ctl |= ((vsi->id) << I40E_QTX_CTL_VFVM_INDX_SHIFT) &
+				    I40E_QTX_CTL_VFVM_INDX_MASK;
+		} else {
+			qtx_ctl = I40E_QTX_CTL_PF_QUEUE;
+		}
 	}
 
 	qtx_ctl |= ((hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) &
@@ -2998,7 +3191,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
 	struct i40e_hmc_obj_rxq rx_ctx;
 	i40e_status err = 0;
 
-	ring->state = 0;
+	bitmap_zero(ring->state, __I40E_RING_STATE_NBITS);
 
 	/* clear the context structure first */
 	memset(&rx_ctx, 0, sizeof(rx_ctx));
@@ -3023,7 +3216,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
 	if (hw->revision_id == 0)
 		rx_ctx.lrxqthresh = 0;
 	else
-		rx_ctx.lrxqthresh = 2;
+		rx_ctx.lrxqthresh = 1;
 	rx_ctx.crcstrip = 1;
 	rx_ctx.l2tsel = 1;
 	/* this controls whether VLAN is stripped from inner headers */
@@ -3138,6 +3331,7 @@ static void i40e_vsi_config_dcb_rings(struct i40e_vsi *vsi)
 			rx_ring->dcb_tc = 0;
 			tx_ring->dcb_tc = 0;
 		}
+		return;
 	}
 
 	for (n = 0; n < I40E_MAX_TRAFFIC_CLASS; n++) {
@@ -3396,15 +3590,14 @@ void i40e_irq_dynamic_disable_icr0(struct i40e_pf *pf)
 /**
  * i40e_irq_dynamic_enable_icr0 - Enable default interrupt generation for icr0
  * @pf: board private structure
- * @clearpba: true when all pending interrupt events should be cleared
  **/
-void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf, bool clearpba)
+void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf)
 {
 	struct i40e_hw *hw = &pf->hw;
 	u32 val;
 
 	val = I40E_PFINT_DYN_CTL0_INTENA_MASK   |
-	      (clearpba ? I40E_PFINT_DYN_CTL0_CLEARPBA_MASK : 0) |
+	      I40E_PFINT_DYN_CTL0_CLEARPBA_MASK |
 	      (I40E_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT);
 
 	wr32(hw, I40E_PFINT_DYN_CTL0, val);
@@ -3471,6 +3664,7 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename)
 	int tx_int_idx = 0;
 	int vector, err;
 	int irq_num;
+	int cpu;
 
 	for (vector = 0; vector < q_vectors; vector++) {
 		struct i40e_q_vector *q_vector = vsi->q_vectors[vector];
@@ -3506,10 +3700,14 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename)
 		q_vector->affinity_notify.notify = i40e_irq_affinity_notify;
 		q_vector->affinity_notify.release = i40e_irq_affinity_release;
 		irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
-		/* get_cpu_mask returns a static constant mask with
-		 * a permanent lifetime so it's ok to use here.
+		/* Spread affinity hints out across online CPUs.
+		 *
+		 * get_cpu_mask returns a static constant mask with
+		 * a permanent lifetime so it's ok to pass to
+		 * irq_set_affinity_hint without making a copy.
 		 */
-		irq_set_affinity_hint(irq_num, get_cpu_mask(q_vector->v_idx));
+		cpu = cpumask_local_spread(q_vector->v_idx, -1);
+		irq_set_affinity_hint(irq_num, get_cpu_mask(cpu));
 	}
 
 	vsi->irqs_ready = true;
@@ -3585,7 +3783,7 @@ static int i40e_vsi_enable_irq(struct i40e_vsi *vsi)
 		for (i = 0; i < vsi->num_q_vectors; i++)
 			i40e_irq_dynamic_enable(vsi, i);
 	} else {
-		i40e_irq_dynamic_enable_icr0(pf, true);
+		i40e_irq_dynamic_enable_icr0(pf);
 	}
 
 	i40e_flush(&pf->hw);
@@ -3593,14 +3791,20 @@ static int i40e_vsi_enable_irq(struct i40e_vsi *vsi)
 }
 
 /**
- * i40e_stop_misc_vector - Stop the vector that handles non-queue events
+ * i40e_free_misc_vector - Free the vector that handles non-queue events
  * @pf: board private structure
  **/
-static void i40e_stop_misc_vector(struct i40e_pf *pf)
+static void i40e_free_misc_vector(struct i40e_pf *pf)
 {
 	/* Disable ICR 0 */
 	wr32(&pf->hw, I40E_PFINT_ICR0_ENA, 0);
 	i40e_flush(&pf->hw);
+
+	if (pf->flags & I40E_FLAG_MSIX_ENABLED && pf->msix_entries) {
+		synchronize_irq(pf->msix_entries[0].vector);
+		free_irq(pf->msix_entries[0].vector, pf);
+		clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state);
+	}
 }
 
 /**
@@ -3728,7 +3932,7 @@ enable_intr:
 	wr32(hw, I40E_PFINT_ICR0_ENA, ena_mask);
 	if (!test_bit(__I40E_DOWN, pf->state)) {
 		i40e_service_event_schedule(pf);
-		i40e_irq_dynamic_enable_icr0(pf, false);
+		i40e_irq_dynamic_enable_icr0(pf);
 	}
 
 	return ret;
@@ -4455,11 +4659,7 @@ static void i40e_clear_interrupt_scheme(struct i40e_pf *pf)
 {
 	int i;
 
-	i40e_stop_misc_vector(pf);
-	if (pf->flags & I40E_FLAG_MSIX_ENABLED && pf->msix_entries) {
-		synchronize_irq(pf->msix_entries[0].vector);
-		free_irq(pf->msix_entries[0].vector, pf);
-	}
+	i40e_free_misc_vector(pf);
 
 	i40e_put_lump(pf->irq_pile, pf->iwarp_base_vector,
 		      I40E_IWARP_IRQ_PILE_ID);
@@ -4848,6 +5048,24 @@ static u8 i40e_dcb_get_enabled_tc(struct i40e_dcbx_config *dcbcfg)
 }
 
 /**
+ * i40e_mqprio_get_enabled_tc - Get enabled traffic classes
+ * @pf: PF being queried
+ *
+ * Query the current MQPRIO configuration and return the number of
+ * traffic classes enabled.
+ **/
+static u8 i40e_mqprio_get_enabled_tc(struct i40e_pf *pf)
+{
+	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+	u8 num_tc = vsi->mqprio_qopt.qopt.num_tc;
+	u8 enabled_tc = 1, i;
+
+	for (i = 1; i < num_tc; i++)
+		enabled_tc |= BIT(i);
+	return enabled_tc;
+}
+
+/**
  * i40e_pf_get_num_tc - Get enabled traffic classes for PF
  * @pf: PF being queried
  *
@@ -4860,7 +5078,10 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf)
 	u8 num_tc = 0;
 	struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config;
 
-	/* If DCB is not enabled then always in single TC */
+	if (pf->flags & I40E_FLAG_TC_MQPRIO)
+		return pf->vsi[pf->lan_vsi]->mqprio_qopt.qopt.num_tc;
+
+	/* If neither MQPRIO nor DCB is enabled, then always use single TC */
 	if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
 		return 1;
 
@@ -4889,7 +5110,12 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf)
  **/
 static u8 i40e_pf_get_tc_map(struct i40e_pf *pf)
 {
-	/* If DCB is not enabled for this PF then just return default TC */
+	if (pf->flags & I40E_FLAG_TC_MQPRIO)
+		return i40e_mqprio_get_enabled_tc(pf);
+
+	/* If neither MQPRIO nor DCB is enabled for this PF then just return
+	 * default TC
+	 */
 	if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
 		return I40E_DEFAULT_TRAFFIC_CLASS;
 
@@ -4979,6 +5205,16 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc,
 	i40e_status ret;
 	int i;
 
+	if (vsi->back->flags & I40E_FLAG_TC_MQPRIO)
+		return 0;
+	if (!vsi->mqprio_qopt.qopt.hw) {
+		ret = i40e_set_bw_limit(vsi, vsi->seid, 0);
+		if (ret)
+			dev_info(&vsi->back->pdev->dev,
+				 "Failed to reset tx rate for vsi->seid %u\n",
+				 vsi->seid);
+		return ret;
+	}
 	bw_data.tc_valid_bits = enabled_tc;
 	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
 		bw_data.tc_bw_credits[i] = bw_share[i];
@@ -5041,6 +5277,9 @@ static void i40e_vsi_config_netdev_tc(struct i40e_vsi *vsi, u8 enabled_tc)
 					vsi->tc_config.tc_info[i].qoffset);
 	}
 
+	if (pf->flags & I40E_FLAG_TC_MQPRIO)
+		return;
+
 	/* Assign UP2TC map for the VSI */
 	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
 		/* Get the actual TC# for the UP */
@@ -5091,7 +5330,8 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc)
 	int i;
 
 	/* Check if enabled_tc is same as existing or new TCs */
-	if (vsi->tc_config.enabled_tc == enabled_tc)
+	if (vsi->tc_config.enabled_tc == enabled_tc &&
+	    vsi->mqprio_qopt.mode != TC_MQPRIO_MODE_CHANNEL)
 		return ret;
 
 	/* Enable ETS TCs with equal BW Share for now across all VSIs */
@@ -5114,15 +5354,37 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc)
 	ctxt.vf_num = 0;
 	ctxt.uplink_seid = vsi->uplink_seid;
 	ctxt.info = vsi->info;
-	i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false);
+	if (vsi->back->flags & I40E_FLAG_TC_MQPRIO) {
+		ret = i40e_vsi_setup_queue_map_mqprio(vsi, &ctxt, enabled_tc);
+		if (ret)
+			goto out;
+	} else {
+		i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false);
+	}
 
+	/* On destroying the qdisc, reset vsi->rss_size, as number of enabled
+	 * queues changed.
+	 */
+	if (!vsi->mqprio_qopt.qopt.hw && vsi->reconfig_rss) {
+		vsi->rss_size = min_t(int, vsi->back->alloc_rss_size,
+				      vsi->num_queue_pairs);
+		ret = i40e_vsi_config_rss(vsi);
+		if (ret) {
+			dev_info(&vsi->back->pdev->dev,
+				 "Failed to reconfig rss for num_queues\n");
+			return ret;
+		}
+		vsi->reconfig_rss = false;
+	}
 	if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) {
 		ctxt.info.valid_sections |=
 				cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
 		ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA;
 	}
 
-	/* Update the VSI after updating the VSI queue-mapping information */
+	/* Update the VSI after updating the VSI queue-mapping
+	 * information
+	 */
 	ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
 	if (ret) {
 		dev_info(&vsi->back->pdev->dev,
@@ -5154,6 +5416,825 @@ out:
 }
 
 /**
+ * i40e_get_link_speed - Returns link speed for the interface
+ * @vsi: VSI to be configured
+ *
+ **/
+int i40e_get_link_speed(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+
+	switch (pf->hw.phy.link_info.link_speed) {
+	case I40E_LINK_SPEED_40GB:
+		return 40000;
+	case I40E_LINK_SPEED_25GB:
+		return 25000;
+	case I40E_LINK_SPEED_20GB:
+		return 20000;
+	case I40E_LINK_SPEED_10GB:
+		return 10000;
+	case I40E_LINK_SPEED_1GB:
+		return 1000;
+	default:
+		return -EINVAL;
+	}
+}
+
+/**
+ * i40e_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate
+ * @vsi: VSI to be configured
+ * @seid: seid of the channel/VSI
+ * @max_tx_rate: max TX rate to be configured as BW limit
+ *
+ * Helper function to set BW limit for a given VSI
+ **/
+int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate)
+{
+	struct i40e_pf *pf = vsi->back;
+	u64 credits = 0;
+	int speed = 0;
+	int ret = 0;
+
+	speed = i40e_get_link_speed(vsi);
+	if (max_tx_rate > speed) {
+		dev_err(&pf->pdev->dev,
+			"Invalid max tx rate %llu specified for VSI seid %d.",
+			max_tx_rate, seid);
+		return -EINVAL;
+	}
+	if (max_tx_rate && max_tx_rate < 50) {
+		dev_warn(&pf->pdev->dev,
+			 "Setting max tx rate to minimum usable value of 50Mbps.\n");
+		max_tx_rate = 50;
+	}
+
+	/* Tx rate credits are in values of 50Mbps, 0 is disabled */
+	credits = max_tx_rate;
+	do_div(credits, I40E_BW_CREDIT_DIVISOR);
+	ret = i40e_aq_config_vsi_bw_limit(&pf->hw, seid, credits,
+					  I40E_MAX_BW_INACTIVE_ACCUM, NULL);
+	if (ret)
+		dev_err(&pf->pdev->dev,
+			"Failed set tx rate (%llu Mbps) for vsi->seid %u, err %s aq_err %s\n",
+			max_tx_rate, seid, i40e_stat_str(&pf->hw, ret),
+			i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+	return ret;
+}
+
+/**
+ * i40e_remove_queue_channels - Remove queue channels for the TCs
+ * @vsi: VSI to be configured
+ *
+ * Remove queue channels for the TCs
+ **/
+static void i40e_remove_queue_channels(struct i40e_vsi *vsi)
+{
+	enum i40e_admin_queue_err last_aq_status;
+	struct i40e_cloud_filter *cfilter;
+	struct i40e_channel *ch, *ch_tmp;
+	struct i40e_pf *pf = vsi->back;
+	struct hlist_node *node;
+	int ret, i;
+
+	/* Reset rss size that was stored when reconfiguring rss for
+	 * channel VSIs with non-power-of-2 queue count.
+	 */
+	vsi->current_rss_size = 0;
+
+	/* perform cleanup for channels if they exist */
+	if (list_empty(&vsi->ch_list))
+		return;
+
+	list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
+		struct i40e_vsi *p_vsi;
+
+		list_del(&ch->list);
+		p_vsi = ch->parent_vsi;
+		if (!p_vsi || !ch->initialized) {
+			kfree(ch);
+			continue;
+		}
+		/* Reset queue contexts */
+		for (i = 0; i < ch->num_queue_pairs; i++) {
+			struct i40e_ring *tx_ring, *rx_ring;
+			u16 pf_q;
+
+			pf_q = ch->base_queue + i;
+			tx_ring = vsi->tx_rings[pf_q];
+			tx_ring->ch = NULL;
+
+			rx_ring = vsi->rx_rings[pf_q];
+			rx_ring->ch = NULL;
+		}
+
+		/* Reset BW configured for this VSI via mqprio */
+		ret = i40e_set_bw_limit(vsi, ch->seid, 0);
+		if (ret)
+			dev_info(&vsi->back->pdev->dev,
+				 "Failed to reset tx rate for ch->seid %u\n",
+				 ch->seid);
+
+		/* delete cloud filters associated with this channel */
+		hlist_for_each_entry_safe(cfilter, node,
+					  &pf->cloud_filter_list, cloud_node) {
+			if (cfilter->seid != ch->seid)
+				continue;
+
+			hash_del(&cfilter->cloud_node);
+			if (cfilter->dst_port)
+				ret = i40e_add_del_cloud_filter_big_buf(vsi,
+									cfilter,
+									false);
+			else
+				ret = i40e_add_del_cloud_filter(vsi, cfilter,
+								false);
+			last_aq_status = pf->hw.aq.asq_last_status;
+			if (ret)
+				dev_info(&pf->pdev->dev,
+					 "Failed to delete cloud filter, err %s aq_err %s\n",
+					 i40e_stat_str(&pf->hw, ret),
+					 i40e_aq_str(&pf->hw, last_aq_status));
+			kfree(cfilter);
+		}
+
+		/* delete VSI from FW */
+		ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid,
+					     NULL);
+		if (ret)
+			dev_err(&vsi->back->pdev->dev,
+				"unable to remove channel (%d) for parent VSI(%d)\n",
+				ch->seid, p_vsi->seid);
+		kfree(ch);
+	}
+	INIT_LIST_HEAD(&vsi->ch_list);
+}
+
+/**
+ * i40e_is_any_channel - channel exist or not
+ * @vsi: ptr to VSI to which channels are associated with
+ *
+ * Returns true or false if channel(s) exist for associated VSI or not
+ **/
+static bool i40e_is_any_channel(struct i40e_vsi *vsi)
+{
+	struct i40e_channel *ch, *ch_tmp;
+
+	list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
+		if (ch->initialized)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * i40e_get_max_queues_for_channel
+ * @vsi: ptr to VSI to which channels are associated with
+ *
+ * Helper function which returns max value among the queue counts set on the
+ * channels/TCs created.
+ **/
+static int i40e_get_max_queues_for_channel(struct i40e_vsi *vsi)
+{
+	struct i40e_channel *ch, *ch_tmp;
+	int max = 0;
+
+	list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
+		if (!ch->initialized)
+			continue;
+		if (ch->num_queue_pairs > max)
+			max = ch->num_queue_pairs;
+	}
+
+	return max;
+}
+
+/**
+ * i40e_validate_num_queues - validate num_queues w.r.t channel
+ * @pf: ptr to PF device
+ * @num_queues: number of queues
+ * @vsi: the parent VSI
+ * @reconfig_rss: indicates should the RSS be reconfigured or not
+ *
+ * This function validates number of queues in the context of new channel
+ * which is being established and determines if RSS should be reconfigured
+ * or not for parent VSI.
+ **/
+static int i40e_validate_num_queues(struct i40e_pf *pf, int num_queues,
+				    struct i40e_vsi *vsi, bool *reconfig_rss)
+{
+	int max_ch_queues;
+
+	if (!reconfig_rss)
+		return -EINVAL;
+
+	*reconfig_rss = false;
+
+	if (num_queues > I40E_MAX_QUEUES_PER_CH) {
+		dev_err(&pf->pdev->dev,
+			"Failed to create VMDq VSI. User requested num_queues (%d) > I40E_MAX_QUEUES_PER_VSI (%u)\n",
+			num_queues, I40E_MAX_QUEUES_PER_CH);
+		return -EINVAL;
+	}
+
+	if (vsi->current_rss_size) {
+		if (num_queues > vsi->current_rss_size) {
+			dev_dbg(&pf->pdev->dev,
+				"Error: num_queues (%d) > vsi's current_size(%d)\n",
+				num_queues, vsi->current_rss_size);
+			return -EINVAL;
+		} else if ((num_queues < vsi->current_rss_size) &&
+			   (!is_power_of_2(num_queues))) {
+			dev_dbg(&pf->pdev->dev,
+				"Error: num_queues (%d) < vsi's current_size(%d), but not power of 2\n",
+				num_queues, vsi->current_rss_size);
+			return -EINVAL;
+		}
+	}
+
+	if (!is_power_of_2(num_queues)) {
+		/* Find the max num_queues configured for channel if channel
+		 * exist.
+		 * if channel exist, then enforce 'num_queues' to be more than
+		 * max ever queues configured for channel.
+		 */
+		max_ch_queues = i40e_get_max_queues_for_channel(vsi);
+		if (num_queues < max_ch_queues) {
+			dev_dbg(&pf->pdev->dev,
+				"Error: num_queues (%d) < max queues configured for channel(%d)\n",
+				num_queues, max_ch_queues);
+			return -EINVAL;
+		}
+		*reconfig_rss = true;
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_vsi_reconfig_rss - reconfig RSS based on specified rss_size
+ * @vsi: the VSI being setup
+ * @rss_size: size of RSS, accordingly LUT gets reprogrammed
+ *
+ * This function reconfigures RSS by reprogramming LUTs using 'rss_size'
+ **/
+static int i40e_vsi_reconfig_rss(struct i40e_vsi *vsi, u16 rss_size)
+{
+	struct i40e_pf *pf = vsi->back;
+	u8 seed[I40E_HKEY_ARRAY_SIZE];
+	struct i40e_hw *hw = &pf->hw;
+	int local_rss_size;
+	u8 *lut;
+	int ret;
+
+	if (!vsi->rss_size)
+		return -EINVAL;
+
+	if (rss_size > vsi->rss_size)
+		return -EINVAL;
+
+	local_rss_size = min_t(int, vsi->rss_size, rss_size);
+	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+	if (!lut)
+		return -ENOMEM;
+
+	/* Ignoring user configured lut if there is one */
+	i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, local_rss_size);
+
+	/* Use user configured hash key if there is one, otherwise
+	 * use default.
+	 */
+	if (vsi->rss_hkey_user)
+		memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
+	else
+		netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
+
+	ret = i40e_config_rss(vsi, seed, lut, vsi->rss_table_size);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "Cannot set RSS lut, err %s aq_err %s\n",
+			 i40e_stat_str(hw, ret),
+			 i40e_aq_str(hw, hw->aq.asq_last_status));
+		kfree(lut);
+		return ret;
+	}
+	kfree(lut);
+
+	/* Do the update w.r.t. storing rss_size */
+	if (!vsi->orig_rss_size)
+		vsi->orig_rss_size = vsi->rss_size;
+	vsi->current_rss_size = local_rss_size;
+
+	return ret;
+}
+
+/**
+ * i40e_channel_setup_queue_map - Setup a channel queue map
+ * @pf: ptr to PF device
+ * @vsi: the VSI being setup
+ * @ctxt: VSI context structure
+ * @ch: ptr to channel structure
+ *
+ * Setup queue map for a specific channel
+ **/
+static void i40e_channel_setup_queue_map(struct i40e_pf *pf,
+					 struct i40e_vsi_context *ctxt,
+					 struct i40e_channel *ch)
+{
+	u16 qcount, qmap, sections = 0;
+	u8 offset = 0;
+	int pow;
+
+	sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
+	sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
+
+	qcount = min_t(int, ch->num_queue_pairs, pf->num_lan_msix);
+	ch->num_queue_pairs = qcount;
+
+	/* find the next higher power-of-2 of num queue pairs */
+	pow = ilog2(qcount);
+	if (!is_power_of_2(qcount))
+		pow++;
+
+	qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
+		(pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
+
+	/* Setup queue TC[0].qmap for given VSI context */
+	ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
+
+	ctxt->info.up_enable_bits = 0x1; /* TC0 enabled */
+	ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
+	ctxt->info.queue_mapping[0] = cpu_to_le16(ch->base_queue);
+	ctxt->info.valid_sections |= cpu_to_le16(sections);
+}
+
+/**
+ * i40e_add_channel - add a channel by adding VSI
+ * @pf: ptr to PF device
+ * @uplink_seid: underlying HW switching element (VEB) ID
+ * @ch: ptr to channel structure
+ *
+ * Add a channel (VSI) using add_vsi and queue_map
+ **/
+static int i40e_add_channel(struct i40e_pf *pf, u16 uplink_seid,
+			    struct i40e_channel *ch)
+{
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_vsi_context ctxt;
+	u8 enabled_tc = 0x1; /* TC0 enabled */
+	int ret;
+
+	if (ch->type != I40E_VSI_VMDQ2) {
+		dev_info(&pf->pdev->dev,
+			 "add new vsi failed, ch->type %d\n", ch->type);
+		return -EINVAL;
+	}
+
+	memset(&ctxt, 0, sizeof(ctxt));
+	ctxt.pf_num = hw->pf_id;
+	ctxt.vf_num = 0;
+	ctxt.uplink_seid = uplink_seid;
+	ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
+	if (ch->type == I40E_VSI_VMDQ2)
+		ctxt.flags = I40E_AQ_VSI_TYPE_VMDQ2;
+
+	if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED) {
+		ctxt.info.valid_sections |=
+		     cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
+		ctxt.info.switch_id =
+		   cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
+	}
+
+	/* Set queue map for a given VSI context */
+	i40e_channel_setup_queue_map(pf, &ctxt, ch);
+
+	/* Now time to create VSI */
+	ret = i40e_aq_add_vsi(hw, &ctxt, NULL);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "add new vsi failed, err %s aq_err %s\n",
+			 i40e_stat_str(&pf->hw, ret),
+			 i40e_aq_str(&pf->hw,
+				     pf->hw.aq.asq_last_status));
+		return -ENOENT;
+	}
+
+	/* Success, update channel */
+	ch->enabled_tc = enabled_tc;
+	ch->seid = ctxt.seid;
+	ch->vsi_number = ctxt.vsi_number;
+	ch->stat_counter_idx = cpu_to_le16(ctxt.info.stat_counter_idx);
+
+	/* copy just the sections touched not the entire info
+	 * since not all sections are valid as returned by
+	 * update vsi params
+	 */
+	ch->info.mapping_flags = ctxt.info.mapping_flags;
+	memcpy(&ch->info.queue_mapping,
+	       &ctxt.info.queue_mapping, sizeof(ctxt.info.queue_mapping));
+	memcpy(&ch->info.tc_mapping, ctxt.info.tc_mapping,
+	       sizeof(ctxt.info.tc_mapping));
+
+	return 0;
+}
+
+static int i40e_channel_config_bw(struct i40e_vsi *vsi, struct i40e_channel *ch,
+				  u8 *bw_share)
+{
+	struct i40e_aqc_configure_vsi_tc_bw_data bw_data;
+	i40e_status ret;
+	int i;
+
+	bw_data.tc_valid_bits = ch->enabled_tc;
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+		bw_data.tc_bw_credits[i] = bw_share[i];
+
+	ret = i40e_aq_config_vsi_tc_bw(&vsi->back->hw, ch->seid,
+				       &bw_data, NULL);
+	if (ret) {
+		dev_info(&vsi->back->pdev->dev,
+			 "Config VSI BW allocation per TC failed, aq_err: %d for new_vsi->seid %u\n",
+			 vsi->back->hw.aq.asq_last_status, ch->seid);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+		ch->info.qs_handle[i] = bw_data.qs_handles[i];
+
+	return 0;
+}
+
+/**
+ * i40e_channel_config_tx_ring - config TX ring associated with new channel
+ * @pf: ptr to PF device
+ * @vsi: the VSI being setup
+ * @ch: ptr to channel structure
+ *
+ * Configure TX rings associated with channel (VSI) since queues are being
+ * from parent VSI.
+ **/
+static int i40e_channel_config_tx_ring(struct i40e_pf *pf,
+				       struct i40e_vsi *vsi,
+				       struct i40e_channel *ch)
+{
+	i40e_status ret;
+	int i;
+	u8 bw_share[I40E_MAX_TRAFFIC_CLASS] = {0};
+
+	/* Enable ETS TCs with equal BW Share for now across all VSIs */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		if (ch->enabled_tc & BIT(i))
+			bw_share[i] = 1;
+	}
+
+	/* configure BW for new VSI */
+	ret = i40e_channel_config_bw(vsi, ch, bw_share);
+	if (ret) {
+		dev_info(&vsi->back->pdev->dev,
+			 "Failed configuring TC map %d for channel (seid %u)\n",
+			 ch->enabled_tc, ch->seid);
+		return ret;
+	}
+
+	for (i = 0; i < ch->num_queue_pairs; i++) {
+		struct i40e_ring *tx_ring, *rx_ring;
+		u16 pf_q;
+
+		pf_q = ch->base_queue + i;
+
+		/* Get to TX ring ptr of main VSI, for re-setup TX queue
+		 * context
+		 */
+		tx_ring = vsi->tx_rings[pf_q];
+		tx_ring->ch = ch;
+
+		/* Get the RX ring ptr */
+		rx_ring = vsi->rx_rings[pf_q];
+		rx_ring->ch = ch;
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_setup_hw_channel - setup new channel
+ * @pf: ptr to PF device
+ * @vsi: the VSI being setup
+ * @ch: ptr to channel structure
+ * @uplink_seid: underlying HW switching element (VEB) ID
+ * @type: type of channel to be created (VMDq2/VF)
+ *
+ * Setup new channel (VSI) based on specified type (VMDq2/VF)
+ * and configures TX rings accordingly
+ **/
+static inline int i40e_setup_hw_channel(struct i40e_pf *pf,
+					struct i40e_vsi *vsi,
+					struct i40e_channel *ch,
+					u16 uplink_seid, u8 type)
+{
+	int ret;
+
+	ch->initialized = false;
+	ch->base_queue = vsi->next_base_queue;
+	ch->type = type;
+
+	/* Proceed with creation of channel (VMDq2) VSI */
+	ret = i40e_add_channel(pf, uplink_seid, ch);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "failed to add_channel using uplink_seid %u\n",
+			 uplink_seid);
+		return ret;
+	}
+
+	/* Mark the successful creation of channel */
+	ch->initialized = true;
+
+	/* Reconfigure TX queues using QTX_CTL register */
+	ret = i40e_channel_config_tx_ring(pf, vsi, ch);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "failed to configure TX rings for channel %u\n",
+			 ch->seid);
+		return ret;
+	}
+
+	/* update 'next_base_queue' */
+	vsi->next_base_queue = vsi->next_base_queue + ch->num_queue_pairs;
+	dev_dbg(&pf->pdev->dev,
+		"Added channel: vsi_seid %u, vsi_number %u, stat_counter_idx %u, num_queue_pairs %u, pf->next_base_queue %d\n",
+		ch->seid, ch->vsi_number, ch->stat_counter_idx,
+		ch->num_queue_pairs,
+		vsi->next_base_queue);
+	return ret;
+}
+
+/**
+ * i40e_setup_channel - setup new channel using uplink element
+ * @pf: ptr to PF device
+ * @type: type of channel to be created (VMDq2/VF)
+ * @uplink_seid: underlying HW switching element (VEB) ID
+ * @ch: ptr to channel structure
+ *
+ * Setup new channel (VSI) based on specified type (VMDq2/VF)
+ * and uplink switching element (uplink_seid)
+ **/
+static bool i40e_setup_channel(struct i40e_pf *pf, struct i40e_vsi *vsi,
+			       struct i40e_channel *ch)
+{
+	u8 vsi_type;
+	u16 seid;
+	int ret;
+
+	if (vsi->type == I40E_VSI_MAIN) {
+		vsi_type = I40E_VSI_VMDQ2;
+	} else {
+		dev_err(&pf->pdev->dev, "unsupported parent vsi type(%d)\n",
+			vsi->type);
+		return false;
+	}
+
+	/* underlying switching element */
+	seid = pf->vsi[pf->lan_vsi]->uplink_seid;
+
+	/* create channel (VSI), configure TX rings */
+	ret = i40e_setup_hw_channel(pf, vsi, ch, seid, vsi_type);
+	if (ret) {
+		dev_err(&pf->pdev->dev, "failed to setup hw_channel\n");
+		return false;
+	}
+
+	return ch->initialized ? true : false;
+}
+
+/**
+ * i40e_validate_and_set_switch_mode - sets up switch mode correctly
+ * @vsi: ptr to VSI which has PF backing
+ *
+ * Sets up switch mode correctly if it needs to be changed and perform
+ * what are allowed modes.
+ **/
+static int i40e_validate_and_set_switch_mode(struct i40e_vsi *vsi)
+{
+	u8 mode;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	int ret;
+
+	ret = i40e_get_capabilities(pf, i40e_aqc_opc_list_dev_capabilities);
+	if (ret)
+		return -EINVAL;
+
+	if (hw->dev_caps.switch_mode) {
+		/* if switch mode is set, support mode2 (non-tunneled for
+		 * cloud filter) for now
+		 */
+		u32 switch_mode = hw->dev_caps.switch_mode &
+				  I40E_SWITCH_MODE_MASK;
+		if (switch_mode >= I40E_CLOUD_FILTER_MODE1) {
+			if (switch_mode == I40E_CLOUD_FILTER_MODE2)
+				return 0;
+			dev_err(&pf->pdev->dev,
+				"Invalid switch_mode (%d), only non-tunneled mode for cloud filter is supported\n",
+				hw->dev_caps.switch_mode);
+			return -EINVAL;
+		}
+	}
+
+	/* Set Bit 7 to be valid */
+	mode = I40E_AQ_SET_SWITCH_BIT7_VALID;
+
+	/* Set L4type to both TCP and UDP support */
+	mode |= I40E_AQ_SET_SWITCH_L4_TYPE_BOTH;
+
+	/* Set cloud filter mode */
+	mode |= I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL;
+
+	/* Prep mode field for set_switch_config */
+	ret = i40e_aq_set_switch_config(hw, pf->last_sw_conf_flags,
+					pf->last_sw_conf_valid_flags,
+					mode, NULL);
+	if (ret && hw->aq.asq_last_status != I40E_AQ_RC_ESRCH)
+		dev_err(&pf->pdev->dev,
+			"couldn't set switch config bits, err %s aq_err %s\n",
+			i40e_stat_str(hw, ret),
+			i40e_aq_str(hw,
+				    hw->aq.asq_last_status));
+
+	return ret;
+}
+
+/**
+ * i40e_create_queue_channel - function to create channel
+ * @vsi: VSI to be configured
+ * @ch: ptr to channel (it contains channel specific params)
+ *
+ * This function creates channel (VSI) using num_queues specified by user,
+ * reconfigs RSS if needed.
+ **/
+int i40e_create_queue_channel(struct i40e_vsi *vsi,
+			      struct i40e_channel *ch)
+{
+	struct i40e_pf *pf = vsi->back;
+	bool reconfig_rss;
+	int err;
+
+	if (!ch)
+		return -EINVAL;
+
+	if (!ch->num_queue_pairs) {
+		dev_err(&pf->pdev->dev, "Invalid num_queues requested: %d\n",
+			ch->num_queue_pairs);
+		return -EINVAL;
+	}
+
+	/* validate user requested num_queues for channel */
+	err = i40e_validate_num_queues(pf, ch->num_queue_pairs, vsi,
+				       &reconfig_rss);
+	if (err) {
+		dev_info(&pf->pdev->dev, "Failed to validate num_queues (%d)\n",
+			 ch->num_queue_pairs);
+		return -EINVAL;
+	}
+
+	/* By default we are in VEPA mode, if this is the first VF/VMDq
+	 * VSI to be added switch to VEB mode.
+	 */
+	if ((!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) ||
+	    (!i40e_is_any_channel(vsi))) {
+		if (!is_power_of_2(vsi->tc_config.tc_info[0].qcount)) {
+			dev_dbg(&pf->pdev->dev,
+				"Failed to create channel. Override queues (%u) not power of 2\n",
+				vsi->tc_config.tc_info[0].qcount);
+			return -EINVAL;
+		}
+
+		if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
+			pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+
+			if (vsi->type == I40E_VSI_MAIN) {
+				if (pf->flags & I40E_FLAG_TC_MQPRIO)
+					i40e_do_reset(pf, I40E_PF_RESET_FLAG,
+						      true);
+				else
+					i40e_do_reset_safe(pf,
+							   I40E_PF_RESET_FLAG);
+			}
+		}
+		/* now onwards for main VSI, number of queues will be value
+		 * of TC0's queue count
+		 */
+	}
+
+	/* By this time, vsi->cnt_q_avail shall be set to non-zero and
+	 * it should be more than num_queues
+	 */
+	if (!vsi->cnt_q_avail || vsi->cnt_q_avail < ch->num_queue_pairs) {
+		dev_dbg(&pf->pdev->dev,
+			"Error: cnt_q_avail (%u) less than num_queues %d\n",
+			vsi->cnt_q_avail, ch->num_queue_pairs);
+		return -EINVAL;
+	}
+
+	/* reconfig_rss only if vsi type is MAIN_VSI */
+	if (reconfig_rss && (vsi->type == I40E_VSI_MAIN)) {
+		err = i40e_vsi_reconfig_rss(vsi, ch->num_queue_pairs);
+		if (err) {
+			dev_info(&pf->pdev->dev,
+				 "Error: unable to reconfig rss for num_queues (%u)\n",
+				 ch->num_queue_pairs);
+			return -EINVAL;
+		}
+	}
+
+	if (!i40e_setup_channel(pf, vsi, ch)) {
+		dev_info(&pf->pdev->dev, "Failed to setup channel\n");
+		return -EINVAL;
+	}
+
+	dev_info(&pf->pdev->dev,
+		 "Setup channel (id:%u) utilizing num_queues %d\n",
+		 ch->seid, ch->num_queue_pairs);
+
+	/* configure VSI for BW limit */
+	if (ch->max_tx_rate) {
+		u64 credits = ch->max_tx_rate;
+
+		if (i40e_set_bw_limit(vsi, ch->seid, ch->max_tx_rate))
+			return -EINVAL;
+
+		do_div(credits, I40E_BW_CREDIT_DIVISOR);
+		dev_dbg(&pf->pdev->dev,
+			"Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
+			ch->max_tx_rate,
+			credits,
+			ch->seid);
+	}
+
+	/* in case of VF, this will be main SRIOV VSI */
+	ch->parent_vsi = vsi;
+
+	/* and update main_vsi's count for queue_available to use */
+	vsi->cnt_q_avail -= ch->num_queue_pairs;
+
+	return 0;
+}
+
+/**
+ * i40e_configure_queue_channels - Add queue channel for the given TCs
+ * @vsi: VSI to be configured
+ *
+ * Configures queue channel mapping to the given TCs
+ **/
+static int i40e_configure_queue_channels(struct i40e_vsi *vsi)
+{
+	struct i40e_channel *ch;
+	u64 max_rate = 0;
+	int ret = 0, i;
+
+	/* Create app vsi with the TCs. Main VSI with TC0 is already set up */
+	vsi->tc_seid_map[0] = vsi->seid;
+	for (i = 1; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		if (vsi->tc_config.enabled_tc & BIT(i)) {
+			ch = kzalloc(sizeof(*ch), GFP_KERNEL);
+			if (!ch) {
+				ret = -ENOMEM;
+				goto err_free;
+			}
+
+			INIT_LIST_HEAD(&ch->list);
+			ch->num_queue_pairs =
+				vsi->tc_config.tc_info[i].qcount;
+			ch->base_queue =
+				vsi->tc_config.tc_info[i].qoffset;
+
+			/* Bandwidth limit through tc interface is in bytes/s,
+			 * change to Mbit/s
+			 */
+			max_rate = vsi->mqprio_qopt.max_rate[i];
+			do_div(max_rate, I40E_BW_MBPS_DIVISOR);
+			ch->max_tx_rate = max_rate;
+
+			list_add_tail(&ch->list, &vsi->ch_list);
+
+			ret = i40e_create_queue_channel(vsi, ch);
+			if (ret) {
+				dev_err(&vsi->back->pdev->dev,
+					"Failed creating queue channel with TC%d: queues %d\n",
+					i, ch->num_queue_pairs);
+				goto err_free;
+			}
+			vsi->tc_seid_map[i] = ch->seid;
+		}
+	}
+	return ret;
+
+err_free:
+	i40e_remove_queue_channels(vsi);
+	return ret;
+}
+
+/**
  * i40e_veb_config_tc - Configure TCs for given VEB
  * @veb: given VEB
  * @enabled_tc: TC bitmap
@@ -5346,13 +6427,14 @@ out:
 void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
 {
 	enum i40e_aq_link_speed new_speed;
+	struct i40e_pf *pf = vsi->back;
 	char *speed = "Unknown";
 	char *fc = "Unknown";
 	char *fec = "";
 	char *req_fec = "";
 	char *an = "";
 
-	new_speed = vsi->back->hw.phy.link_info.link_speed;
+	new_speed = pf->hw.phy.link_info.link_speed;
 
 	if ((vsi->current_isup == isup) && (vsi->current_speed == new_speed))
 		return;
@@ -5366,13 +6448,13 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
 	/* Warn user if link speed on NPAR enabled partition is not at
 	 * least 10GB
 	 */
-	if (vsi->back->hw.func_caps.npar_enable &&
-	    (vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_1GB ||
-	     vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_100MB))
+	if (pf->hw.func_caps.npar_enable &&
+	    (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_1GB ||
+	     pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_100MB))
 		netdev_warn(vsi->netdev,
 			    "The partition detected link speed that is less than 10Gbps\n");
 
-	switch (vsi->back->hw.phy.link_info.link_speed) {
+	switch (pf->hw.phy.link_info.link_speed) {
 	case I40E_LINK_SPEED_40GB:
 		speed = "40 G";
 		break;
@@ -5395,7 +6477,7 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
 		break;
 	}
 
-	switch (vsi->back->hw.fc.current_mode) {
+	switch (pf->hw.fc.current_mode) {
 	case I40E_FC_FULL:
 		fc = "RX/TX";
 		break;
@@ -5410,18 +6492,18 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
 		break;
 	}
 
-	if (vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) {
+	if (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) {
 		req_fec = ", Requested FEC: None";
 		fec = ", FEC: None";
 		an = ", Autoneg: False";
 
-		if (vsi->back->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED)
+		if (pf->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED)
 			an = ", Autoneg: True";
 
-		if (vsi->back->hw.phy.link_info.fec_info &
+		if (pf->hw.phy.link_info.fec_info &
 		    I40E_AQ_CONFIG_FEC_KR_ENA)
 			fec = ", FEC: CL74 FC-FEC/BASE-R";
-		else if (vsi->back->hw.phy.link_info.fec_info &
+		else if (pf->hw.phy.link_info.fec_info &
 			 I40E_AQ_CONFIG_FEC_RS_ENA)
 			fec = ", FEC: CL108 RS-FEC";
 
@@ -5470,15 +6552,6 @@ static int i40e_up_complete(struct i40e_vsi *vsi)
 		i40e_print_link_message(vsi, true);
 		netif_tx_start_all_queues(vsi->netdev);
 		netif_carrier_on(vsi->netdev);
-	} else if (vsi->netdev) {
-		i40e_print_link_message(vsi, false);
-		/* need to check for qualified module here*/
-		if ((pf->hw.phy.link_info.link_info &
-			I40E_AQ_MEDIA_AVAILABLE) &&
-		    (!(pf->hw.phy.link_info.an_info &
-			I40E_AQ_QUALIFIED_MODULE)))
-			netdev_err(vsi->netdev,
-				   "the driver failed to link because an unqualified module was detected.");
 	}
 
 	/* replay FDIR SB filters */
@@ -5562,74 +6635,928 @@ void i40e_down(struct i40e_vsi *vsi)
 }
 
 /**
+ * i40e_validate_mqprio_qopt- validate queue mapping info
+ * @vsi: the VSI being configured
+ * @mqprio_qopt: queue parametrs
+ **/
+static int i40e_validate_mqprio_qopt(struct i40e_vsi *vsi,
+				     struct tc_mqprio_qopt_offload *mqprio_qopt)
+{
+	u64 sum_max_rate = 0;
+	u64 max_rate = 0;
+	int i;
+
+	if (mqprio_qopt->qopt.offset[0] != 0 ||
+	    mqprio_qopt->qopt.num_tc < 1 ||
+	    mqprio_qopt->qopt.num_tc > I40E_MAX_TRAFFIC_CLASS)
+		return -EINVAL;
+	for (i = 0; ; i++) {
+		if (!mqprio_qopt->qopt.count[i])
+			return -EINVAL;
+		if (mqprio_qopt->min_rate[i]) {
+			dev_err(&vsi->back->pdev->dev,
+				"Invalid min tx rate (greater than 0) specified\n");
+			return -EINVAL;
+		}
+		max_rate = mqprio_qopt->max_rate[i];
+		do_div(max_rate, I40E_BW_MBPS_DIVISOR);
+		sum_max_rate += max_rate;
+
+		if (i >= mqprio_qopt->qopt.num_tc - 1)
+			break;
+		if (mqprio_qopt->qopt.offset[i + 1] !=
+		    (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
+			return -EINVAL;
+	}
+	if (vsi->num_queue_pairs <
+	    (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) {
+		return -EINVAL;
+	}
+	if (sum_max_rate > i40e_get_link_speed(vsi)) {
+		dev_err(&vsi->back->pdev->dev,
+			"Invalid max tx rate specified\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * i40e_vsi_set_default_tc_config - set default values for tc configuration
+ * @vsi: the VSI being configured
+ **/
+static void i40e_vsi_set_default_tc_config(struct i40e_vsi *vsi)
+{
+	u16 qcount;
+	int i;
+
+	/* Only TC0 is enabled */
+	vsi->tc_config.numtc = 1;
+	vsi->tc_config.enabled_tc = 1;
+	qcount = min_t(int, vsi->alloc_queue_pairs,
+		       i40e_pf_get_max_q_per_tc(vsi->back));
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		/* For the TC that is not enabled set the offset to to default
+		 * queue and allocate one queue for the given TC.
+		 */
+		vsi->tc_config.tc_info[i].qoffset = 0;
+		if (i == 0)
+			vsi->tc_config.tc_info[i].qcount = qcount;
+		else
+			vsi->tc_config.tc_info[i].qcount = 1;
+		vsi->tc_config.tc_info[i].netdev_tc = 0;
+	}
+}
+
+/**
  * i40e_setup_tc - configure multiple traffic classes
  * @netdev: net device to configure
- * @tc: number of traffic classes to enable
+ * @type_data: tc offload data
  **/
-static int i40e_setup_tc(struct net_device *netdev, u8 tc)
+static int i40e_setup_tc(struct net_device *netdev, void *type_data)
 {
+	struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_vsi *vsi = np->vsi;
 	struct i40e_pf *pf = vsi->back;
-	u8 enabled_tc = 0;
+	u8 enabled_tc = 0, num_tc, hw;
+	bool need_reset = false;
 	int ret = -EINVAL;
+	u16 mode;
 	int i;
 
-	/* Check if DCB enabled to continue */
-	if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) {
-		netdev_info(netdev, "DCB is not enabled for adapter\n");
-		goto exit;
+	num_tc = mqprio_qopt->qopt.num_tc;
+	hw = mqprio_qopt->qopt.hw;
+	mode = mqprio_qopt->mode;
+	if (!hw) {
+		pf->flags &= ~I40E_FLAG_TC_MQPRIO;
+		memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
+		goto config_tc;
 	}
 
 	/* Check if MFP enabled */
 	if (pf->flags & I40E_FLAG_MFP_ENABLED) {
-		netdev_info(netdev, "Configuring TC not supported in MFP mode\n");
-		goto exit;
+		netdev_info(netdev,
+			    "Configuring TC not supported in MFP mode\n");
+		return ret;
 	}
+	switch (mode) {
+	case TC_MQPRIO_MODE_DCB:
+		pf->flags &= ~I40E_FLAG_TC_MQPRIO;
 
-	/* Check whether tc count is within enabled limit */
-	if (tc > i40e_pf_get_num_tc(pf)) {
-		netdev_info(netdev, "TC count greater than enabled on link for adapter\n");
-		goto exit;
+		/* Check if DCB enabled to continue */
+		if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) {
+			netdev_info(netdev,
+				    "DCB is not enabled for adapter\n");
+			return ret;
+		}
+
+		/* Check whether tc count is within enabled limit */
+		if (num_tc > i40e_pf_get_num_tc(pf)) {
+			netdev_info(netdev,
+				    "TC count greater than enabled on link for adapter\n");
+			return ret;
+		}
+		break;
+	case TC_MQPRIO_MODE_CHANNEL:
+		if (pf->flags & I40E_FLAG_DCB_ENABLED) {
+			netdev_info(netdev,
+				    "Full offload of TC Mqprio options is not supported when DCB is enabled\n");
+			return ret;
+		}
+		if (!(pf->flags & I40E_FLAG_MSIX_ENABLED))
+			return ret;
+		ret = i40e_validate_mqprio_qopt(vsi, mqprio_qopt);
+		if (ret)
+			return ret;
+		memcpy(&vsi->mqprio_qopt, mqprio_qopt,
+		       sizeof(*mqprio_qopt));
+		pf->flags |= I40E_FLAG_TC_MQPRIO;
+		pf->flags &= ~I40E_FLAG_DCB_ENABLED;
+		break;
+	default:
+		return -EINVAL;
 	}
 
+config_tc:
 	/* Generate TC map for number of tc requested */
-	for (i = 0; i < tc; i++)
+	for (i = 0; i < num_tc; i++)
 		enabled_tc |= BIT(i);
 
 	/* Requesting same TC configuration as already enabled */
-	if (enabled_tc == vsi->tc_config.enabled_tc)
+	if (enabled_tc == vsi->tc_config.enabled_tc &&
+	    mode != TC_MQPRIO_MODE_CHANNEL)
 		return 0;
 
 	/* Quiesce VSI queues */
 	i40e_quiesce_vsi(vsi);
 
+	if (!hw && !(pf->flags & I40E_FLAG_TC_MQPRIO))
+		i40e_remove_queue_channels(vsi);
+
 	/* Configure VSI for enabled TCs */
 	ret = i40e_vsi_config_tc(vsi, enabled_tc);
 	if (ret) {
 		netdev_info(netdev, "Failed configuring TC for VSI seid=%d\n",
 			    vsi->seid);
+		need_reset = true;
 		goto exit;
 	}
 
+	if (pf->flags & I40E_FLAG_TC_MQPRIO) {
+		if (vsi->mqprio_qopt.max_rate[0]) {
+			u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0];
+
+			do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR);
+			ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate);
+			if (!ret) {
+				u64 credits = max_tx_rate;
+
+				do_div(credits, I40E_BW_CREDIT_DIVISOR);
+				dev_dbg(&vsi->back->pdev->dev,
+					"Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
+					max_tx_rate,
+					credits,
+					vsi->seid);
+			} else {
+				need_reset = true;
+				goto exit;
+			}
+		}
+		ret = i40e_configure_queue_channels(vsi);
+		if (ret) {
+			netdev_info(netdev,
+				    "Failed configuring queue channels\n");
+			need_reset = true;
+			goto exit;
+		}
+	}
+
+exit:
+	/* Reset the configuration data to defaults, only TC0 is enabled */
+	if (need_reset) {
+		i40e_vsi_set_default_tc_config(vsi);
+		need_reset = false;
+	}
+
 	/* Unquiesce VSI */
 	i40e_unquiesce_vsi(vsi);
+	return ret;
+}
 
-exit:
+/**
+ * i40e_set_cld_element - sets cloud filter element data
+ * @filter: cloud filter rule
+ * @cld: ptr to cloud filter element data
+ *
+ * This is helper function to copy data into cloud filter element
+ **/
+static inline void
+i40e_set_cld_element(struct i40e_cloud_filter *filter,
+		     struct i40e_aqc_cloud_filters_element_data *cld)
+{
+	int i, j;
+	u32 ipa;
+
+	memset(cld, 0, sizeof(*cld));
+	ether_addr_copy(cld->outer_mac, filter->dst_mac);
+	ether_addr_copy(cld->inner_mac, filter->src_mac);
+
+	if (filter->n_proto != ETH_P_IP && filter->n_proto != ETH_P_IPV6)
+		return;
+
+	if (filter->n_proto == ETH_P_IPV6) {
+#define IPV6_MAX_INDEX	(ARRAY_SIZE(filter->dst_ipv6) - 1)
+		for (i = 0, j = 0; i < ARRAY_SIZE(filter->dst_ipv6);
+		     i++, j += 2) {
+			ipa = be32_to_cpu(filter->dst_ipv6[IPV6_MAX_INDEX - i]);
+			ipa = cpu_to_le32(ipa);
+			memcpy(&cld->ipaddr.raw_v6.data[j], &ipa, sizeof(ipa));
+		}
+	} else {
+		ipa = be32_to_cpu(filter->dst_ipv4);
+		memcpy(&cld->ipaddr.v4.data, &ipa, sizeof(ipa));
+	}
+
+	cld->inner_vlan = cpu_to_le16(ntohs(filter->vlan_id));
+
+	/* tenant_id is not supported by FW now, once the support is enabled
+	 * fill the cld->tenant_id with cpu_to_le32(filter->tenant_id)
+	 */
+	if (filter->tenant_id)
+		return;
+}
+
+/**
+ * i40e_add_del_cloud_filter - Add/del cloud filter
+ * @vsi: pointer to VSI
+ * @filter: cloud filter rule
+ * @add: if true, add, if false, delete
+ *
+ * Add or delete a cloud filter for a specific flow spec.
+ * Returns 0 if the filter were successfully added.
+ **/
+static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
+				     struct i40e_cloud_filter *filter, bool add)
+{
+	struct i40e_aqc_cloud_filters_element_data cld_filter;
+	struct i40e_pf *pf = vsi->back;
+	int ret;
+	static const u16 flag_table[128] = {
+		[I40E_CLOUD_FILTER_FLAGS_OMAC]  =
+			I40E_AQC_ADD_CLOUD_FILTER_OMAC,
+		[I40E_CLOUD_FILTER_FLAGS_IMAC]  =
+			I40E_AQC_ADD_CLOUD_FILTER_IMAC,
+		[I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN]  =
+			I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN,
+		[I40E_CLOUD_FILTER_FLAGS_IMAC_TEN_ID] =
+			I40E_AQC_ADD_CLOUD_FILTER_IMAC_TEN_ID,
+		[I40E_CLOUD_FILTER_FLAGS_OMAC_TEN_ID_IMAC] =
+			I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC,
+		[I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN_TEN_ID] =
+			I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN_TEN_ID,
+		[I40E_CLOUD_FILTER_FLAGS_IIP] =
+			I40E_AQC_ADD_CLOUD_FILTER_IIP,
+	};
+
+	if (filter->flags >= ARRAY_SIZE(flag_table))
+		return I40E_ERR_CONFIG;
+
+	/* copy element needed to add cloud filter from filter */
+	i40e_set_cld_element(filter, &cld_filter);
+
+	if (filter->tunnel_type != I40E_CLOUD_TNL_TYPE_NONE)
+		cld_filter.flags = cpu_to_le16(filter->tunnel_type <<
+					     I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT);
+
+	if (filter->n_proto == ETH_P_IPV6)
+		cld_filter.flags |= cpu_to_le16(flag_table[filter->flags] |
+						I40E_AQC_ADD_CLOUD_FLAGS_IPV6);
+	else
+		cld_filter.flags |= cpu_to_le16(flag_table[filter->flags] |
+						I40E_AQC_ADD_CLOUD_FLAGS_IPV4);
+
+	if (add)
+		ret = i40e_aq_add_cloud_filters(&pf->hw, filter->seid,
+						&cld_filter, 1);
+	else
+		ret = i40e_aq_rem_cloud_filters(&pf->hw, filter->seid,
+						&cld_filter, 1);
+	if (ret)
+		dev_dbg(&pf->pdev->dev,
+			"Failed to %s cloud filter using l4 port %u, err %d aq_err %d\n",
+			add ? "add" : "delete", filter->dst_port, ret,
+			pf->hw.aq.asq_last_status);
+	else
+		dev_info(&pf->pdev->dev,
+			 "%s cloud filter for VSI: %d\n",
+			 add ? "Added" : "Deleted", filter->seid);
 	return ret;
 }
 
-static int __i40e_setup_tc(struct net_device *netdev, enum tc_setup_type type,
-			   void *type_data)
+/**
+ * i40e_add_del_cloud_filter_big_buf - Add/del cloud filter using big_buf
+ * @vsi: pointer to VSI
+ * @filter: cloud filter rule
+ * @add: if true, add, if false, delete
+ *
+ * Add or delete a cloud filter for a specific flow spec using big buffer.
+ * Returns 0 if the filter were successfully added.
+ **/
+static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
+					     struct i40e_cloud_filter *filter,
+					     bool add)
 {
-	struct tc_mqprio_qopt *mqprio = type_data;
+	struct i40e_aqc_cloud_filters_element_bb cld_filter;
+	struct i40e_pf *pf = vsi->back;
+	int ret;
+
+	/* Both (src/dst) valid mac_addr are not supported */
+	if ((is_valid_ether_addr(filter->dst_mac) &&
+	     is_valid_ether_addr(filter->src_mac)) ||
+	    (is_multicast_ether_addr(filter->dst_mac) &&
+	     is_multicast_ether_addr(filter->src_mac)))
+		return -EINVAL;
+
+	/* Make sure port is specified, otherwise bail out, for channel
+	 * specific cloud filter needs 'L4 port' to be non-zero
+	 */
+	if (!filter->dst_port)
+		return -EINVAL;
+
+	/* adding filter using src_port/src_ip is not supported at this stage */
+	if (filter->src_port || filter->src_ipv4 ||
+	    !ipv6_addr_any(&filter->ip.v6.src_ip6))
+		return -EINVAL;
+
+	/* copy element needed to add cloud filter from filter */
+	i40e_set_cld_element(filter, &cld_filter.element);
+
+	if (is_valid_ether_addr(filter->dst_mac) ||
+	    is_valid_ether_addr(filter->src_mac) ||
+	    is_multicast_ether_addr(filter->dst_mac) ||
+	    is_multicast_ether_addr(filter->src_mac)) {
+		/* MAC + IP : unsupported mode */
+		if (filter->dst_ipv4)
+			return -EINVAL;
+
+		/* since we validated that L4 port must be valid before
+		 * we get here, start with respective "flags" value
+		 * and update if vlan is present or not
+		 */
+		cld_filter.element.flags =
+			cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_PORT);
+
+		if (filter->vlan_id) {
+			cld_filter.element.flags =
+			cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_VLAN_PORT);
+		}
+
+	} else if (filter->dst_ipv4 ||
+		   !ipv6_addr_any(&filter->ip.v6.dst_ip6)) {
+		cld_filter.element.flags =
+				cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_IP_PORT);
+		if (filter->n_proto == ETH_P_IPV6)
+			cld_filter.element.flags |=
+				cpu_to_le16(I40E_AQC_ADD_CLOUD_FLAGS_IPV6);
+		else
+			cld_filter.element.flags |=
+				cpu_to_le16(I40E_AQC_ADD_CLOUD_FLAGS_IPV4);
+	} else {
+		dev_err(&pf->pdev->dev,
+			"either mac or ip has to be valid for cloud filter\n");
+		return -EINVAL;
+	}
+
+	/* Now copy L4 port in Byte 6..7 in general fields */
+	cld_filter.general_fields[I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD0] =
+						be16_to_cpu(filter->dst_port);
+
+	if (add) {
+		/* Validate current device switch mode, change if necessary */
+		ret = i40e_validate_and_set_switch_mode(vsi);
+		if (ret) {
+			dev_err(&pf->pdev->dev,
+				"failed to set switch mode, ret %d\n",
+				ret);
+			return ret;
+		}
 
-	if (type != TC_SETUP_MQPRIO)
+		ret = i40e_aq_add_cloud_filters_bb(&pf->hw, filter->seid,
+						   &cld_filter, 1);
+	} else {
+		ret = i40e_aq_rem_cloud_filters_bb(&pf->hw, filter->seid,
+						   &cld_filter, 1);
+	}
+
+	if (ret)
+		dev_dbg(&pf->pdev->dev,
+			"Failed to %s cloud filter(big buffer) err %d aq_err %d\n",
+			add ? "add" : "delete", ret, pf->hw.aq.asq_last_status);
+	else
+		dev_info(&pf->pdev->dev,
+			 "%s cloud filter for VSI: %d, L4 port: %d\n",
+			 add ? "add" : "delete", filter->seid,
+			 ntohs(filter->dst_port));
+	return ret;
+}
+
+/**
+ * i40e_parse_cls_flower - Parse tc flower filters provided by kernel
+ * @vsi: Pointer to VSI
+ * @cls_flower: Pointer to struct tc_cls_flower_offload
+ * @filter: Pointer to cloud filter structure
+ *
+ **/
+static int i40e_parse_cls_flower(struct i40e_vsi *vsi,
+				 struct tc_cls_flower_offload *f,
+				 struct i40e_cloud_filter *filter)
+{
+	u16 n_proto_mask = 0, n_proto_key = 0, addr_type = 0;
+	struct i40e_pf *pf = vsi->back;
+	u8 field_flags = 0;
+
+	if (f->dissector->used_keys &
+	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
+	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_VLAN) |
+	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
+	      BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) {
+		dev_err(&pf->pdev->dev, "Unsupported key used: 0x%x\n",
+			f->dissector->used_keys);
 		return -EOPNOTSUPP;
+	}
+
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		struct flow_dissector_key_keyid *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_ENC_KEYID,
+						  f->key);
+
+		struct flow_dissector_key_keyid *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_ENC_KEYID,
+						  f->mask);
+
+		if (mask->keyid != 0)
+			field_flags |= I40E_CLOUD_FIELD_TEN_ID;
+
+		filter->tenant_id = be32_to_cpu(key->keyid);
+	}
 
-	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_dissector_key_basic *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_BASIC,
+						  f->key);
 
-	return i40e_setup_tc(netdev, mqprio->num_tc);
+		struct flow_dissector_key_basic *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_BASIC,
+						  f->mask);
+
+		n_proto_key = ntohs(key->n_proto);
+		n_proto_mask = ntohs(mask->n_proto);
+
+		if (n_proto_key == ETH_P_ALL) {
+			n_proto_key = 0;
+			n_proto_mask = 0;
+		}
+		filter->n_proto = n_proto_key & n_proto_mask;
+		filter->ip_proto = key->ip_proto;
+	}
+
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_dissector_key_eth_addrs *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
+						  f->key);
+
+		struct flow_dissector_key_eth_addrs *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
+						  f->mask);
+
+		/* use is_broadcast and is_zero to check for all 0xf or 0 */
+		if (!is_zero_ether_addr(mask->dst)) {
+			if (is_broadcast_ether_addr(mask->dst)) {
+				field_flags |= I40E_CLOUD_FIELD_OMAC;
+			} else {
+				dev_err(&pf->pdev->dev, "Bad ether dest mask %pM\n",
+					mask->dst);
+				return I40E_ERR_CONFIG;
+			}
+		}
+
+		if (!is_zero_ether_addr(mask->src)) {
+			if (is_broadcast_ether_addr(mask->src)) {
+				field_flags |= I40E_CLOUD_FIELD_IMAC;
+			} else {
+				dev_err(&pf->pdev->dev, "Bad ether src mask %pM\n",
+					mask->src);
+				return I40E_ERR_CONFIG;
+			}
+		}
+		ether_addr_copy(filter->dst_mac, key->dst);
+		ether_addr_copy(filter->src_mac, key->src);
+	}
+
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_dissector_key_vlan *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_VLAN,
+						  f->key);
+		struct flow_dissector_key_vlan *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_VLAN,
+						  f->mask);
+
+		if (mask->vlan_id) {
+			if (mask->vlan_id == VLAN_VID_MASK) {
+				field_flags |= I40E_CLOUD_FIELD_IVLAN;
+
+			} else {
+				dev_err(&pf->pdev->dev, "Bad vlan mask 0x%04x\n",
+					mask->vlan_id);
+				return I40E_ERR_CONFIG;
+			}
+		}
+
+		filter->vlan_id = cpu_to_be16(key->vlan_id);
+	}
+
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_dissector_key_control *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_CONTROL,
+						  f->key);
+
+		addr_type = key->addr_type;
+	}
+
+	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+		struct flow_dissector_key_ipv4_addrs *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+						  f->key);
+		struct flow_dissector_key_ipv4_addrs *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+						  f->mask);
+
+		if (mask->dst) {
+			if (mask->dst == cpu_to_be32(0xffffffff)) {
+				field_flags |= I40E_CLOUD_FIELD_IIP;
+			} else {
+				mask->dst = be32_to_cpu(mask->dst);
+				dev_err(&pf->pdev->dev, "Bad ip dst mask %pI4\n",
+					&mask->dst);
+				return I40E_ERR_CONFIG;
+			}
+		}
+
+		if (mask->src) {
+			if (mask->src == cpu_to_be32(0xffffffff)) {
+				field_flags |= I40E_CLOUD_FIELD_IIP;
+			} else {
+				mask->src = be32_to_cpu(mask->src);
+				dev_err(&pf->pdev->dev, "Bad ip src mask %pI4\n",
+					&mask->src);
+				return I40E_ERR_CONFIG;
+			}
+		}
+
+		if (field_flags & I40E_CLOUD_FIELD_TEN_ID) {
+			dev_err(&pf->pdev->dev, "Tenant id not allowed for ip filter\n");
+			return I40E_ERR_CONFIG;
+		}
+		filter->dst_ipv4 = key->dst;
+		filter->src_ipv4 = key->src;
+	}
+
+	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+		struct flow_dissector_key_ipv6_addrs *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+						  f->key);
+		struct flow_dissector_key_ipv6_addrs *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+						  f->mask);
+
+		/* src and dest IPV6 address should not be LOOPBACK
+		 * (0:0:0:0:0:0:0:1), which can be represented as ::1
+		 */
+		if (ipv6_addr_loopback(&key->dst) ||
+		    ipv6_addr_loopback(&key->src)) {
+			dev_err(&pf->pdev->dev,
+				"Bad ipv6, addr is LOOPBACK\n");
+			return I40E_ERR_CONFIG;
+		}
+		if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src))
+			field_flags |= I40E_CLOUD_FIELD_IIP;
+
+		memcpy(&filter->src_ipv6, &key->src.s6_addr32,
+		       sizeof(filter->src_ipv6));
+		memcpy(&filter->dst_ipv6, &key->dst.s6_addr32,
+		       sizeof(filter->dst_ipv6));
+	}
+
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_dissector_key_ports *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_PORTS,
+						  f->key);
+		struct flow_dissector_key_ports *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_PORTS,
+						  f->mask);
+
+		if (mask->src) {
+			if (mask->src == cpu_to_be16(0xffff)) {
+				field_flags |= I40E_CLOUD_FIELD_IIP;
+			} else {
+				dev_err(&pf->pdev->dev, "Bad src port mask 0x%04x\n",
+					be16_to_cpu(mask->src));
+				return I40E_ERR_CONFIG;
+			}
+		}
+
+		if (mask->dst) {
+			if (mask->dst == cpu_to_be16(0xffff)) {
+				field_flags |= I40E_CLOUD_FIELD_IIP;
+			} else {
+				dev_err(&pf->pdev->dev, "Bad dst port mask 0x%04x\n",
+					be16_to_cpu(mask->dst));
+				return I40E_ERR_CONFIG;
+			}
+		}
+
+		filter->dst_port = key->dst;
+		filter->src_port = key->src;
+
+		switch (filter->ip_proto) {
+		case IPPROTO_TCP:
+		case IPPROTO_UDP:
+			break;
+		default:
+			dev_err(&pf->pdev->dev,
+				"Only UDP and TCP transport are supported\n");
+			return -EINVAL;
+		}
+	}
+	filter->flags = field_flags;
+	return 0;
+}
+
+/**
+ * i40e_handle_tclass: Forward to a traffic class on the device
+ * @vsi: Pointer to VSI
+ * @tc: traffic class index on the device
+ * @filter: Pointer to cloud filter structure
+ *
+ **/
+static int i40e_handle_tclass(struct i40e_vsi *vsi, u32 tc,
+			      struct i40e_cloud_filter *filter)
+{
+	struct i40e_channel *ch, *ch_tmp;
+
+	/* direct to a traffic class on the same device */
+	if (tc == 0) {
+		filter->seid = vsi->seid;
+		return 0;
+	} else if (vsi->tc_config.enabled_tc & BIT(tc)) {
+		if (!filter->dst_port) {
+			dev_err(&vsi->back->pdev->dev,
+				"Specify destination port to direct to traffic class that is not default\n");
+			return -EINVAL;
+		}
+		if (list_empty(&vsi->ch_list))
+			return -EINVAL;
+		list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list,
+					 list) {
+			if (ch->seid == vsi->tc_seid_map[tc])
+				filter->seid = ch->seid;
+		}
+		return 0;
+	}
+	dev_err(&vsi->back->pdev->dev, "TC is not enabled\n");
+	return -EINVAL;
+}
+
+/**
+ * i40e_configure_clsflower - Configure tc flower filters
+ * @vsi: Pointer to VSI
+ * @cls_flower: Pointer to struct tc_cls_flower_offload
+ *
+ **/
+static int i40e_configure_clsflower(struct i40e_vsi *vsi,
+				    struct tc_cls_flower_offload *cls_flower)
+{
+	int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid);
+	struct i40e_cloud_filter *filter = NULL;
+	struct i40e_pf *pf = vsi->back;
+	int err = 0;
+
+	if (tc < 0) {
+		dev_err(&vsi->back->pdev->dev, "Invalid traffic class\n");
+		return -EINVAL;
+	}
+
+	if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
+	    test_bit(__I40E_RESET_INTR_RECEIVED, pf->state))
+		return -EBUSY;
+
+	if (pf->fdir_pf_active_filters ||
+	    (!hlist_empty(&pf->fdir_filter_list))) {
+		dev_err(&vsi->back->pdev->dev,
+			"Flow Director Sideband filters exists, turn ntuple off to configure cloud filters\n");
+		return -EINVAL;
+	}
+
+	if (vsi->back->flags & I40E_FLAG_FD_SB_ENABLED) {
+		dev_err(&vsi->back->pdev->dev,
+			"Disable Flow Director Sideband, configuring Cloud filters via tc-flower\n");
+		vsi->back->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+		vsi->back->flags |= I40E_FLAG_FD_SB_TO_CLOUD_FILTER;
+	}
+
+	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+	if (!filter)
+		return -ENOMEM;
+
+	filter->cookie = cls_flower->cookie;
+
+	err = i40e_parse_cls_flower(vsi, cls_flower, filter);
+	if (err < 0)
+		goto err;
+
+	err = i40e_handle_tclass(vsi, tc, filter);
+	if (err < 0)
+		goto err;
+
+	/* Add cloud filter */
+	if (filter->dst_port)
+		err = i40e_add_del_cloud_filter_big_buf(vsi, filter, true);
+	else
+		err = i40e_add_del_cloud_filter(vsi, filter, true);
+
+	if (err) {
+		dev_err(&pf->pdev->dev,
+			"Failed to add cloud filter, err %s\n",
+			i40e_stat_str(&pf->hw, err));
+		err = i40e_aq_rc_to_posix(err, pf->hw.aq.asq_last_status);
+		goto err;
+	}
+
+	/* add filter to the ordered list */
+	INIT_HLIST_NODE(&filter->cloud_node);
+
+	hlist_add_head(&filter->cloud_node, &pf->cloud_filter_list);
+
+	pf->num_cloud_filters++;
+
+	return err;
+err:
+	kfree(filter);
+	return err;
+}
+
+/**
+ * i40e_find_cloud_filter - Find the could filter in the list
+ * @vsi: Pointer to VSI
+ * @cookie: filter specific cookie
+ *
+ **/
+static struct i40e_cloud_filter *i40e_find_cloud_filter(struct i40e_vsi *vsi,
+							unsigned long *cookie)
+{
+	struct i40e_cloud_filter *filter = NULL;
+	struct hlist_node *node2;
+
+	hlist_for_each_entry_safe(filter, node2,
+				  &vsi->back->cloud_filter_list, cloud_node)
+		if (!memcmp(cookie, &filter->cookie, sizeof(filter->cookie)))
+			return filter;
+	return NULL;
+}
+
+/**
+ * i40e_delete_clsflower - Remove tc flower filters
+ * @vsi: Pointer to VSI
+ * @cls_flower: Pointer to struct tc_cls_flower_offload
+ *
+ **/
+static int i40e_delete_clsflower(struct i40e_vsi *vsi,
+				 struct tc_cls_flower_offload *cls_flower)
+{
+	struct i40e_cloud_filter *filter = NULL;
+	struct i40e_pf *pf = vsi->back;
+	int err = 0;
+
+	filter = i40e_find_cloud_filter(vsi, &cls_flower->cookie);
+
+	if (!filter)
+		return -EINVAL;
+
+	hash_del(&filter->cloud_node);
+
+	if (filter->dst_port)
+		err = i40e_add_del_cloud_filter_big_buf(vsi, filter, false);
+	else
+		err = i40e_add_del_cloud_filter(vsi, filter, false);
+
+	kfree(filter);
+	if (err) {
+		dev_err(&pf->pdev->dev,
+			"Failed to delete cloud filter, err %s\n",
+			i40e_stat_str(&pf->hw, err));
+		return i40e_aq_rc_to_posix(err, pf->hw.aq.asq_last_status);
+	}
+
+	pf->num_cloud_filters--;
+	if (!pf->num_cloud_filters)
+		if ((pf->flags & I40E_FLAG_FD_SB_TO_CLOUD_FILTER) &&
+		    !(pf->flags & I40E_FLAG_FD_SB_INACTIVE)) {
+			pf->flags |= I40E_FLAG_FD_SB_ENABLED;
+			pf->flags &= ~I40E_FLAG_FD_SB_TO_CLOUD_FILTER;
+			pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE;
+		}
+	return 0;
+}
+
+/**
+ * i40e_setup_tc_cls_flower - flower classifier offloads
+ * @netdev: net device to configure
+ * @type_data: offload data
+ **/
+static int i40e_setup_tc_cls_flower(struct i40e_netdev_priv *np,
+				    struct tc_cls_flower_offload *cls_flower)
+{
+	struct i40e_vsi *vsi = np->vsi;
+
+	if (cls_flower->common.chain_index)
+		return -EOPNOTSUPP;
+
+	switch (cls_flower->command) {
+	case TC_CLSFLOWER_REPLACE:
+		return i40e_configure_clsflower(vsi, cls_flower);
+	case TC_CLSFLOWER_DESTROY:
+		return i40e_delete_clsflower(vsi, cls_flower);
+	case TC_CLSFLOWER_STATS:
+		return -EOPNOTSUPP;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int i40e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+				  void *cb_priv)
+{
+	struct i40e_netdev_priv *np = cb_priv;
+
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return i40e_setup_tc_cls_flower(np, type_data);
+
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int i40e_setup_tc_block(struct net_device *dev,
+			       struct tc_block_offload *f)
+{
+	struct i40e_netdev_priv *np = netdev_priv(dev);
+
+	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+		return -EOPNOTSUPP;
+
+	switch (f->command) {
+	case TC_BLOCK_BIND:
+		return tcf_block_cb_register(f->block, i40e_setup_tc_block_cb,
+					     np, np);
+	case TC_BLOCK_UNBIND:
+		tcf_block_cb_unregister(f->block, i40e_setup_tc_block_cb, np);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int __i40e_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+			   void *type_data)
+{
+	switch (type) {
+	case TC_SETUP_QDISC_MQPRIO:
+		return i40e_setup_tc(netdev, type_data);
+	case TC_SETUP_BLOCK:
+		return i40e_setup_tc_block(netdev, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
 /**
@@ -5747,7 +7674,7 @@ err_setup_rx:
 err_setup_tx:
 	i40e_vsi_free_tx_resources(vsi);
 	if (vsi == pf->vsi[pf->lan_vsi])
-		i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED), true);
+		i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
 
 	return err;
 }
@@ -5810,6 +7737,33 @@ static void i40e_fdir_filter_exit(struct i40e_pf *pf)
 }
 
 /**
+ * i40e_cloud_filter_exit - Cleans up the cloud filters
+ * @pf: Pointer to PF
+ *
+ * This function destroys the hlist where all the cloud filters
+ * were saved.
+ **/
+static void i40e_cloud_filter_exit(struct i40e_pf *pf)
+{
+	struct i40e_cloud_filter *cfilter;
+	struct hlist_node *node;
+
+	hlist_for_each_entry_safe(cfilter, node,
+				  &pf->cloud_filter_list, cloud_node) {
+		hlist_del(&cfilter->cloud_node);
+		kfree(cfilter);
+	}
+	pf->num_cloud_filters = 0;
+
+	if ((pf->flags & I40E_FLAG_FD_SB_TO_CLOUD_FILTER) &&
+	    !(pf->flags & I40E_FLAG_FD_SB_INACTIVE)) {
+		pf->flags |= I40E_FLAG_FD_SB_ENABLED;
+		pf->flags &= ~I40E_FLAG_FD_SB_TO_CLOUD_FILTER;
+		pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE;
+	}
+}
+
+/**
  * i40e_close - Disables a network interface
  * @netdev: network interface device structure
  *
@@ -5875,7 +7829,7 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired)
 		wr32(&pf->hw, I40E_GLGEN_RTRIG, val);
 		i40e_flush(&pf->hw);
 
-	} else if (reset_flags & BIT_ULL(__I40E_PF_RESET_REQUESTED)) {
+	} else if (reset_flags & I40E_PF_RESET_FLAG) {
 
 		/* Request a PF Reset
 		 *
@@ -6226,6 +8180,7 @@ void i40e_fdir_check_and_reenable(struct i40e_pf *pf)
 				hlist_del(&filter->fdir_node);
 				kfree(filter);
 				pf->fdir_pf_active_filters--;
+				pf->fd_inv = 0;
 			}
 		}
 	}
@@ -6429,8 +8384,7 @@ static void i40e_link_event(struct i40e_pf *pf)
 	     new_link == netif_carrier_ok(vsi->netdev)))
 		return;
 
-	if (!test_bit(__I40E_VSI_DOWN, vsi->state))
-		i40e_print_link_message(vsi, new_link);
+	i40e_print_link_message(vsi, new_link);
 
 	/* Notify the base of the switch tree connected to
 	 * the link.  Floating VEBs are not notified.
@@ -6553,12 +8507,26 @@ static void i40e_handle_link_event(struct i40e_pf *pf,
 	 */
 	i40e_link_event(pf);
 
-	/* check for unqualified module, if link is down */
-	if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) &&
-	    (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) &&
-	    (!(status->link_info & I40E_AQ_LINK_UP)))
+	/* Check if module meets thermal requirements */
+	if (status->phy_type == I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP) {
+		dev_err(&pf->pdev->dev,
+			"Rx/Tx is disabled on this device because the module does not meet thermal requirements.\n");
 		dev_err(&pf->pdev->dev,
-			"The driver failed to link because an unqualified module was detected.\n");
+			"Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
+	} else {
+		/* check for unqualified module, if link is down, suppress
+		 * the message if link was forced to be down.
+		 */
+		if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) &&
+		    (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) &&
+		    (!(status->link_info & I40E_AQ_LINK_UP)) &&
+		    (!(pf->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED))) {
+			dev_err(&pf->pdev->dev,
+				"Rx/Tx is disabled on this device because an unsupported SFP module type was detected.\n");
+			dev_err(&pf->pdev->dev,
+				"Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
+		}
+	}
 }
 
 /**
@@ -6900,7 +8868,8 @@ end_reconstitute:
  * i40e_get_capabilities - get info about the HW
  * @pf: the PF struct
  **/
-static int i40e_get_capabilities(struct i40e_pf *pf)
+static int i40e_get_capabilities(struct i40e_pf *pf,
+				 enum i40e_admin_queue_opc list_type)
 {
 	struct i40e_aqc_list_capabilities_element_resp *cap_buf;
 	u16 data_size;
@@ -6915,9 +8884,8 @@ static int i40e_get_capabilities(struct i40e_pf *pf)
 
 		/* this loads the data into the hw struct for us */
 		err = i40e_aq_discover_capabilities(&pf->hw, cap_buf, buf_len,
-					    &data_size,
-					    i40e_aqc_opc_list_func_capabilities,
-					    NULL);
+						    &data_size, list_type,
+						    NULL);
 		/* data loaded, buffer no longer needed */
 		kfree(cap_buf);
 
@@ -6934,26 +8902,44 @@ static int i40e_get_capabilities(struct i40e_pf *pf)
 		}
 	} while (err);
 
-	if (pf->hw.debug_mask & I40E_DEBUG_USER)
-		dev_info(&pf->pdev->dev,
-			 "pf=%d, num_vfs=%d, msix_pf=%d, msix_vf=%d, fd_g=%d, fd_b=%d, pf_max_q=%d num_vsi=%d\n",
-			 pf->hw.pf_id, pf->hw.func_caps.num_vfs,
-			 pf->hw.func_caps.num_msix_vectors,
-			 pf->hw.func_caps.num_msix_vectors_vf,
-			 pf->hw.func_caps.fd_filters_guaranteed,
-			 pf->hw.func_caps.fd_filters_best_effort,
-			 pf->hw.func_caps.num_tx_qp,
-			 pf->hw.func_caps.num_vsis);
-
+	if (pf->hw.debug_mask & I40E_DEBUG_USER) {
+		if (list_type == i40e_aqc_opc_list_func_capabilities) {
+			dev_info(&pf->pdev->dev,
+				 "pf=%d, num_vfs=%d, msix_pf=%d, msix_vf=%d, fd_g=%d, fd_b=%d, pf_max_q=%d num_vsi=%d\n",
+				 pf->hw.pf_id, pf->hw.func_caps.num_vfs,
+				 pf->hw.func_caps.num_msix_vectors,
+				 pf->hw.func_caps.num_msix_vectors_vf,
+				 pf->hw.func_caps.fd_filters_guaranteed,
+				 pf->hw.func_caps.fd_filters_best_effort,
+				 pf->hw.func_caps.num_tx_qp,
+				 pf->hw.func_caps.num_vsis);
+		} else if (list_type == i40e_aqc_opc_list_dev_capabilities) {
+			dev_info(&pf->pdev->dev,
+				 "switch_mode=0x%04x, function_valid=0x%08x\n",
+				 pf->hw.dev_caps.switch_mode,
+				 pf->hw.dev_caps.valid_functions);
+			dev_info(&pf->pdev->dev,
+				 "SR-IOV=%d, num_vfs for all function=%u\n",
+				 pf->hw.dev_caps.sr_iov_1_1,
+				 pf->hw.dev_caps.num_vfs);
+			dev_info(&pf->pdev->dev,
+				 "num_vsis=%u, num_rx:%u, num_tx=%u\n",
+				 pf->hw.dev_caps.num_vsis,
+				 pf->hw.dev_caps.num_rx_qp,
+				 pf->hw.dev_caps.num_tx_qp);
+		}
+	}
+	if (list_type == i40e_aqc_opc_list_func_capabilities) {
 #define DEF_NUM_VSI (1 + (pf->hw.func_caps.fcoe ? 1 : 0) \
 		       + pf->hw.func_caps.num_vfs)
-	if (pf->hw.revision_id == 0 && (DEF_NUM_VSI > pf->hw.func_caps.num_vsis)) {
-		dev_info(&pf->pdev->dev,
-			 "got num_vsis %d, setting num_vsis to %d\n",
-			 pf->hw.func_caps.num_vsis, DEF_NUM_VSI);
-		pf->hw.func_caps.num_vsis = DEF_NUM_VSI;
+		if (pf->hw.revision_id == 0 &&
+		    pf->hw.func_caps.num_vsis < DEF_NUM_VSI) {
+			dev_info(&pf->pdev->dev,
+				 "got num_vsis %d, setting num_vsis to %d\n",
+				 pf->hw.func_caps.num_vsis, DEF_NUM_VSI);
+			pf->hw.func_caps.num_vsis = DEF_NUM_VSI;
+		}
 	}
-
 	return 0;
 }
 
@@ -6995,6 +8981,7 @@ static void i40e_fdir_sb_setup(struct i40e_pf *pf)
 		if (!vsi) {
 			dev_info(&pf->pdev->dev, "Couldn't create FDir VSI\n");
 			pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+			pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
 			return;
 		}
 	}
@@ -7017,6 +9004,95 @@ static void i40e_fdir_teardown(struct i40e_pf *pf)
 }
 
 /**
+ * i40e_rebuild_cloud_filters - Rebuilds cloud filters for VSIs
+ * @vsi: PF main vsi
+ * @seid: seid of main or channel VSIs
+ *
+ * Rebuilds cloud filters associated with main VSI and channel VSIs if they
+ * existed before reset
+ **/
+static int i40e_rebuild_cloud_filters(struct i40e_vsi *vsi, u16 seid)
+{
+	struct i40e_cloud_filter *cfilter;
+	struct i40e_pf *pf = vsi->back;
+	struct hlist_node *node;
+	i40e_status ret;
+
+	/* Add cloud filters back if they exist */
+	hlist_for_each_entry_safe(cfilter, node, &pf->cloud_filter_list,
+				  cloud_node) {
+		if (cfilter->seid != seid)
+			continue;
+
+		if (cfilter->dst_port)
+			ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter,
+								true);
+		else
+			ret = i40e_add_del_cloud_filter(vsi, cfilter, true);
+
+		if (ret) {
+			dev_dbg(&pf->pdev->dev,
+				"Failed to rebuild cloud filter, err %s aq_err %s\n",
+				i40e_stat_str(&pf->hw, ret),
+				i40e_aq_str(&pf->hw,
+					    pf->hw.aq.asq_last_status));
+			return ret;
+		}
+	}
+	return 0;
+}
+
+/**
+ * i40e_rebuild_channels - Rebuilds channel VSIs if they existed before reset
+ * @vsi: PF main vsi
+ *
+ * Rebuilds channel VSIs if they existed before reset
+ **/
+static int i40e_rebuild_channels(struct i40e_vsi *vsi)
+{
+	struct i40e_channel *ch, *ch_tmp;
+	i40e_status ret;
+
+	if (list_empty(&vsi->ch_list))
+		return 0;
+
+	list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
+		if (!ch->initialized)
+			break;
+		/* Proceed with creation of channel (VMDq2) VSI */
+		ret = i40e_add_channel(vsi->back, vsi->uplink_seid, ch);
+		if (ret) {
+			dev_info(&vsi->back->pdev->dev,
+				 "failed to rebuild channels using uplink_seid %u\n",
+				 vsi->uplink_seid);
+			return ret;
+		}
+		if (ch->max_tx_rate) {
+			u64 credits = ch->max_tx_rate;
+
+			if (i40e_set_bw_limit(vsi, ch->seid,
+					      ch->max_tx_rate))
+				return -EINVAL;
+
+			do_div(credits, I40E_BW_CREDIT_DIVISOR);
+			dev_dbg(&vsi->back->pdev->dev,
+				"Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
+				ch->max_tx_rate,
+				credits,
+				ch->seid);
+		}
+		ret = i40e_rebuild_cloud_filters(vsi, ch->seid);
+		if (ret) {
+			dev_dbg(&vsi->back->pdev->dev,
+				"Failed to rebuild cloud filters for channel VSI %u\n",
+				ch->seid);
+			return ret;
+		}
+	}
+	return 0;
+}
+
+/**
  * i40e_prep_for_reset - prep for the core to reset
  * @pf: board private structure
  * @lock_acquired: indicates whether or not the lock has been acquired
@@ -7152,6 +9228,7 @@ static int i40e_reset(struct i40e_pf *pf)
  **/
 static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
 {
+	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
 	struct i40e_hw *hw = &pf->hw;
 	u8 set_fc_aq_fail = 0;
 	i40e_status ret;
@@ -7177,7 +9254,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
 		i40e_verify_eeprom(pf);
 
 	i40e_clear_pxe_mode(hw);
-	ret = i40e_get_capabilities(pf);
+	ret = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities);
 	if (ret)
 		goto end_core_reset;
 
@@ -7234,7 +9311,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
 	 * If there were VEBs but the reconstitution failed, we'll try
 	 * try to recover minimal use by getting the basic PF VSI working.
 	 */
-	if (pf->vsi[pf->lan_vsi]->uplink_seid != pf->mac_seid) {
+	if (vsi->uplink_seid != pf->mac_seid) {
 		dev_dbg(&pf->pdev->dev, "attempting to rebuild switch\n");
 		/* find the one VEB connected to the MAC, and find orphans */
 		for (v = 0; v < I40E_MAX_VEB; v++) {
@@ -7258,8 +9335,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
 					dev_info(&pf->pdev->dev,
 						 "rebuild of switch failed: %d, will try to set up simple PF connection\n",
 						 ret);
-					pf->vsi[pf->lan_vsi]->uplink_seid
-								= pf->mac_seid;
+					vsi->uplink_seid = pf->mac_seid;
 					break;
 				} else if (pf->veb[v]->uplink_seid == 0) {
 					dev_info(&pf->pdev->dev,
@@ -7270,10 +9346,10 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
 		}
 	}
 
-	if (pf->vsi[pf->lan_vsi]->uplink_seid == pf->mac_seid) {
+	if (vsi->uplink_seid == pf->mac_seid) {
 		dev_dbg(&pf->pdev->dev, "attempting to rebuild PF VSI\n");
 		/* no VEB, so rebuild only the Main VSI */
-		ret = i40e_add_vsi(pf->vsi[pf->lan_vsi]);
+		ret = i40e_add_vsi(vsi);
 		if (ret) {
 			dev_info(&pf->pdev->dev,
 				 "rebuild of Main VSI failed: %d\n", ret);
@@ -7281,6 +9357,35 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
 		}
 	}
 
+	if (vsi->mqprio_qopt.max_rate[0]) {
+		u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0];
+		u64 credits = 0;
+
+		do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR);
+		ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate);
+		if (ret)
+			goto end_unlock;
+
+		credits = max_tx_rate;
+		do_div(credits, I40E_BW_CREDIT_DIVISOR);
+		dev_dbg(&vsi->back->pdev->dev,
+			"Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
+			max_tx_rate,
+			credits,
+			vsi->seid);
+	}
+
+	ret = i40e_rebuild_cloud_filters(vsi, vsi->seid);
+	if (ret)
+		goto end_unlock;
+
+	/* PF Main VSI is rebuild by now, go ahead and rebuild channel VSIs
+	 * for this main VSI if they exist
+	 */
+	ret = i40e_rebuild_channels(vsi);
+	if (ret)
+		goto end_unlock;
+
 	/* Reconfigure hardware for allowing smaller MSS in the case
 	 * of TSO, so that we avoid the MDD being fired and causing
 	 * a reset in the case of small MSS+TSO.
@@ -7615,9 +9720,9 @@ static void i40e_service_task(struct work_struct *work)
  * i40e_service_timer - timer callback
  * @data: pointer to PF struct
  **/
-static void i40e_service_timer(unsigned long data)
+static void i40e_service_timer(struct timer_list *t)
 {
-	struct i40e_pf *pf = (struct i40e_pf *)data;
+	struct i40e_pf *pf = from_timer(pf, t, service_timer);
 
 	mod_timer(&pf->service_timer,
 		  round_jiffies(jiffies + pf->service_timer_period));
@@ -7674,7 +9779,7 @@ static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi)
 
 /**
  * i40e_vsi_alloc_arrays - Allocate queue and vector pointer arrays for the vsi
- * @type: VSI pointer
+ * @vsi: VSI pointer
  * @alloc_qvectors: a bool to specify if q_vectors need to be allocated.
  *
  * On error: returns error code (negative)
@@ -8139,7 +10244,7 @@ static int i40e_init_msix(struct i40e_pf *pf)
 		pf->num_lan_qps = 1;
 		pf->num_lan_msix = 1;
 
-	} else if (!vectors_left) {
+	} else if (v_actual != v_budget) {
 		/* If we have limited resources, we will start with no vectors
 		 * for the special features and then allocate vectors to some
 		 * of these features based on the policy and at the end disable
@@ -8148,7 +10253,8 @@ static int i40e_init_msix(struct i40e_pf *pf)
 		int vec;
 
 		dev_info(&pf->pdev->dev,
-			 "MSI-X vector limit reached, attempting to redistribute vectors\n");
+			 "MSI-X vector limit reached with %d, wanted %d, attempting to redistribute vectors\n",
+			 v_actual, v_budget);
 		/* reserve the misc vector */
 		vec = v_actual - 1;
 
@@ -8196,6 +10302,7 @@ static int i40e_init_msix(struct i40e_pf *pf)
 	    (pf->num_fdsb_msix == 0)) {
 		dev_info(&pf->pdev->dev, "Sideband Flowdir disabled, not enough MSI-X vectors\n");
 		pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+		pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
 	}
 	if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) &&
 	    (pf->num_vmdq_msix == 0)) {
@@ -8313,6 +10420,7 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf)
 				       I40E_FLAG_FD_SB_ENABLED	|
 				       I40E_FLAG_FD_ATR_ENABLED	|
 				       I40E_FLAG_VMDQ_ENABLED);
+			pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
 
 			/* rework the queue expectations without MSIX */
 			i40e_determine_queue_usage(pf);
@@ -8351,6 +10459,55 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf)
 }
 
 /**
+ * i40e_restore_interrupt_scheme - Restore the interrupt scheme
+ * @pf: private board data structure
+ *
+ * Restore the interrupt scheme that was cleared when we suspended the
+ * device. This should be called during resume to re-allocate the q_vectors
+ * and reacquire IRQs.
+ */
+static int i40e_restore_interrupt_scheme(struct i40e_pf *pf)
+{
+	int err, i;
+
+	/* We cleared the MSI and MSI-X flags when disabling the old interrupt
+	 * scheme. We need to re-enabled them here in order to attempt to
+	 * re-acquire the MSI or MSI-X vectors
+	 */
+	pf->flags |= (I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED);
+
+	err = i40e_init_interrupt_scheme(pf);
+	if (err)
+		return err;
+
+	/* Now that we've re-acquired IRQs, we need to remap the vectors and
+	 * rings together again.
+	 */
+	for (i = 0; i < pf->num_alloc_vsi; i++) {
+		if (pf->vsi[i]) {
+			err = i40e_vsi_alloc_q_vectors(pf->vsi[i]);
+			if (err)
+				goto err_unwind;
+			i40e_vsi_map_rings_to_vectors(pf->vsi[i]);
+		}
+	}
+
+	err = i40e_setup_misc_vector(pf);
+	if (err)
+		goto err_unwind;
+
+	return 0;
+
+err_unwind:
+	while (i--) {
+		if (pf->vsi[i])
+			i40e_vsi_free_q_vectors(pf->vsi[i]);
+	}
+
+	return err;
+}
+
+/**
  * i40e_setup_misc_vector - Setup the misc vector to handle non queue events
  * @pf: board private structure
  *
@@ -8363,13 +10520,12 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf)
 	struct i40e_hw *hw = &pf->hw;
 	int err = 0;
 
-	/* Only request the irq if this is the first time through, and
-	 * not when we're rebuilding after a Reset
-	 */
-	if (!test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) {
+	/* Only request the IRQ once, the first time through. */
+	if (!test_and_set_bit(__I40E_MISC_IRQ_REQUESTED, pf->state)) {
 		err = request_irq(pf->msix_entries[0].vector,
 				  i40e_intr, 0, pf->int_name, pf);
 		if (err) {
+			clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state);
 			dev_info(&pf->pdev->dev,
 				 "request_irq for %s failed: %d\n",
 				 pf->int_name, err);
@@ -8385,51 +10541,12 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf)
 
 	i40e_flush(hw);
 
-	i40e_irq_dynamic_enable_icr0(pf, true);
+	i40e_irq_dynamic_enable_icr0(pf);
 
 	return err;
 }
 
 /**
- * i40e_config_rss_aq - Prepare for RSS using AQ commands
- * @vsi: vsi structure
- * @seed: RSS hash seed
- **/
-static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
-			      u8 *lut, u16 lut_size)
-{
-	struct i40e_pf *pf = vsi->back;
-	struct i40e_hw *hw = &pf->hw;
-	int ret = 0;
-
-	if (seed) {
-		struct i40e_aqc_get_set_rss_key_data *seed_dw =
-			(struct i40e_aqc_get_set_rss_key_data *)seed;
-		ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw);
-		if (ret) {
-			dev_info(&pf->pdev->dev,
-				 "Cannot set RSS key, err %s aq_err %s\n",
-				 i40e_stat_str(hw, ret),
-				 i40e_aq_str(hw, hw->aq.asq_last_status));
-			return ret;
-		}
-	}
-	if (lut) {
-		bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false;
-
-		ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size);
-		if (ret) {
-			dev_info(&pf->pdev->dev,
-				 "Cannot set RSS lut, err %s aq_err %s\n",
-				 i40e_stat_str(hw, ret),
-				 i40e_aq_str(hw, hw->aq.asq_last_status));
-			return ret;
-		}
-	}
-	return ret;
-}
-
-/**
  * i40e_get_rss_aq - Get RSS keys and lut by using AQ commands
  * @vsi: Pointer to vsi structure
  * @seed: Buffter to store the hash keys
@@ -8476,46 +10593,6 @@ static int i40e_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
 }
 
 /**
- * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used
- * @vsi: VSI structure
- **/
-static int i40e_vsi_config_rss(struct i40e_vsi *vsi)
-{
-	u8 seed[I40E_HKEY_ARRAY_SIZE];
-	struct i40e_pf *pf = vsi->back;
-	u8 *lut;
-	int ret;
-
-	if (!(pf->hw_features & I40E_HW_RSS_AQ_CAPABLE))
-		return 0;
-
-	if (!vsi->rss_size)
-		vsi->rss_size = min_t(int, pf->alloc_rss_size,
-				      vsi->num_queue_pairs);
-	if (!vsi->rss_size)
-		return -EINVAL;
-
-	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
-	if (!lut)
-		return -ENOMEM;
-	/* Use the user configured hash keys and lookup table if there is one,
-	 * otherwise use default
-	 */
-	if (vsi->rss_lut_user)
-		memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
-	else
-		i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
-	if (vsi->rss_hkey_user)
-		memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
-	else
-		netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
-	ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size);
-	kfree(lut);
-
-	return ret;
-}
-
-/**
  * i40e_config_rss_reg - Configure RSS keys and lut by writing registers
  * @vsi: Pointer to vsi structure
  * @seed: RSS hash seed
@@ -8913,8 +10990,8 @@ static int i40e_sw_init(struct i40e_pf *pf)
 		    I40E_FLAG_MSIX_ENABLED;
 
 	/* Set default ITR */
-	pf->rx_itr_default = I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF;
-	pf->tx_itr_default = I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF;
+	pf->rx_itr_default = I40E_ITR_RX_DEF;
+	pf->tx_itr_default = I40E_ITR_TX_DEF;
 
 	/* Depending on PF configurations, it is possible that the RSS
 	 * maximum might end up larger than the available queues
@@ -9014,6 +11091,11 @@ static int i40e_sw_init(struct i40e_pf *pf)
 	    (pf->hw.aq.fw_maj_ver >= 5)))
 		pf->hw_features |= I40E_HW_USE_SET_LLDP_MIB;
 
+	/* Enable PTP L4 if FW > v6.0 */
+	if (pf->hw.mac.type == I40E_MAC_XL710 &&
+	    pf->hw.aq.fw_maj_ver >= 6)
+		pf->hw_features |= I40E_HW_PTP_L4_CAPABLE;
+
 	if (pf->hw.func_caps.vmdq) {
 		pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI;
 		pf->flags |= I40E_FLAG_VMDQ_ENABLED;
@@ -9079,9 +11161,13 @@ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features)
 		/* Enable filters and mark for reset */
 		if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
 			need_reset = true;
-		/* enable FD_SB only if there is MSI-X vector */
-		if (pf->num_fdsb_msix > 0)
+		/* enable FD_SB only if there is MSI-X vector and no cloud
+		 * filters exist
+		 */
+		if (pf->num_fdsb_msix > 0 && !pf->num_cloud_filters) {
 			pf->flags |= I40E_FLAG_FD_SB_ENABLED;
+			pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE;
+		}
 	} else {
 		/* turn off filters, mark for reset and clear SW filter list */
 		if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
@@ -9090,6 +11176,8 @@ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features)
 		}
 		pf->flags &= ~(I40E_FLAG_FD_SB_ENABLED |
 			       I40E_FLAG_FD_SB_AUTO_DISABLED);
+		pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
+
 		/* reset fd counters */
 		pf->fd_add_err = 0;
 		pf->fd_atr_cnt = 0;
@@ -9151,10 +11239,16 @@ static int i40e_set_features(struct net_device *netdev,
 	else
 		i40e_vlan_stripping_disable(vsi);
 
+	if (!(features & NETIF_F_HW_TC) && pf->num_cloud_filters) {
+		dev_err(&pf->pdev->dev,
+			"Offloaded tc filters active, can't turn hw_tc_offload off");
+		return -EINVAL;
+	}
+
 	need_reset = i40e_set_ntuple(pf, features);
 
 	if (need_reset)
-		i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED), true);
+		i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
 
 	return 0;
 }
@@ -9406,8 +11500,7 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev,
 				pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
 			else
 				pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
-			i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED),
-				      true);
+			i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
 			break;
 		}
 	}
@@ -9555,12 +11648,12 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi,
 }
 
 /**
- * i40e_xdp - implements ndo_xdp for i40e
+ * i40e_xdp - implements ndo_bpf for i40e
  * @dev: netdevice
  * @xdp: XDP command
  **/
 static int i40e_xdp(struct net_device *dev,
-		    struct netdev_xdp *xdp)
+		    struct netdev_bpf *xdp)
 {
 	struct i40e_netdev_priv *np = netdev_priv(dev);
 	struct i40e_vsi *vsi = np->vsi;
@@ -9612,7 +11705,7 @@ static const struct net_device_ops i40e_netdev_ops = {
 	.ndo_features_check	= i40e_features_check,
 	.ndo_bridge_getlink	= i40e_ndo_bridge_getlink,
 	.ndo_bridge_setlink	= i40e_ndo_bridge_setlink,
-	.ndo_xdp		= i40e_xdp,
+	.ndo_bpf		= i40e_xdp,
 };
 
 /**
@@ -9671,7 +11764,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
 	netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID;
 
 	if (!(pf->flags & I40E_FLAG_MFP_ENABLED))
-		netdev->hw_features |= NETIF_F_NTUPLE;
+		netdev->hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC;
+
 	hw_features = hw_enc_features		|
 		      NETIF_F_HW_VLAN_CTAG_TX	|
 		      NETIF_F_HW_VLAN_CTAG_RX;
@@ -9849,6 +11943,31 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
 
 		enabled_tc = i40e_pf_get_tc_map(pf);
 
+		/* Source pruning is enabled by default, so the flag is
+		 * negative logic - if it's set, we need to fiddle with
+		 * the VSI to disable source pruning.
+		 */
+		if (pf->flags & I40E_FLAG_SOURCE_PRUNING_DISABLED) {
+			memset(&ctxt, 0, sizeof(ctxt));
+			ctxt.seid = pf->main_vsi_seid;
+			ctxt.pf_num = pf->hw.pf_id;
+			ctxt.vf_num = 0;
+			ctxt.info.valid_sections |=
+				     cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
+			ctxt.info.switch_id =
+				   cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_LOCAL_LB);
+			ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+			if (ret) {
+				dev_info(&pf->pdev->dev,
+					 "update vsi failed, err %s aq_err %s\n",
+					 i40e_stat_str(&pf->hw, ret),
+					 i40e_aq_str(&pf->hw,
+						     pf->hw.aq.asq_last_status));
+				ret = -ENOENT;
+				goto err;
+			}
+		}
+
 		/* MFP mode setup queue map and update VSI */
 		if ((pf->flags & I40E_FLAG_MFP_ENABLED) &&
 		    !(pf->hw.func_caps.iscsi)) { /* NIC type PF */
@@ -10951,14 +13070,16 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit)
 	*/
 
 	if ((pf->hw.pf_id == 0) &&
-	    !(pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT))
+	    !(pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT)) {
 		flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
+		pf->last_sw_conf_flags = flags;
+	}
 
 	if (pf->hw.pf_id == 0) {
 		u16 valid_flags;
 
 		valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
-		ret = i40e_aq_set_switch_config(&pf->hw, flags, valid_flags,
+		ret = i40e_aq_set_switch_config(&pf->hw, flags, valid_flags, 0,
 						NULL);
 		if (ret && pf->hw.aq.asq_last_status != I40E_AQ_RC_ESRCH) {
 			dev_info(&pf->pdev->dev,
@@ -10968,6 +13089,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit)
 					     pf->hw.aq.asq_last_status));
 			/* not a fatal problem, just keep going */
 		}
+		pf->last_sw_conf_valid_flags = valid_flags;
 	}
 
 	/* first time setup */
@@ -10988,6 +13110,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit)
 			vsi = i40e_vsi_reinit_setup(pf->vsi[pf->lan_vsi]);
 		if (!vsi) {
 			dev_info(&pf->pdev->dev, "setup of MAIN VSI failed\n");
+			i40e_cloud_filter_exit(pf);
 			i40e_fdir_teardown(pf);
 			return -EAGAIN;
 		}
@@ -11039,6 +13162,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit)
 static void i40e_determine_queue_usage(struct i40e_pf *pf)
 {
 	int queues_left;
+	int q_max;
 
 	pf->num_lan_qps = 0;
 
@@ -11063,6 +13187,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
 			       I40E_FLAG_DCB_ENABLED	|
 			       I40E_FLAG_SRIOV_ENABLED	|
 			       I40E_FLAG_VMDQ_ENABLED);
+		pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
 	} else if (!(pf->flags & (I40E_FLAG_RSS_ENABLED |
 				  I40E_FLAG_FD_SB_ENABLED |
 				  I40E_FLAG_FD_ATR_ENABLED |
@@ -11077,6 +13202,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
 			       I40E_FLAG_FD_ATR_ENABLED	|
 			       I40E_FLAG_DCB_ENABLED	|
 			       I40E_FLAG_VMDQ_ENABLED);
+		pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
 	} else {
 		/* Not enough queues for all TCs */
 		if ((pf->flags & I40E_FLAG_DCB_CAPABLE) &&
@@ -11085,10 +13211,12 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
 					I40E_FLAG_DCB_ENABLED);
 			dev_info(&pf->pdev->dev, "not enough queues for DCB. DCB is disabled.\n");
 		}
-		pf->num_lan_qps = max_t(int, pf->rss_size_max,
-					num_online_cpus());
-		pf->num_lan_qps = min_t(int, pf->num_lan_qps,
-					pf->hw.func_caps.num_tx_qp);
+
+		/* limit lan qps to the smaller of qps, cpus or msix */
+		q_max = max_t(int, pf->rss_size_max, num_online_cpus());
+		q_max = min_t(int, q_max, pf->hw.func_caps.num_tx_qp);
+		q_max = min_t(int, q_max, pf->hw.func_caps.num_msix_vectors);
+		pf->num_lan_qps = q_max;
 
 		queues_left -= pf->num_lan_qps;
 	}
@@ -11098,6 +13226,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
 			queues_left -= 1; /* save 1 queue for FD */
 		} else {
 			pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+			pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
 			dev_info(&pf->pdev->dev, "not enough queues for Flow Director. Flow Director feature is disabled\n");
 		}
 	}
@@ -11304,6 +13433,13 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	hw->bus.bus_id = pdev->bus->number;
 	pf->instance = pfs_found;
 
+	/* Select something other than the 802.1ad ethertype for the
+	 * switch to use internally and drop on ingress.
+	 */
+	hw->switch_tag = 0xffff;
+	hw->first_tag = ETH_P_8021AD;
+	hw->second_tag = ETH_P_8021Q;
+
 	INIT_LIST_HEAD(&pf->l3_flex_pit_list);
 	INIT_LIST_HEAD(&pf->l4_flex_pit_list);
 
@@ -11380,11 +13516,10 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		 i40e_nvm_version_str(hw));
 
 	if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
-	    hw->aq.api_min_ver > I40E_FW_API_VERSION_MINOR)
+	    hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw))
 		dev_info(&pdev->dev,
 			 "The driver for the device detected a newer version of the NVM image than expected. Please install the most recent version of the network driver.\n");
-	else if (hw->aq.api_maj_ver < I40E_FW_API_VERSION_MAJOR ||
-		 hw->aq.api_min_ver < (I40E_FW_API_VERSION_MINOR - 1))
+	else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4)
 		dev_info(&pdev->dev,
 			 "The driver for the device detected an older version of the NVM image than expected. Please update the NVM image.\n");
 
@@ -11395,7 +13530,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		dev_warn(&pdev->dev, "This device is a pre-production adapter/LOM. Please be aware there may be issues with your hardware. If you are experiencing problems please contact your Intel or hardware representative who provided you with this hardware.\n");
 
 	i40e_clear_pxe_mode(hw);
-	err = i40e_get_capabilities(pf);
+	err = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities);
 	if (err)
 		goto err_adminq_setup;
 
@@ -11454,7 +13589,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 #endif /* CONFIG_I40E_DCB */
 
 	/* set up periodic task facility */
-	setup_timer(&pf->service_timer, i40e_service_timer, (unsigned long)pf);
+	timer_setup(&pf->service_timer, i40e_service_timer, 0);
 	pf->service_timer_period = HZ;
 
 	INIT_WORK(&pf->service_task, i40e_service_task);
@@ -11506,6 +13641,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err);
 		goto err_vsis;
 	}
+	INIT_LIST_HEAD(&pf->vsi[pf->lan_vsi]->ch_list);
 
 	/* Make sure flow control is set according to current settings */
 	err = i40e_set_fc(hw, &set_fc_aq_fail, true);
@@ -11777,7 +13913,7 @@ static void i40e_remove(struct pci_dev *pdev)
 	/* no more scheduling of any task */
 	set_bit(__I40E_SUSPENDED, pf->state);
 	set_bit(__I40E_DOWN, pf->state);
-	if (pf->service_timer.data)
+	if (pf->service_timer.function)
 		del_timer_sync(&pf->service_timer);
 	if (pf->service_task.func)
 		cancel_work_sync(&pf->service_task);
@@ -11812,6 +13948,8 @@ static void i40e_remove(struct pci_dev *pdev)
 	if (pf->vsi[pf->lan_vsi])
 		i40e_vsi_release(pf->vsi[pf->lan_vsi]);
 
+	i40e_cloud_filter_exit(pf);
+
 	/* remove attached clients */
 	if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
 		ret_code = i40e_lan_del_device(pf);
@@ -11937,6 +14075,28 @@ static pci_ers_result_t i40e_pci_error_slot_reset(struct pci_dev *pdev)
 }
 
 /**
+ * i40e_pci_error_reset_prepare - prepare device driver for pci reset
+ * @pdev: PCI device information struct
+ */
+static void i40e_pci_error_reset_prepare(struct pci_dev *pdev)
+{
+	struct i40e_pf *pf = pci_get_drvdata(pdev);
+
+	i40e_prep_for_reset(pf, false);
+}
+
+/**
+ * i40e_pci_error_reset_done - pci reset done, device driver reset can begin
+ * @pdev: PCI device information struct
+ */
+static void i40e_pci_error_reset_done(struct pci_dev *pdev)
+{
+	struct i40e_pf *pf = pci_get_drvdata(pdev);
+
+	i40e_reset_and_rebuild(pf, false, false);
+}
+
+/**
  * i40e_pci_error_resume - restart operations after PCI error recovery
  * @pdev: PCI device information struct
  *
@@ -12021,6 +14181,7 @@ static void i40e_shutdown(struct pci_dev *pdev)
 
 	del_timer_sync(&pf->service_timer);
 	cancel_work_sync(&pf->service_task);
+	i40e_cloud_filter_exit(pf);
 	i40e_fdir_teardown(pf);
 
 	/* Client close must be called explicitly here because the timer
@@ -12046,20 +14207,26 @@ static void i40e_shutdown(struct pci_dev *pdev)
 	}
 }
 
-#ifdef CONFIG_PM
 /**
- * i40e_suspend - PCI callback for moving to D3
- * @pdev: PCI device information struct
+ * i40e_suspend - PM callback for moving to D3
+ * @dev: generic device information structure
  **/
-static int i40e_suspend(struct pci_dev *pdev, pm_message_t state)
+static int __maybe_unused i40e_suspend(struct device *dev)
 {
+	struct pci_dev *pdev = to_pci_dev(dev);
 	struct i40e_pf *pf = pci_get_drvdata(pdev);
 	struct i40e_hw *hw = &pf->hw;
-	int retval = 0;
 
-	set_bit(__I40E_SUSPENDED, pf->state);
+	/* If we're already suspended, then there is nothing to do */
+	if (test_and_set_bit(__I40E_SUSPENDED, pf->state))
+		return 0;
+
 	set_bit(__I40E_DOWN, pf->state);
 
+	/* Ensure service task will not be running */
+	del_timer_sync(&pf->service_timer);
+	cancel_work_sync(&pf->service_task);
+
 	if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE))
 		i40e_enable_mc_magic_wake(pf);
 
@@ -12068,81 +14235,70 @@ static int i40e_suspend(struct pci_dev *pdev, pm_message_t state)
 	wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0));
 	wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0));
 
-	i40e_stop_misc_vector(pf);
-	if (pf->msix_entries) {
-		synchronize_irq(pf->msix_entries[0].vector);
-		free_irq(pf->msix_entries[0].vector, pf);
-	}
-	retval = pci_save_state(pdev);
-	if (retval)
-		return retval;
-
-	pci_wake_from_d3(pdev, pf->wol_en);
-	pci_set_power_state(pdev, PCI_D3hot);
+	/* Clear the interrupt scheme and release our IRQs so that the system
+	 * can safely hibernate even when there are a large number of CPUs.
+	 * Otherwise hibernation might fail when mapping all the vectors back
+	 * to CPU0.
+	 */
+	i40e_clear_interrupt_scheme(pf);
 
-	return retval;
+	return 0;
 }
 
 /**
- * i40e_resume - PCI callback for waking up from D3
- * @pdev: PCI device information struct
+ * i40e_resume - PM callback for waking up from D3
+ * @dev: generic device information structure
  **/
-static int i40e_resume(struct pci_dev *pdev)
+static int __maybe_unused i40e_resume(struct device *dev)
 {
+	struct pci_dev *pdev = to_pci_dev(dev);
 	struct i40e_pf *pf = pci_get_drvdata(pdev);
-	u32 err;
+	int err;
 
-	pci_set_power_state(pdev, PCI_D0);
-	pci_restore_state(pdev);
-	/* pci_restore_state() clears dev->state_saves, so
-	 * call pci_save_state() again to restore it.
-	 */
-	pci_save_state(pdev);
+	/* If we're not suspended, then there is nothing to do */
+	if (!test_bit(__I40E_SUSPENDED, pf->state))
+		return 0;
 
-	err = pci_enable_device_mem(pdev);
+	/* We cleared the interrupt scheme when we suspended, so we need to
+	 * restore it now to resume device functionality.
+	 */
+	err = i40e_restore_interrupt_scheme(pf);
 	if (err) {
-		dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n");
-		return err;
+		dev_err(&pdev->dev, "Cannot restore interrupt scheme: %d\n",
+			err);
 	}
-	pci_set_master(pdev);
 
-	/* no wakeup events while running */
-	pci_wake_from_d3(pdev, false);
-
-	/* handling the reset will rebuild the device state */
-	if (test_and_clear_bit(__I40E_SUSPENDED, pf->state)) {
-		clear_bit(__I40E_DOWN, pf->state);
-		if (pf->msix_entries) {
-			err = request_irq(pf->msix_entries[0].vector,
-					  i40e_intr, 0, pf->int_name, pf);
-			if (err) {
-				dev_err(&pf->pdev->dev,
-					"request_irq for %s failed: %d\n",
-					pf->int_name, err);
-			}
-		}
-		i40e_reset_and_rebuild(pf, false, false);
-	}
+	clear_bit(__I40E_DOWN, pf->state);
+	i40e_reset_and_rebuild(pf, false, false);
+
+	/* Clear suspended state last after everything is recovered */
+	clear_bit(__I40E_SUSPENDED, pf->state);
+
+	/* Restart the service task */
+	mod_timer(&pf->service_timer,
+		  round_jiffies(jiffies + pf->service_timer_period));
 
 	return 0;
 }
 
-#endif
 static const struct pci_error_handlers i40e_err_handler = {
 	.error_detected = i40e_pci_error_detected,
 	.slot_reset = i40e_pci_error_slot_reset,
+	.reset_prepare = i40e_pci_error_reset_prepare,
+	.reset_done = i40e_pci_error_reset_done,
 	.resume = i40e_pci_error_resume,
 };
 
+static SIMPLE_DEV_PM_OPS(i40e_pm_ops, i40e_suspend, i40e_resume);
+
 static struct pci_driver i40e_driver = {
 	.name     = i40e_driver_name,
 	.id_table = i40e_pci_tbl,
 	.probe    = i40e_probe,
 	.remove   = i40e_remove,
-#ifdef CONFIG_PM
-	.suspend  = i40e_suspend,
-	.resume   = i40e_resume,
-#endif
+	.driver   = {
+		.pm = &i40e_pm_ops,
+	},
 	.shutdown = i40e_shutdown,
 	.err_handler = &i40e_err_handler,
 	.sriov_configure = i40e_pci_sriov_configure,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index d591b3e6bd7c..0ccab0a5d717 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -311,13 +311,10 @@ static i40e_status i40e_read_nvm_word_aq(struct i40e_hw *hw, u16 offset,
 static i40e_status __i40e_read_nvm_word(struct i40e_hw *hw,
 					u16 offset, u16 *data)
 {
-	i40e_status ret_code = 0;
-
 	if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE)
-		ret_code = i40e_read_nvm_word_aq(hw, offset, data);
-	else
-		ret_code = i40e_read_nvm_word_srctl(hw, offset, data);
-	return ret_code;
+		return i40e_read_nvm_word_aq(hw, offset, data);
+
+	return i40e_read_nvm_word_srctl(hw, offset, data);
 }
 
 /**
@@ -331,7 +328,7 @@ static i40e_status __i40e_read_nvm_word(struct i40e_hw *hw,
 i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
 			       u16 *data)
 {
-	i40e_status ret_code = 0;
+	i40e_status ret_code;
 
 	ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
 	if (ret_code)
@@ -446,13 +443,10 @@ static i40e_status __i40e_read_nvm_buffer(struct i40e_hw *hw,
 					  u16 offset, u16 *words,
 					  u16 *data)
 {
-	i40e_status ret_code = 0;
-
 	if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE)
-		ret_code = i40e_read_nvm_buffer_aq(hw, offset, words, data);
-	else
-		ret_code = i40e_read_nvm_buffer_srctl(hw, offset, words, data);
-	return ret_code;
+		return i40e_read_nvm_buffer_aq(hw, offset, words, data);
+
+	return i40e_read_nvm_buffer_srctl(hw, offset, words, data);
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index a39b13197891..3bb6659db822 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -190,7 +190,7 @@ i40e_status i40e_aq_get_switch_config(struct i40e_hw *hw,
 				struct i40e_asq_cmd_details *cmd_details);
 enum i40e_status_code i40e_aq_set_switch_config(struct i40e_hw *hw,
 						u16 flags,
-						u16 valid_flags,
+						u16 valid_flags, u8 mode,
 				struct i40e_asq_cmd_details *cmd_details);
 i40e_status i40e_aq_request_resource(struct i40e_hw *hw,
 				enum i40e_aq_resources_ids resource,
@@ -283,6 +283,22 @@ i40e_status i40e_aq_query_switch_comp_bw_config(struct i40e_hw *hw,
 		struct i40e_asq_cmd_details *cmd_details);
 i40e_status i40e_aq_resume_port_tx(struct i40e_hw *hw,
 				   struct i40e_asq_cmd_details *cmd_details);
+i40e_status
+i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
+			     struct i40e_aqc_cloud_filters_element_bb *filters,
+			     u8 filter_count);
+enum i40e_status_code
+i40e_aq_add_cloud_filters(struct i40e_hw *hw, u16 vsi,
+			  struct i40e_aqc_cloud_filters_element_data *filters,
+			  u8 filter_count);
+enum i40e_status_code
+i40e_aq_rem_cloud_filters(struct i40e_hw *hw, u16 vsi,
+			  struct i40e_aqc_cloud_filters_element_data *filters,
+			  u8 filter_count);
+i40e_status
+i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
+			     struct i40e_aqc_cloud_filters_element_bb *filters,
+			     u8 filter_count);
 i40e_status i40e_read_lldp_cfg(struct i40e_hw *hw,
 			       struct i40e_lldp_variables *lldp_cfg);
 /* i40e_common */
@@ -360,6 +376,15 @@ i40e_status i40e_aq_rx_ctl_write_register(struct i40e_hw *hw,
 				u32 reg_addr, u32 reg_val,
 				struct i40e_asq_cmd_details *cmd_details);
 void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val);
+i40e_status i40e_aq_set_phy_register(struct i40e_hw *hw,
+				     u8 phy_select, u8 dev_addr,
+				     u32 reg_addr, u32 reg_val,
+				     struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_get_phy_register(struct i40e_hw *hw,
+				     u8 phy_select, u8 dev_addr,
+				     u32 reg_addr, u32 *reg_val,
+				     struct i40e_asq_cmd_details *cmd_details);
+
 i40e_status i40e_read_phy_register_clause22(struct i40e_hw *hw,
 					    u16 reg, u8 phy_addr, u16 *value);
 i40e_status i40e_write_phy_register_clause22(struct i40e_hw *hw,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h
index 86ca27f72f02..c234758dad15 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_register.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_register.h
@@ -2794,7 +2794,7 @@
 #define I40E_GLV_RUPP_MAX_INDEX 383
 #define I40E_GLV_RUPP_RUPP_SHIFT 0
 #define I40E_GLV_RUPP_RUPP_MASK I40E_MASK(0xFFFFFFFF, I40E_GLV_RUPP_RUPP_SHIFT)
-#define I40E_GLV_TEPC(_VSI) (0x00344000 + ((_VSI) * 4)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_TEPC(_i) (0x00344000 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
 #define I40E_GLV_TEPC_MAX_INDEX 383
 #define I40E_GLV_TEPC_TEPC_SHIFT 0
 #define I40E_GLV_TEPC_TEPC_MASK I40E_MASK(0xFFFFFFFF, I40E_GLV_TEPC_TEPC_SHIFT)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 120c68f78951..d6d352a6e6ea 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -960,14 +960,14 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
 {
 	enum i40e_latency_range new_latency_range = rc->latency_range;
 	u32 new_itr = rc->itr;
-	int bytes_per_int;
+	int bytes_per_usec;
 	unsigned int usecs, estimated_usecs;
 
 	if (rc->total_packets == 0 || !rc->itr)
 		return false;
 
 	usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
-	bytes_per_int = rc->total_bytes / usecs;
+	bytes_per_usec = rc->total_bytes / usecs;
 
 	/* The calculations in this algorithm depend on interrupts actually
 	 * firing at the ITR rate. This may not happen if the packet rate is
@@ -993,18 +993,18 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
 	 */
 	switch (new_latency_range) {
 	case I40E_LOWEST_LATENCY:
-		if (bytes_per_int > 10)
+		if (bytes_per_usec > 10)
 			new_latency_range = I40E_LOW_LATENCY;
 		break;
 	case I40E_LOW_LATENCY:
-		if (bytes_per_int > 20)
+		if (bytes_per_usec > 20)
 			new_latency_range = I40E_BULK_LATENCY;
-		else if (bytes_per_int <= 10)
+		else if (bytes_per_usec <= 10)
 			new_latency_range = I40E_LOWEST_LATENCY;
 		break;
 	case I40E_BULK_LATENCY:
 	default:
-		if (bytes_per_int <= 20)
+		if (bytes_per_usec <= 20)
 			new_latency_range = I40E_LOW_LATENCY;
 		break;
 	}
@@ -2117,6 +2117,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 		if (!skb) {
 			xdp.data = page_address(rx_buffer->page) +
 				   rx_buffer->page_offset;
+			xdp_set_data_meta_invalid(&xdp);
 			xdp.data_hard_start = xdp.data -
 					      i40e_rx_offset(rx_ring);
 			xdp.data_end = xdp.data + size;
@@ -2211,9 +2212,7 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr)
 	u32 val;
 
 	val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
-	      /* Don't clear PBA because that can cause lost interrupts that
-	       * came in while we were cleaning/polling
-	       */
+	      I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
 	      (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
 	      (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
 
@@ -2250,7 +2249,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
 
 	/* If we don't have MSIX, then we only need to re-enable icr0 */
 	if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) {
-		i40e_irq_dynamic_enable_icr0(vsi->back, false);
+		i40e_irq_dynamic_enable_icr0(vsi->back);
 		return;
 	}
 
@@ -3176,38 +3175,12 @@ static inline int i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 	/* write last descriptor with EOP bit */
 	td_cmd |= I40E_TX_DESC_CMD_EOP;
 
-	/* We can OR these values together as they both are checked against
-	 * 4 below and at this point desc_count will be used as a boolean value
-	 * after this if/else block.
+	/* We OR these values together to check both against 4 (WB_STRIDE)
+	 * below. This is safe since we don't re-use desc_count afterwards.
 	 */
 	desc_count |= ++tx_ring->packet_stride;
 
-	/* Algorithm to optimize tail and RS bit setting:
-	 * if queue is stopped
-	 *	mark RS bit
-	 *	reset packet counter
-	 * else if xmit_more is supported and is true
-	 *	advance packet counter to 4
-	 *	reset desc_count to 0
-	 *
-	 * if desc_count >= 4
-	 *	mark RS bit
-	 *	reset packet counter
-	 * if desc_count > 0
-	 *	update tail
-	 *
-	 * Note: If there are less than 4 descriptors
-	 * pending and interrupts were disabled the service task will
-	 * trigger a force WB.
-	 */
-	if (netif_xmit_stopped(txring_txq(tx_ring))) {
-		goto do_rs;
-	} else if (skb->xmit_more) {
-		/* set stride to arm on next packet and reset desc_count */
-		tx_ring->packet_stride = WB_STRIDE;
-		desc_count = 0;
-	} else if (desc_count >= WB_STRIDE) {
-do_rs:
+	if (desc_count >= WB_STRIDE) {
 		/* write last descriptor with RS bit set */
 		td_cmd |= I40E_TX_DESC_CMD_RS;
 		tx_ring->packet_stride = 0;
@@ -3228,7 +3201,7 @@ do_rs:
 	first->next_to_watch = tx_desc;
 
 	/* notify HW of packet */
-	if (desc_count) {
+	if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
 		writel(i, tx_ring->tail);
 
 		/* we need this if more than one processor can write to our tail
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 2f848bc5e391..fbae1182e2ea 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -38,8 +38,10 @@
 #define I40E_ITR_8K                0x003E
 #define I40E_ITR_4K                0x007A
 #define I40E_MAX_INTRL             0x3B    /* reg uses 4 usec resolution */
-#define I40E_ITR_RX_DEF            I40E_ITR_20K
-#define I40E_ITR_TX_DEF            I40E_ITR_20K
+#define I40E_ITR_RX_DEF            (ITR_REG_TO_USEC(I40E_ITR_20K) | \
+				    I40E_ITR_DYNAMIC)
+#define I40E_ITR_TX_DEF            (ITR_REG_TO_USEC(I40E_ITR_20K) | \
+				    I40E_ITR_DYNAMIC)
 #define I40E_ITR_DYNAMIC           0x8000  /* use top bit as a flag */
 #define I40E_MIN_INT_RATE          250     /* ~= 1000000 / (I40E_MAX_ITR * 2) */
 #define I40E_MAX_INT_RATE          500000  /* == 1000000 / (I40E_MIN_ITR * 2) */
@@ -206,7 +208,7 @@ static inline bool i40e_test_staterr(union i40e_rx_desc *rx_desc,
 }
 
 /* How many Rx Buffers do we bundle into one write to the hardware ? */
-#define I40E_RX_BUFFER_WRITE	16	/* Must be power of 2 */
+#define I40E_RX_BUFFER_WRITE	32	/* Must be power of 2 */
 #define I40E_RX_INCREMENT(r, i) \
 	do {					\
 		(i)++;				\
@@ -342,6 +344,7 @@ struct i40e_rx_queue_stats {
 enum i40e_ring_state_t {
 	__I40E_TX_FDIR_INIT_DONE,
 	__I40E_TX_XPS_INIT_DONE,
+	__I40E_RING_STATE_NBITS /* must be last */
 };
 
 /* some useful defines for virtchannel interface, which
@@ -366,7 +369,7 @@ struct i40e_ring {
 		struct i40e_tx_buffer *tx_bi;
 		struct i40e_rx_buffer *rx_bi;
 	};
-	unsigned long state;
+	DECLARE_BITMAP(state, __I40E_RING_STATE_NBITS);
 	u16 queue_index;		/* Queue number of ring */
 	u8 dcb_tc;			/* Traffic class of ring */
 	u8 __iomem *tail;
@@ -423,6 +426,8 @@ struct i40e_ring {
 					 * i40e_clean_rx_ring_irq() is called
 					 * for this ring.
 					 */
+
+	struct i40e_channel *ch;
 } ____cacheline_internodealigned_in_smp;
 
 static inline bool ring_uses_build_skb(struct i40e_ring *ring)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index fd4bbdd88b57..00d4833e9925 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -46,6 +46,9 @@
 /* Max default timeout in ms, */
 #define I40E_MAX_NVM_TIMEOUT		18000
 
+/* Max timeout in ms for the phy to respond */
+#define I40E_MAX_PHY_TIMEOUT		500
+
 /* Switch from ms to the 1usec global time (this is the GTIME resolution) */
 #define I40E_MS_TO_GTIME(time)		((time) * 1000)
 
@@ -268,6 +271,10 @@ struct i40e_phy_info {
 					     I40E_PHY_TYPE_OFFSET)
 #define I40E_CAP_PHY_TYPE_25GBASE_LR BIT_ULL(I40E_PHY_TYPE_25GBASE_LR + \
 					     I40E_PHY_TYPE_OFFSET)
+#define I40E_CAP_PHY_TYPE_25GBASE_AOC BIT_ULL(I40E_PHY_TYPE_25GBASE_AOC + \
+					     I40E_PHY_TYPE_OFFSET)
+#define I40E_CAP_PHY_TYPE_25GBASE_ACC BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC + \
+					     I40E_PHY_TYPE_OFFSET)
 #define I40E_HW_CAP_MAX_GPIO			30
 /* Capabilities of a PF or a VF or the whole device */
 struct i40e_hw_capabilities {
@@ -276,6 +283,16 @@ struct i40e_hw_capabilities {
 #define I40E_NVM_IMAGE_TYPE_CLOUD	0x2
 #define I40E_NVM_IMAGE_TYPE_UDP_CLOUD	0x3
 
+	/* Cloud filter modes:
+	 * Mode1: Filter on L4 port only
+	 * Mode2: Filter for non-tunneled traffic
+	 * Mode3: Filter for tunnel traffic
+	 */
+#define I40E_CLOUD_FILTER_MODE1	0x6
+#define I40E_CLOUD_FILTER_MODE2	0x7
+#define I40E_CLOUD_FILTER_MODE3	0x8
+#define I40E_SWITCH_MODE_MASK	0xF
+
 	u32  management_mode;
 	u32  mng_protocols_over_mctp;
 #define I40E_MNG_PROTOCOL_PLDM		0x2
@@ -428,6 +445,18 @@ struct i40e_nvm_access {
 	u8 data[1];
 };
 
+/* (Q)SFP module access definitions */
+#define I40E_I2C_EEPROM_DEV_ADDR	0xA0
+#define I40E_I2C_EEPROM_DEV_ADDR2	0xA2
+#define I40E_MODULE_TYPE_ADDR		0x00
+#define I40E_MODULE_REVISION_ADDR	0x01
+#define I40E_MODULE_SFF_8472_COMP	0x5E
+#define I40E_MODULE_SFF_8472_SWAP	0x5C
+#define I40E_MODULE_SFF_ADDR_MODE	0x04
+#define I40E_MODULE_TYPE_QSFP_PLUS	0x0D
+#define I40E_MODULE_TYPE_QSFP28		0x11
+#define I40E_MODULE_QSFP_MAX_LEN	640
+
 /* PCI bus types */
 enum i40e_bus_type {
 	i40e_bus_type_unknown = 0,
@@ -598,8 +627,15 @@ struct i40e_hw {
 	struct i40e_dcbx_config desired_dcbx_config; /* CEE Desired Cfg */
 
 #define I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE BIT_ULL(0)
+#define I40E_HW_FLAG_802_1AD_CAPABLE        BIT_ULL(1)
+#define I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE  BIT_ULL(2)
 	u64 flags;
 
+	/* Used in set switch config AQ command */
+	u16 switch_tag;
+	u16 first_tag;
+	u16 second_tag;
+
 	/* debug mask */
 	u32 debug_mask;
 	char err_str[16];
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 4d1e670f490e..f8a794b72462 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -154,15 +154,30 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf)
 
 /**
  * i40e_vc_disable_vf
- * @pf: pointer to the PF info
  * @vf: pointer to the VF info
  *
- * Disable the VF through a SW reset
+ * Disable the VF through a SW reset.
  **/
-static inline void i40e_vc_disable_vf(struct i40e_pf *pf, struct i40e_vf *vf)
+static inline void i40e_vc_disable_vf(struct i40e_vf *vf)
 {
+	int i;
+
 	i40e_vc_notify_vf_reset(vf);
-	i40e_reset_vf(vf, false);
+
+	/* We want to ensure that an actual reset occurs initiated after this
+	 * function was called. However, we do not want to wait forever, so
+	 * we'll give a reasonable time and print a message if we failed to
+	 * ensure a reset.
+	 */
+	for (i = 0; i < 20; i++) {
+		if (i40e_reset_vf(vf, false))
+			return;
+		usleep_range(10000, 20000);
+	}
+
+	dev_warn(&vf->pf->pdev->dev,
+		 "Failed to initiate reset for VF %d after 200 milliseconds\n",
+		 vf->vf_id);
 }
 
 /**
@@ -258,7 +273,7 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id,
 	struct i40e_hw *hw = &pf->hw;
 	u16 vsi_queue_id, pf_queue_id;
 	enum i40e_queue_type qtype;
-	u16 next_q, vector_id;
+	u16 next_q, vector_id, size;
 	u32 reg, reg_idx;
 	u16 itr_idx = 0;
 
@@ -288,9 +303,11 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id,
 				     vsi_queue_id + 1));
 	}
 
-	next_q = find_first_bit(&linklistmap,
-				(I40E_MAX_VSI_QP *
-				 I40E_VIRTCHNL_SUPPORTED_QTYPES));
+	size = I40E_MAX_VSI_QP * I40E_VIRTCHNL_SUPPORTED_QTYPES;
+	next_q = find_first_bit(&linklistmap, size);
+	if (unlikely(next_q == size))
+		goto irq_list_done;
+
 	vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES;
 	qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES;
 	pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id);
@@ -298,7 +315,7 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id,
 
 	wr32(hw, reg_idx, reg);
 
-	while (next_q < (I40E_MAX_VSI_QP * I40E_VIRTCHNL_SUPPORTED_QTYPES)) {
+	while (next_q < size) {
 		switch (qtype) {
 		case I40E_QUEUE_TYPE_RX:
 			reg_idx = I40E_QINT_RQCTL(pf_queue_id);
@@ -312,12 +329,8 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id,
 			break;
 		}
 
-		next_q = find_next_bit(&linklistmap,
-				       (I40E_MAX_VSI_QP *
-					I40E_VIRTCHNL_SUPPORTED_QTYPES),
-				       next_q + 1);
-		if (next_q <
-		    (I40E_MAX_VSI_QP * I40E_VIRTCHNL_SUPPORTED_QTYPES)) {
+		next_q = find_next_bit(&linklistmap, size, next_q + 1);
+		if (next_q < size) {
 			vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES;
 			qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES;
 			pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id,
@@ -423,6 +436,9 @@ static int i40e_config_iwarp_qvlist(struct i40e_vf *vf,
 	       (sizeof(struct virtchnl_iwarp_qv_info) *
 						(qvlist_info->num_vectors - 1));
 	vf->qvlist_info = kzalloc(size, GFP_KERNEL);
+	if (!vf->qvlist_info)
+		return -ENOMEM;
+
 	vf->qvlist_info->num_vectors = qvlist_info->num_vectors;
 
 	msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
@@ -621,7 +637,7 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id,
 	rx_ctx.dsize = 1;
 
 	/* default values */
-	rx_ctx.lrxqthresh = 2;
+	rx_ctx.lrxqthresh = 1;
 	rx_ctx.crcstrip = 1;
 	rx_ctx.prefena = 1;
 	rx_ctx.l2tsel = 1;
@@ -815,6 +831,14 @@ static void i40e_free_vf_res(struct i40e_vf *vf)
 	 */
 	clear_bit(I40E_VF_STATE_INIT, &vf->vf_states);
 
+	/* It's possible the VF had requeuested more queues than the default so
+	 * do the accounting here when we're about to free them.
+	 */
+	if (vf->num_queue_pairs > I40E_DEFAULT_QUEUES_PER_VF) {
+		pf->queues_left += vf->num_queue_pairs -
+				   I40E_DEFAULT_QUEUES_PER_VF;
+	}
+
 	/* free vsi & disconnect it from the parent uplink */
 	if (vf->lan_vsi_idx) {
 		i40e_vsi_release(pf->vsi[vf->lan_vsi_idx]);
@@ -853,7 +877,8 @@ static void i40e_free_vf_res(struct i40e_vf *vf)
 	}
 	/* reset some of the state variables keeping track of the resources */
 	vf->num_queue_pairs = 0;
-	vf->vf_states = 0;
+	clear_bit(I40E_VF_STATE_MC_PROMISC, &vf->vf_states);
+	clear_bit(I40E_VF_STATE_UC_PROMISC, &vf->vf_states);
 }
 
 /**
@@ -868,12 +893,27 @@ static int i40e_alloc_vf_res(struct i40e_vf *vf)
 	int total_queue_pairs = 0;
 	int ret;
 
+	if (vf->num_req_queues &&
+	    vf->num_req_queues <= pf->queues_left + I40E_DEFAULT_QUEUES_PER_VF)
+		pf->num_vf_qps = vf->num_req_queues;
+	else
+		pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF;
+
 	/* allocate hw vsi context & associated resources */
 	ret = i40e_alloc_vsi_res(vf, I40E_VSI_SRIOV);
 	if (ret)
 		goto error_alloc;
 	total_queue_pairs += pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs;
 
+	/* We account for each VF to get a default number of queue pairs.  If
+	 * the VF has now requested more, we need to account for that to make
+	 * certain we never request more queues than we actually have left in
+	 * HW.
+	 */
+	if (total_queue_pairs > I40E_DEFAULT_QUEUES_PER_VF)
+		pf->queues_left -=
+			total_queue_pairs - I40E_DEFAULT_QUEUES_PER_VF;
+
 	if (vf->trusted)
 		set_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
 	else
@@ -1008,8 +1048,8 @@ static void i40e_cleanup_reset_vf(struct i40e_vf *vf)
 		set_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states);
 		clear_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
 		/* Do not notify the client during VF init */
-		if (test_and_clear_bit(I40E_VF_STATE_PRE_ENABLE,
-				       &vf->vf_states))
+		if (!test_and_clear_bit(I40E_VF_STATE_PRE_ENABLE,
+					&vf->vf_states))
 			i40e_notify_client_of_vf_reset(pf, abs_vf_id);
 		vf->num_vlan = 0;
 	}
@@ -1026,9 +1066,9 @@ static void i40e_cleanup_reset_vf(struct i40e_vf *vf)
  * @vf: pointer to the VF structure
  * @flr: VFLR was issued or not
  *
- * reset the VF
+ * Returns true if the VF is reset, false otherwise.
  **/
-void i40e_reset_vf(struct i40e_vf *vf, bool flr)
+bool i40e_reset_vf(struct i40e_vf *vf, bool flr)
 {
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_hw *hw = &pf->hw;
@@ -1036,9 +1076,11 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr)
 	u32 reg;
 	int i;
 
-	/* If VFs have been disabled, there is no need to reset */
+	/* If the VFs have been disabled, this means something else is
+	 * resetting the VF, so we shouldn't continue.
+	 */
 	if (test_and_set_bit(__I40E_VF_DISABLE, pf->state))
-		return;
+		return false;
 
 	i40e_trigger_vf_reset(vf, flr);
 
@@ -1075,6 +1117,8 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr)
 
 	i40e_flush(hw);
 	clear_bit(__I40E_VF_DISABLE, pf->state);
+
+	return true;
 }
 
 /**
@@ -1086,8 +1130,10 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr)
  * VF, then do all the waiting in one chunk, and finally finish restoring each
  * VF after the wait. This is useful during PF routines which need to reset
  * all VFs, as otherwise it must perform these resets in a serialized fashion.
+ *
+ * Returns true if any VFs were reset, and false otherwise.
  **/
-void i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
+bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
 {
 	struct i40e_hw *hw = &pf->hw;
 	struct i40e_vf *vf;
@@ -1096,11 +1142,11 @@ void i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
 
 	/* If we don't have any VFs, then there is nothing to reset */
 	if (!pf->num_alloc_vfs)
-		return;
+		return false;
 
 	/* If VFs have been disabled, there is no need to reset */
 	if (test_and_set_bit(__I40E_VF_DISABLE, pf->state))
-		return;
+		return false;
 
 	/* Begin reset on all VFs at once */
 	for (v = 0; v < pf->num_alloc_vfs; v++)
@@ -1175,6 +1221,8 @@ void i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
 
 	i40e_flush(hw);
 	clear_bit(__I40E_VF_DISABLE, pf->state);
+
+	return true;
 }
 
 /**
@@ -1308,7 +1356,7 @@ err_alloc:
 		i40e_free_vfs(pf);
 err_iov:
 	/* Re-enable interrupt 0. */
-	i40e_irq_dynamic_enable_icr0(pf, false);
+	i40e_irq_dynamic_enable_icr0(pf);
 	return ret;
 }
 
@@ -1377,8 +1425,7 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
 	if (num_vfs) {
 		if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
 			pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
-			i40e_do_reset_safe(pf,
-					   BIT_ULL(__I40E_PF_RESET_REQUESTED));
+			i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG);
 		}
 		return i40e_pci_sriov_enable(pdev, num_vfs);
 	}
@@ -1386,7 +1433,7 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
 	if (!pci_vfs_assigned(pf->pdev)) {
 		i40e_free_vfs(pf);
 		pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
-		i40e_do_reset_safe(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED));
+		i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG);
 	} else {
 		dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs.\n");
 		return -EINVAL;
@@ -1537,6 +1584,8 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
 	    (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_IWARP)) {
 		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_IWARP;
 		set_bit(I40E_VF_STATE_IWARPENA, &vf->vf_states);
+	} else {
+		clear_bit(I40E_VF_STATE_IWARPENA, &vf->vf_states);
 	}
 
 	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
@@ -1579,6 +1628,9 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
 					VIRTCHNL_VF_OFFLOAD_WB_ON_ITR;
 	}
 
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_REQ_QUEUES)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_REQ_QUEUES;
+
 	vfres->num_vsis = num_vsis;
 	vfres->num_queue_pairs = vf->num_queue_pairs;
 	vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf;
@@ -1987,6 +2039,57 @@ error_param:
 }
 
 /**
+ * i40e_vc_request_queues_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * VFs get a default number of queues but can use this message to request a
+ * different number.  If the request is successful, PF will reset the VF and
+ * return 0.  If unsuccessful, PF will send message informing VF of number of
+ * available queues and return result of sending VF a message.
+ **/
+static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg, int msglen)
+{
+	struct virtchnl_vf_res_request *vfres =
+		(struct virtchnl_vf_res_request *)msg;
+	int req_pairs = vfres->num_queue_pairs;
+	int cur_pairs = vf->num_queue_pairs;
+	struct i40e_pf *pf = vf->pf;
+
+	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states))
+		return -EINVAL;
+
+	if (req_pairs <= 0) {
+		dev_err(&pf->pdev->dev,
+			"VF %d tried to request %d queues.  Ignoring.\n",
+			vf->vf_id, req_pairs);
+	} else if (req_pairs > I40E_MAX_VF_QUEUES) {
+		dev_err(&pf->pdev->dev,
+			"VF %d tried to request more than %d queues.\n",
+			vf->vf_id,
+			I40E_MAX_VF_QUEUES);
+		vfres->num_queue_pairs = I40E_MAX_VF_QUEUES;
+	} else if (req_pairs - cur_pairs > pf->queues_left) {
+		dev_warn(&pf->pdev->dev,
+			 "VF %d requested %d more queues, but only %d left.\n",
+			 vf->vf_id,
+			 req_pairs - cur_pairs,
+			 pf->queues_left);
+		vfres->num_queue_pairs = pf->queues_left + cur_pairs;
+	} else {
+		/* successful request */
+		vf->num_req_queues = req_pairs;
+		i40e_vc_notify_vf_reset(vf);
+		i40e_reset_vf(vf, false);
+		return 0;
+	}
+
+	return i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_REQUEST_QUEUES, 0,
+				      (u8 *)vfres, sizeof(vfres));
+}
+
+/**
  * i40e_vc_get_stats_msg
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
@@ -2708,6 +2811,9 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
 	case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
 		ret = i40e_vc_disable_vlan_stripping(vf, msg, msglen);
 		break;
+	case VIRTCHNL_OP_REQUEST_QUEUES:
+		ret = i40e_vc_request_queues_msg(vf, msg, msglen);
+		break;
 
 	case VIRTCHNL_OP_UNKNOWN:
 	default:
@@ -2779,6 +2885,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
 	struct i40e_mac_filter *f;
 	struct i40e_vf *vf;
 	int ret = 0;
+	struct hlist_node *h;
 	int bkt;
 
 	/* validate the request */
@@ -2817,7 +2924,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
 	/* Delete all the filters for this VSI - we're going to kill it
 	 * anyway.
 	 */
-	hash_for_each(vsi->mac_filter_hash, bkt, f, hlist)
+	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
 		__i40e_del_filter(vsi, f);
 
 	spin_unlock_bh(&vsi->mac_filter_hash_lock);
@@ -2840,7 +2947,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
 	}
 
 	/* Force the VF driver stop so it has to reload with new MAC address */
-	i40e_vc_disable_vf(pf, vf);
+	i40e_vc_disable_vf(vf);
 	dev_info(&pf->pdev->dev, "Reload the VF driver to make this change effective.\n");
 
 error_param:
@@ -2848,6 +2955,34 @@ error_param:
 }
 
 /**
+ * i40e_vsi_has_vlans - True if VSI has configured VLANs
+ * @vsi: pointer to the vsi
+ *
+ * Check if a VSI has configured any VLANs. False if we have a port VLAN or if
+ * we have no configured VLANs. Do not call while holding the
+ * mac_filter_hash_lock.
+ */
+static bool i40e_vsi_has_vlans(struct i40e_vsi *vsi)
+{
+	bool have_vlans;
+
+	/* If we have a port VLAN, then the VSI cannot have any VLANs
+	 * configured, as all MAC/VLAN filters will be assigned to the PVID.
+	 */
+	if (vsi->info.pvid)
+		return false;
+
+	/* Since we don't have a PVID, we know that if the device is in VLAN
+	 * mode it must be because of a VLAN filter configured on this VSI.
+	 */
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+	have_vlans = i40e_is_vsi_in_vlan(vsi);
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+	return have_vlans;
+}
+
+/**
  * i40e_ndo_set_vf_port_vlan
  * @netdev: network interface device structure
  * @vf_id: VF identifier
@@ -2899,10 +3034,7 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
 		/* duplicate request, so just return success */
 		goto error_pvid;
 
-	/* Locked once because multiple functions below iterate list */
-	spin_lock_bh(&vsi->mac_filter_hash_lock);
-
-	if (le16_to_cpu(vsi->info.pvid) == 0 && i40e_is_vsi_in_vlan(vsi)) {
+	if (i40e_vsi_has_vlans(vsi)) {
 		dev_err(&pf->pdev->dev,
 			"VF %d has already configured VLAN filters and the administrator is requesting a port VLAN override.\nPlease unload and reload the VF driver for this change to take effect.\n",
 			vf_id);
@@ -2910,11 +3042,14 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
 		 * the right thing by reconfiguring his network correctly
 		 * and then reloading the VF driver.
 		 */
-		i40e_vc_disable_vf(pf, vf);
+		i40e_vc_disable_vf(vf);
 		/* During reset the VF got a new VSI, so refresh the pointer. */
 		vsi = pf->vsi[vf->lan_vsi_idx];
 	}
 
+	/* Locked once because multiple functions below iterate list */
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+
 	/* Check for condition where there was already a port VLAN ID
 	 * filter set and now it is being deleted by setting it to zero.
 	 * Additionally check for the condition where there was a port
@@ -2987,8 +3122,6 @@ error_pvid:
 	return ret;
 }
 
-#define I40E_BW_CREDIT_DIVISOR 50     /* 50Mbps per BW credit */
-#define I40E_MAX_BW_INACTIVE_ACCUM 4  /* device can accumulate 4 credits max */
 /**
  * i40e_ndo_set_vf_bw
  * @netdev: network interface device structure
@@ -3004,7 +3137,6 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
 	struct i40e_pf *pf = np->vsi->back;
 	struct i40e_vsi *vsi;
 	struct i40e_vf *vf;
-	int speed = 0;
 	int ret = 0;
 
 	/* validate the request */
@@ -3029,48 +3161,10 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
 		goto error;
 	}
 
-	switch (pf->hw.phy.link_info.link_speed) {
-	case I40E_LINK_SPEED_40GB:
-		speed = 40000;
-		break;
-	case I40E_LINK_SPEED_25GB:
-		speed = 25000;
-		break;
-	case I40E_LINK_SPEED_20GB:
-		speed = 20000;
-		break;
-	case I40E_LINK_SPEED_10GB:
-		speed = 10000;
-		break;
-	case I40E_LINK_SPEED_1GB:
-		speed = 1000;
-		break;
-	default:
-		break;
-	}
-
-	if (max_tx_rate > speed) {
-		dev_err(&pf->pdev->dev, "Invalid max tx rate %d specified for VF %d.\n",
-			max_tx_rate, vf->vf_id);
-		ret = -EINVAL;
+	ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate);
+	if (ret)
 		goto error;
-	}
-
-	if ((max_tx_rate < 50) && (max_tx_rate > 0)) {
-		dev_warn(&pf->pdev->dev, "Setting max Tx rate to minimum usable value of 50Mbps.\n");
-		max_tx_rate = 50;
-	}
 
-	/* Tx rate credits are in values of 50Mbps, 0 is disabled*/
-	ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid,
-					  max_tx_rate / I40E_BW_CREDIT_DIVISOR,
-					  I40E_MAX_BW_INACTIVE_ACCUM, NULL);
-	if (ret) {
-		dev_err(&pf->pdev->dev, "Unable to set max tx rate, error code %d.\n",
-			ret);
-		ret = -EIO;
-		goto error;
-	}
 	vf->tx_rate = max_tx_rate;
 error:
 	return ret;
@@ -3279,14 +3373,11 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting)
 
 	vf = &pf->vf[vf_id];
 
-	if (!vf)
-		return -EINVAL;
 	if (setting == vf->trusted)
 		goto out;
 
 	vf->trusted = setting;
-	i40e_vc_notify_vf_reset(vf);
-	i40e_reset_vf(vf, false);
+	i40e_vc_disable_vf(vf);
 	dev_info(&pf->pdev->dev, "VF %u is now %strusted\n",
 		 vf_id, setting ? "" : "un");
 out:
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 1f4b0c504368..5efc4f92bb37 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -56,7 +56,6 @@ enum i40e_vf_states {
 	I40E_VF_STATE_INIT = 0,
 	I40E_VF_STATE_ACTIVE,
 	I40E_VF_STATE_IWARPENA,
-	I40E_VF_STATE_FCOEENA,
 	I40E_VF_STATE_DISABLED,
 	I40E_VF_STATE_MC_PROMISC,
 	I40E_VF_STATE_UC_PROMISC,
@@ -97,6 +96,7 @@ struct i40e_vf {
 	u16 lan_vsi_id;		/* ID as used by firmware */
 
 	u8 num_queue_pairs;	/* num of qps assigned to VF vsis */
+	u8 num_req_queues;	/* num of requested qps */
 	u64 num_mdd_events;	/* num of mdd events detected */
 	/* num of continuous malformed or invalid msgs detected */
 	u64 num_invalid_msgs;
@@ -121,8 +121,8 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs);
 int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
 			   u32 v_retval, u8 *msg, u16 msglen);
 int i40e_vc_process_vflr_event(struct i40e_pf *pf);
-void i40e_reset_vf(struct i40e_vf *vf, bool flr);
-void i40e_reset_all_vfs(struct i40e_pf *pf, bool flr);
+bool i40e_reset_vf(struct i40e_vf *vf, bool flr);
+bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr);
 void i40e_vc_notify_vf_reset(struct i40e_vf *vf);
 
 /* VF configuration related iplink handlers */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
index 83e63e55c4b4..06b04572c518 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
@@ -34,7 +34,15 @@
  */
 
 #define I40E_FW_API_VERSION_MAJOR	0x0001
-#define I40E_FW_API_VERSION_MINOR	0x0005
+#define I40E_FW_API_VERSION_MINOR_X722	0x0005
+#define I40E_FW_API_VERSION_MINOR_X710	0x0007
+
+#define I40E_FW_MINOR_VERSION(_h) ((_h)->mac.type == I40E_MAC_XL710 ? \
+					I40E_FW_API_VERSION_MINOR_X710 : \
+					I40E_FW_API_VERSION_MINOR_X722)
+
+/* API version 1.7 implements additional link and PHY-specific APIs  */
+#define I40E_MINOR_VER_GET_LINK_INFO_XL710 0x0007
 
 struct i40e_aq_desc {
 	__le16 flags;
@@ -236,6 +244,8 @@ enum i40e_admin_queue_opc {
 	i40e_aqc_opc_set_phy_debug		= 0x0622,
 	i40e_aqc_opc_upload_ext_phy_fm		= 0x0625,
 	i40e_aqc_opc_run_phy_activity		= 0x0626,
+	i40e_aqc_opc_set_phy_register		= 0x0628,
+	i40e_aqc_opc_get_phy_register		= 0x0629,
 
 	/* NVM commands */
 	i40e_aqc_opc_nvm_read			= 0x0701,
@@ -761,7 +771,22 @@ struct i40e_aqc_set_switch_config {
 #define I40E_AQ_SET_SWITCH_CFG_PROMISC		0x0001
 #define I40E_AQ_SET_SWITCH_CFG_L2_FILTER	0x0002
 	__le16	valid_flags;
-	u8	reserved[12];
+	/* The ethertype in switch_tag is dropped on ingress and used
+	 * internally by the switch. Set this to zero for the default
+	 * of 0x88a8 (802.1ad). Should be zero for firmware API
+	 * versions lower than 1.7.
+	 */
+	__le16	switch_tag;
+	/* The ethertypes in first_tag and second_tag are used to
+	 * match the outer and inner VLAN tags (respectively) when HW
+	 * double VLAN tagging is enabled via the set port parameters
+	 * AQ command. Otherwise these are both ignored. Set them to
+	 * zero for their defaults of 0x8100 (802.1Q). Should be zero
+	 * for firmware API versions lower than 1.7.
+	 */
+	__le16	first_tag;
+	__le16	second_tag;
+	u8	reserved[6];
 };
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_set_switch_config);
@@ -1314,14 +1339,16 @@ struct i40e_aqc_add_remove_cloud_filters {
 #define I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_SHIFT	0
 #define I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_MASK	(0x3FF << \
 					I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_SHIFT)
-	u8	reserved2[4];
+	u8	big_buffer_flag;
+#define I40E_AQC_ADD_CLOUD_CMD_BB	1
+	u8	reserved2[3];
 	__le32	addr_high;
 	__le32	addr_low;
 };
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_cloud_filters);
 
-struct i40e_aqc_add_remove_cloud_filters_element_data {
+struct i40e_aqc_cloud_filters_element_data {
 	u8	outer_mac[6];
 	u8	inner_mac[6];
 	__le16	inner_vlan;
@@ -1333,6 +1360,9 @@ struct i40e_aqc_add_remove_cloud_filters_element_data {
 		struct {
 			u8 data[16];
 		} v6;
+		struct {
+			__le16 data[8];
+		} raw_v6;
 	} ipaddr;
 	__le16	flags;
 #define I40E_AQC_ADD_CLOUD_FILTER_SHIFT			0
@@ -1351,6 +1381,10 @@ struct i40e_aqc_add_remove_cloud_filters_element_data {
 #define I40E_AQC_ADD_CLOUD_FILTER_IMAC			0x000A
 #define I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC	0x000B
 #define I40E_AQC_ADD_CLOUD_FILTER_IIP			0x000C
+/* 0x0010 to 0x0017 is for custom filters */
+#define I40E_AQC_ADD_CLOUD_FILTER_IP_PORT		0x0010 /* Dest IP + L4 Port */
+#define I40E_AQC_ADD_CLOUD_FILTER_MAC_PORT		0x0011 /* Dest MAC + L4 Port */
+#define I40E_AQC_ADD_CLOUD_FILTER_MAC_VLAN_PORT		0x0012 /* Dest MAC + VLAN + L4 Port */
 
 #define I40E_AQC_ADD_CLOUD_FLAGS_TO_QUEUE		0x0080
 #define I40E_AQC_ADD_CLOUD_VNK_SHIFT			6
@@ -1385,6 +1419,49 @@ struct i40e_aqc_add_remove_cloud_filters_element_data {
 	u8	response_reserved[7];
 };
 
+I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_cloud_filters_element_data);
+
+/* i40e_aqc_cloud_filters_element_bb is used when
+ * I40E_AQC_ADD_CLOUD_CMD_BB flag is set.
+ */
+struct i40e_aqc_cloud_filters_element_bb {
+	struct i40e_aqc_cloud_filters_element_data element;
+	u16     general_fields[32];
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD0	0
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD1	1
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD2	2
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD0	3
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD1	4
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD2	5
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD0	6
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD1	7
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD2	8
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD0	9
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD1	10
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD2	11
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X14_WORD0	12
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X14_WORD1	13
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X14_WORD2	14
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD0	15
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD1	16
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD2	17
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD3	18
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD4	19
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD5	20
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD6	21
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD7	22
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD0	23
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD1	24
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD2	25
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD3	26
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD4	27
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD5	28
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD6	29
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD7	30
+};
+
+I40E_CHECK_STRUCT_LEN(0x80, i40e_aqc_cloud_filters_element_bb);
+
 struct i40e_aqc_remove_cloud_filters_completion {
 	__le16 perfect_ovlan_used;
 	__le16 perfect_ovlan_free;
@@ -1396,6 +1473,60 @@ struct i40e_aqc_remove_cloud_filters_completion {
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_remove_cloud_filters_completion);
 
+/* Replace filter Command 0x025F
+ * uses the i40e_aqc_replace_cloud_filters,
+ * and the generic indirect completion structure
+ */
+struct i40e_filter_data {
+	u8 filter_type;
+	u8 input[3];
+};
+
+I40E_CHECK_STRUCT_LEN(4, i40e_filter_data);
+
+struct i40e_aqc_replace_cloud_filters_cmd {
+	u8      valid_flags;
+#define I40E_AQC_REPLACE_L1_FILTER		0x0
+#define I40E_AQC_REPLACE_CLOUD_FILTER		0x1
+#define I40E_AQC_GET_CLOUD_FILTERS		0x2
+#define I40E_AQC_MIRROR_CLOUD_FILTER		0x4
+#define I40E_AQC_HIGH_PRIORITY_CLOUD_FILTER	0x8
+	u8      old_filter_type;
+	u8      new_filter_type;
+	u8      tr_bit;
+	u8      reserved[4];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_replace_cloud_filters_cmd);
+
+struct i40e_aqc_replace_cloud_filters_cmd_buf {
+	u8      data[32];
+/* Filter type INPUT codes*/
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_ENTRIES_MAX	3
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED	BIT(7)
+
+/* Field Vector offsets */
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_MAC_DA	0
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG_ETH	6
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG	7
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_VLAN	8
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG_OVLAN	9
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG_IVLAN	10
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_TUNNLE_KEY	11
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_IMAC	12
+/* big FLU */
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_IP_DA	14
+/* big FLU */
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_OIP_DA	15
+
+#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_INNER_VLAN	37
+	struct i40e_filter_data filters[8];
+};
+
+I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_replace_cloud_filters_cmd_buf);
+
 /* Add Mirror Rule (indirect or direct 0x0260)
  * Delete Mirror Rule (indirect or direct 0x0261)
  * note: some rule types (4,5) do not use an external buffer.
@@ -1722,6 +1853,8 @@ enum i40e_aq_phy_type {
 	I40E_PHY_TYPE_10GBASE_CR1_CU		= 0xB,
 	I40E_PHY_TYPE_10GBASE_AOC		= 0xC,
 	I40E_PHY_TYPE_40GBASE_AOC		= 0xD,
+	I40E_PHY_TYPE_UNRECOGNIZED		= 0xE,
+	I40E_PHY_TYPE_UNSUPPORTED		= 0xF,
 	I40E_PHY_TYPE_100BASE_TX		= 0x11,
 	I40E_PHY_TYPE_1000BASE_T		= 0x12,
 	I40E_PHY_TYPE_10GBASE_T			= 0x13,
@@ -1740,7 +1873,12 @@ enum i40e_aq_phy_type {
 	I40E_PHY_TYPE_25GBASE_CR		= 0x20,
 	I40E_PHY_TYPE_25GBASE_SR		= 0x21,
 	I40E_PHY_TYPE_25GBASE_LR		= 0x22,
-	I40E_PHY_TYPE_MAX
+	I40E_PHY_TYPE_25GBASE_AOC		= 0x23,
+	I40E_PHY_TYPE_25GBASE_ACC		= 0x24,
+	I40E_PHY_TYPE_MAX,
+	I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP	= 0xFD,
+	I40E_PHY_TYPE_EMPTY			= 0xFE,
+	I40E_PHY_TYPE_DEFAULT			= 0xFF,
 };
 
 #define I40E_LINK_SPEED_100MB_SHIFT	0x1
@@ -1797,6 +1935,8 @@ struct i40e_aq_get_phy_abilities_resp {
 #define I40E_AQ_PHY_TYPE_EXT_25G_CR	0X02
 #define I40E_AQ_PHY_TYPE_EXT_25G_SR	0x04
 #define I40E_AQ_PHY_TYPE_EXT_25G_LR	0x08
+#define I40E_AQ_PHY_TYPE_EXT_25G_AOC	0x10
+#define I40E_AQ_PHY_TYPE_EXT_25G_ACC	0x20
 	u8	fec_cfg_curr_mod_ext_info;
 #define I40E_AQ_ENABLE_FEC_KR		0x01
 #define I40E_AQ_ENABLE_FEC_RS		0x02
@@ -1930,19 +2070,31 @@ struct i40e_aqc_get_link_status {
 #define I40E_AQ_25G_SERDES_UCODE_ERR	0X04
 #define I40E_AQ_25G_NIMB_UCODE_ERR	0X05
 	u8	loopback; /* use defines from i40e_aqc_set_lb_mode */
+/* Since firmware API 1.7 loopback field keeps power class info as well */
+#define I40E_AQ_LOOPBACK_MASK		0x07
+#define I40E_AQ_PWR_CLASS_SHIFT_LB	6
+#define I40E_AQ_PWR_CLASS_MASK_LB	(0x03 << I40E_AQ_PWR_CLASS_SHIFT_LB)
 	__le16	max_frame_size;
 	u8	config;
 #define I40E_AQ_CONFIG_FEC_KR_ENA	0x01
 #define I40E_AQ_CONFIG_FEC_RS_ENA	0x02
 #define I40E_AQ_CONFIG_CRC_ENA		0x04
 #define I40E_AQ_CONFIG_PACING_MASK	0x78
-	u8	power_desc;
+	union {
+		struct {
+			u8	power_desc;
 #define I40E_AQ_LINK_POWER_CLASS_1	0x00
 #define I40E_AQ_LINK_POWER_CLASS_2	0x01
 #define I40E_AQ_LINK_POWER_CLASS_3	0x02
 #define I40E_AQ_LINK_POWER_CLASS_4	0x03
 #define I40E_AQ_PWR_CLASS_MASK		0x03
-	u8	reserved[4];
+			u8	reserved[4];
+		};
+		struct {
+			u8	link_type[4];
+			u8	link_type_ext;
+		};
+	};
 };
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_get_link_status);
@@ -2022,6 +2174,22 @@ struct i40e_aqc_run_phy_activity {
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_run_phy_activity);
 
+/* Set PHY Register command (0x0628) */
+/* Get PHY Register command (0x0629) */
+struct i40e_aqc_phy_register_access {
+	u8	phy_interface;
+#define I40E_AQ_PHY_REG_ACCESS_INTERNAL	0
+#define I40E_AQ_PHY_REG_ACCESS_EXTERNAL	1
+#define I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE	2
+	u8	dev_address;
+	u8	reserved1[2];
+	__le32	reg_address;
+	__le32	reg_value;
+	u8	reserved2[4];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_phy_register_access);
+
 /* NVM Read command (indirect 0x0701)
  * NVM Erase commands (direct 0x0702)
  * NVM Update commands (indirect 0x0703)
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c
index 8d3a2bfe186a..7d70bf69b249 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c
@@ -1042,6 +1042,75 @@ do_retry:
 }
 
 /**
+ * i40evf_aq_set_phy_register
+ * @hw: pointer to the hw struct
+ * @phy_select: select which phy should be accessed
+ * @dev_addr: PHY device address
+ * @reg_addr: PHY register address
+ * @reg_val: new register value
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Reset the external PHY.
+ **/
+i40e_status i40evf_aq_set_phy_register(struct i40e_hw *hw,
+				       u8 phy_select, u8 dev_addr,
+				       u32 reg_addr, u32 reg_val,
+				       struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_phy_register_access *cmd =
+		(struct i40e_aqc_phy_register_access *)&desc.params.raw;
+	i40e_status status;
+
+	i40evf_fill_default_direct_cmd_desc(&desc,
+					    i40e_aqc_opc_set_phy_register);
+
+	cmd->phy_interface = phy_select;
+	cmd->dev_address = dev_addr;
+	cmd->reg_address = cpu_to_le32(reg_addr);
+	cmd->reg_value = cpu_to_le32(reg_val);
+
+	status = i40evf_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40evf_aq_get_phy_register
+ * @hw: pointer to the hw struct
+ * @phy_select: select which phy should be accessed
+ * @dev_addr: PHY device address
+ * @reg_addr: PHY register address
+ * @reg_val: read register value
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Reset the external PHY.
+ **/
+i40e_status i40evf_aq_get_phy_register(struct i40e_hw *hw,
+				       u8 phy_select, u8 dev_addr,
+				       u32 reg_addr, u32 *reg_val,
+				       struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_phy_register_access *cmd =
+		(struct i40e_aqc_phy_register_access *)&desc.params.raw;
+	i40e_status status;
+
+	i40evf_fill_default_direct_cmd_desc(&desc,
+					    i40e_aqc_opc_get_phy_register);
+
+	cmd->phy_interface = phy_select;
+	cmd->dev_address = dev_addr;
+	cmd->reg_address = cpu_to_le32(reg_addr);
+
+	status = i40evf_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+	if (!status)
+		*reg_val = le32_to_cpu(cmd->reg_value);
+
+	return status;
+}
+
+/**
  * i40e_aq_send_msg_to_pf
  * @hw: pointer to the hardware structure
  * @v_opcode: opcodes for VF-PF communication
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
index c9836bba487d..b624b5994075 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
@@ -111,6 +111,15 @@ i40e_status i40evf_aq_rx_ctl_write_register(struct i40e_hw *hw,
 				u32 reg_addr, u32 reg_val,
 				struct i40e_asq_cmd_details *cmd_details);
 void i40evf_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val);
+i40e_status i40e_aq_set_phy_register(struct i40e_hw *hw,
+				     u8 phy_select, u8 dev_addr,
+				     u32 reg_addr, u32 reg_val,
+				     struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_get_phy_register(struct i40e_hw *hw,
+				     u8 phy_select, u8 dev_addr,
+				     u32 reg_addr, u32 *reg_val,
+				     struct i40e_asq_cmd_details *cmd_details);
+
 i40e_status i40e_read_phy_register(struct i40e_hw *hw, u8 page,
 				   u16 reg, u8 phy_addr, u16 *value);
 i40e_status i40e_write_phy_register(struct i40e_hw *hw, u8 page,
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index c32c62462c84..fe817e2b6fef 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -358,14 +358,14 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
 {
 	enum i40e_latency_range new_latency_range = rc->latency_range;
 	u32 new_itr = rc->itr;
-	int bytes_per_int;
+	int bytes_per_usec;
 	unsigned int usecs, estimated_usecs;
 
 	if (rc->total_packets == 0 || !rc->itr)
 		return false;
 
 	usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
-	bytes_per_int = rc->total_bytes / usecs;
+	bytes_per_usec = rc->total_bytes / usecs;
 
 	/* The calculations in this algorithm depend on interrupts actually
 	 * firing at the ITR rate. This may not happen if the packet rate is
@@ -391,18 +391,18 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
 	 */
 	switch (new_latency_range) {
 	case I40E_LOWEST_LATENCY:
-		if (bytes_per_int > 10)
+		if (bytes_per_usec > 10)
 			new_latency_range = I40E_LOW_LATENCY;
 		break;
 	case I40E_LOW_LATENCY:
-		if (bytes_per_int > 20)
+		if (bytes_per_usec > 20)
 			new_latency_range = I40E_BULK_LATENCY;
-		else if (bytes_per_int <= 10)
+		else if (bytes_per_usec <= 10)
 			new_latency_range = I40E_LOWEST_LATENCY;
 		break;
 	case I40E_BULK_LATENCY:
 	default:
-		if (bytes_per_int <= 20)
+		if (bytes_per_usec <= 20)
 			new_latency_range = I40E_LOW_LATENCY;
 		break;
 	}
@@ -1409,9 +1409,7 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr)
 	u32 val;
 
 	val = I40E_VFINT_DYN_CTLN1_INTENA_MASK |
-	      /* Don't clear PBA because that can cause lost interrupts that
-	       * came in while we were cleaning/polling
-	       */
+	      I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK |
 	      (type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) |
 	      (itr << I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT);
 
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
index 0d9f98bc07bd..8d26c85d12e1 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
@@ -38,8 +38,10 @@
 #define I40E_ITR_8K                0x003E
 #define I40E_ITR_4K                0x007A
 #define I40E_MAX_INTRL             0x3B    /* reg uses 4 usec resolution */
-#define I40E_ITR_RX_DEF            I40E_ITR_20K
-#define I40E_ITR_TX_DEF            I40E_ITR_20K
+#define I40E_ITR_RX_DEF            (ITR_REG_TO_USEC(I40E_ITR_20K) | \
+				    I40E_ITR_DYNAMIC)
+#define I40E_ITR_TX_DEF            (ITR_REG_TO_USEC(I40E_ITR_20K) | \
+				    I40E_ITR_DYNAMIC)
 #define I40E_ITR_DYNAMIC           0x8000  /* use top bit as a flag */
 #define I40E_MIN_INT_RATE          250     /* ~= 1000000 / (I40E_MAX_ITR * 2) */
 #define I40E_MAX_INT_RATE          500000  /* == 1000000 / (I40E_MIN_ITR * 2) */
@@ -189,7 +191,7 @@ static inline bool i40e_test_staterr(union i40e_rx_desc *rx_desc,
 }
 
 /* How many Rx Buffers do we bundle into one write to the hardware ? */
-#define I40E_RX_BUFFER_WRITE	16	/* Must be power of 2 */
+#define I40E_RX_BUFFER_WRITE	32	/* Must be power of 2 */
 #define I40E_RX_INCREMENT(r, i) \
 	do {					\
 		(i)++;				\
@@ -325,6 +327,7 @@ struct i40e_rx_queue_stats {
 enum i40e_ring_state_t {
 	__I40E_TX_FDIR_INIT_DONE,
 	__I40E_TX_XPS_INIT_DONE,
+	__I40E_RING_STATE_NBITS /* must be last */
 };
 
 /* some useful defines for virtchannel interface, which
@@ -348,7 +351,7 @@ struct i40e_ring {
 		struct i40e_tx_buffer *tx_bi;
 		struct i40e_rx_buffer *rx_bi;
 	};
-	unsigned long state;
+	DECLARE_BITMAP(state, __I40E_RING_STATE_NBITS);
 	u16 queue_index;		/* Queue number of ring */
 	u8 dcb_tc;			/* Traffic class of ring */
 	u8 __iomem *tail;
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h
index 2ea919d9cdcf..213b773dfad6 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h
@@ -46,6 +46,9 @@
 /* Max default timeout in ms, */
 #define I40E_MAX_NVM_TIMEOUT		18000
 
+/* Max timeout in ms for the phy to respond */
+#define I40E_MAX_PHY_TIMEOUT		500
+
 /* Switch from ms to the 1usec global time (this is the GTIME resolution) */
 #define I40E_MS_TO_GTIME(time)		((time) * 1000)
 
@@ -401,6 +404,18 @@ struct i40e_nvm_access {
 	u8 data[1];
 };
 
+/* (Q)SFP module access definitions */
+#define I40E_I2C_EEPROM_DEV_ADDR	0xA0
+#define I40E_I2C_EEPROM_DEV_ADDR2	0xA2
+#define I40E_MODULE_TYPE_ADDR		0x00
+#define I40E_MODULE_REVISION_ADDR	0x01
+#define I40E_MODULE_SFF_8472_COMP	0x5E
+#define I40E_MODULE_SFF_8472_SWAP	0x5C
+#define I40E_MODULE_SFF_ADDR_MODE	0x04
+#define I40E_MODULE_TYPE_QSFP_PLUS	0x0D
+#define I40E_MODULE_TYPE_QSFP28		0x11
+#define I40E_MODULE_QSFP_MAX_LEN	640
+
 /* PCI bus types */
 enum i40e_bus_type {
 	i40e_bus_type_unknown = 0,
@@ -556,11 +571,19 @@ struct i40e_hw {
 	/* LLDP/DCBX Status */
 	u16 dcbx_status;
 
+#define I40E_HW_FLAG_802_1AD_CAPABLE        BIT_ULL(1)
+#define I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE  BIT_ULL(2)
+
 	/* DCBX info */
 	struct i40e_dcbx_config local_dcbx_config; /* Oper/Local Cfg */
 	struct i40e_dcbx_config remote_dcbx_config; /* Peer Cfg */
 	struct i40e_dcbx_config desired_dcbx_config; /* CEE Desired Cfg */
 
+	/* Used in set switch config AQ command */
+	u16 switch_tag;
+	u16 first_tag;
+	u16 second_tag;
+
 	/* debug mask */
 	u32 debug_mask;
 	char err_str[16];
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index 82f69031e5cd..de0af521d602 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -102,6 +102,7 @@ struct i40e_vsi {
 #define I40E_TX_CTXTDESC(R, i) \
 	(&(((struct i40e_tx_context_desc *)((R)->desc))[i]))
 #define MAX_QUEUES 16
+#define I40EVF_MAX_REQ_QUEUES 4
 
 #define I40EVF_HKEY_ARRAY_SIZE ((I40E_VFQF_HKEY_MAX_INDEX + 1) * 4)
 #define I40EVF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4)
@@ -200,6 +201,7 @@ struct i40evf_adapter {
 	struct list_head vlan_filter_list;
 	char misc_vector_name[IFNAMSIZ + 9];
 	int num_active_queues;
+	int num_req_queues;
 
 	/* TX */
 	struct i40e_ring *tx_rings;
@@ -220,21 +222,22 @@ struct i40evf_adapter {
 
 	u32 flags;
 #define I40EVF_FLAG_RX_CSUM_ENABLED		BIT(0)
-#define I40EVF_FLAG_IMIR_ENABLED		BIT(5)
-#define I40EVF_FLAG_MQ_CAPABLE			BIT(6)
-#define I40EVF_FLAG_PF_COMMS_FAILED		BIT(8)
-#define I40EVF_FLAG_RESET_PENDING		BIT(9)
-#define I40EVF_FLAG_RESET_NEEDED		BIT(10)
-#define I40EVF_FLAG_WB_ON_ITR_CAPABLE		BIT(11)
-#define I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE	BIT(12)
-#define I40EVF_FLAG_ADDR_SET_BY_PF		BIT(13)
-#define I40EVF_FLAG_SERVICE_CLIENT_REQUESTED	BIT(14)
-#define I40EVF_FLAG_CLIENT_NEEDS_OPEN		BIT(15)
-#define I40EVF_FLAG_CLIENT_NEEDS_CLOSE		BIT(16)
-#define I40EVF_FLAG_CLIENT_NEEDS_L2_PARAMS	BIT(17)
-#define I40EVF_FLAG_PROMISC_ON			BIT(18)
-#define I40EVF_FLAG_ALLMULTI_ON			BIT(19)
-#define I40EVF_FLAG_LEGACY_RX			BIT(20)
+#define I40EVF_FLAG_IMIR_ENABLED		BIT(1)
+#define I40EVF_FLAG_MQ_CAPABLE			BIT(2)
+#define I40EVF_FLAG_PF_COMMS_FAILED		BIT(3)
+#define I40EVF_FLAG_RESET_PENDING		BIT(4)
+#define I40EVF_FLAG_RESET_NEEDED		BIT(5)
+#define I40EVF_FLAG_WB_ON_ITR_CAPABLE		BIT(6)
+#define I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE	BIT(7)
+#define I40EVF_FLAG_ADDR_SET_BY_PF		BIT(8)
+#define I40EVF_FLAG_SERVICE_CLIENT_REQUESTED	BIT(9)
+#define I40EVF_FLAG_CLIENT_NEEDS_OPEN		BIT(10)
+#define I40EVF_FLAG_CLIENT_NEEDS_CLOSE		BIT(11)
+#define I40EVF_FLAG_CLIENT_NEEDS_L2_PARAMS	BIT(12)
+#define I40EVF_FLAG_PROMISC_ON			BIT(13)
+#define I40EVF_FLAG_ALLMULTI_ON			BIT(14)
+#define I40EVF_FLAG_LEGACY_RX			BIT(15)
+#define I40EVF_FLAG_REINIT_ITR_NEEDED		BIT(16)
 /* duplicates for common code */
 #define I40E_FLAG_DCB_ENABLED			0
 #define I40E_FLAG_RX_CSUM_ENABLED		I40EVF_FLAG_RX_CSUM_ENABLED
@@ -349,6 +352,7 @@ void i40evf_deconfigure_queues(struct i40evf_adapter *adapter);
 void i40evf_enable_queues(struct i40evf_adapter *adapter);
 void i40evf_disable_queues(struct i40evf_adapter *adapter);
 void i40evf_map_queues(struct i40evf_adapter *adapter);
+int i40evf_request_queues(struct i40evf_adapter *adapter, int num);
 void i40evf_add_ether_addrs(struct i40evf_adapter *adapter);
 void i40evf_del_ether_addrs(struct i40evf_adapter *adapter);
 void i40evf_add_vlans(struct i40evf_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index 65874d6b3ab9..da006fa3fec1 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -669,7 +669,7 @@ static void i40evf_get_channels(struct net_device *netdev,
 	struct i40evf_adapter *adapter = netdev_priv(netdev);
 
 	/* Report maximum channels */
-	ch->max_combined = adapter->num_active_queues;
+	ch->max_combined = I40EVF_MAX_REQ_QUEUES;
 
 	ch->max_other = NONQ_VECS;
 	ch->other_count = NONQ_VECS;
@@ -678,6 +678,41 @@ static void i40evf_get_channels(struct net_device *netdev,
 }
 
 /**
+ * i40evf_set_channels: set the new channel count
+ * @netdev: network interface device structure
+ * @ch: channel information structure
+ *
+ * Negotiate a new number of channels with the PF then do a reset.  During
+ * reset we'll realloc queues and fix the RSS table.  Returns 0 on success,
+ * negative on failure.
+ **/
+static int i40evf_set_channels(struct net_device *netdev,
+			       struct ethtool_channels *ch)
+{
+	struct i40evf_adapter *adapter = netdev_priv(netdev);
+	int num_req = ch->combined_count;
+
+	if (num_req != adapter->num_active_queues &&
+	    !(adapter->vf_res->vf_cap_flags &
+	      VIRTCHNL_VF_OFFLOAD_REQ_QUEUES)) {
+		dev_info(&adapter->pdev->dev, "PF is not capable of queue negotiation.\n");
+		return -EINVAL;
+	}
+
+	/* All of these should have already been checked by ethtool before this
+	 * even gets to us, but just to be sure.
+	 */
+	if (num_req <= 0 || num_req > I40EVF_MAX_REQ_QUEUES)
+		return -EINVAL;
+
+	if (ch->rx_count || ch->tx_count || ch->other_count != NONQ_VECS)
+		return -EINVAL;
+
+	adapter->num_req_queues = num_req;
+	return i40evf_request_queues(adapter, num_req);
+}
+
+/**
  * i40evf_get_rxfh_key_size - get the RSS hash key size
  * @netdev: network interface device structure
  *
@@ -785,6 +820,7 @@ static const struct ethtool_ops i40evf_ethtool_ops = {
 	.get_rxfh		= i40evf_get_rxfh,
 	.set_rxfh		= i40evf_set_rxfh,
 	.get_channels		= i40evf_get_channels,
+	.set_channels		= i40evf_set_channels,
 	.get_rxfh_key_size	= i40evf_get_rxfh_key_size,
 	.get_link_ksettings	= i40evf_get_link_ksettings,
 };
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 1825d956bb00..ca2ebdbd24d7 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -46,7 +46,7 @@ static const char i40evf_driver_string[] =
 
 #define DRV_VERSION_MAJOR 3
 #define DRV_VERSION_MINOR 0
-#define DRV_VERSION_BUILD 0
+#define DRV_VERSION_BUILD 1
 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
 	     __stringify(DRV_VERSION_MINOR) "." \
 	     __stringify(DRV_VERSION_BUILD) \
@@ -430,57 +430,26 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx)
  * group the rings as "efficiently" as possible.  You would add new
  * mapping configurations in here.
  **/
-static int i40evf_map_rings_to_vectors(struct i40evf_adapter *adapter)
+static void i40evf_map_rings_to_vectors(struct i40evf_adapter *adapter)
 {
+	int rings_remaining = adapter->num_active_queues;
+	int ridx = 0, vidx = 0;
 	int q_vectors;
-	int v_start = 0;
-	int rxr_idx = 0, txr_idx = 0;
-	int rxr_remaining = adapter->num_active_queues;
-	int txr_remaining = adapter->num_active_queues;
-	int i, j;
-	int rqpv, tqpv;
-	int err = 0;
 
 	q_vectors = adapter->num_msix_vectors - NONQ_VECS;
 
-	/* The ideal configuration...
-	 * We have enough vectors to map one per queue.
-	 */
-	if (q_vectors >= (rxr_remaining * 2)) {
-		for (; rxr_idx < rxr_remaining; v_start++, rxr_idx++)
-			i40evf_map_vector_to_rxq(adapter, v_start, rxr_idx);
-
-		for (; txr_idx < txr_remaining; v_start++, txr_idx++)
-			i40evf_map_vector_to_txq(adapter, v_start, txr_idx);
-		goto out;
-	}
+	for (; ridx < rings_remaining; ridx++) {
+		i40evf_map_vector_to_rxq(adapter, vidx, ridx);
+		i40evf_map_vector_to_txq(adapter, vidx, ridx);
 
-	/* If we don't have enough vectors for a 1-to-1
-	 * mapping, we'll have to group them so there are
-	 * multiple queues per vector.
-	 * Re-adjusting *qpv takes care of the remainder.
-	 */
-	for (i = v_start; i < q_vectors; i++) {
-		rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - i);
-		for (j = 0; j < rqpv; j++) {
-			i40evf_map_vector_to_rxq(adapter, i, rxr_idx);
-			rxr_idx++;
-			rxr_remaining--;
-		}
-	}
-	for (i = v_start; i < q_vectors; i++) {
-		tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - i);
-		for (j = 0; j < tqpv; j++) {
-			i40evf_map_vector_to_txq(adapter, i, txr_idx);
-			txr_idx++;
-			txr_remaining--;
-		}
+		/* In the case where we have more queues than vectors, continue
+		 * round-robin on vectors until all queues are mapped.
+		 */
+		if (++vidx >= q_vectors)
+			vidx = 0;
 	}
 
-out:
 	adapter->aq_required |= I40EVF_FLAG_AQ_MAP_VECTORS;
-
-	return err;
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -546,6 +515,7 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename)
 	unsigned int vector, q_vectors;
 	unsigned int rx_int_idx = 0, tx_int_idx = 0;
 	int irq_num, err;
+	int cpu;
 
 	i40evf_irq_disable(adapter);
 	/* Decrement for Other and TCP Timer vectors */
@@ -584,10 +554,12 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename)
 		q_vector->affinity_notify.release =
 						   i40evf_irq_affinity_release;
 		irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
-		/* get_cpu_mask returns a static constant mask with
-		 * a permanent lifetime so it's ok to use here.
+		/* Spread the IRQ affinity hints across online CPUs. Note that
+		 * get_cpu_mask returns a mask with a permanent lifetime so
+		 * it's safe to use as a hint for irq_set_affinity_hint.
 		 */
-		irq_set_affinity_hint(irq_num, get_cpu_mask(q_vector->v_idx));
+		cpu = cpumask_local_spread(q_vector->v_idx, -1);
+		irq_set_affinity_hint(irq_num, get_cpu_mask(cpu));
 	}
 
 	return 0;
@@ -908,6 +880,8 @@ i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter,
 		list_add_tail(&f->list, &adapter->mac_filter_list);
 		f->add = true;
 		adapter->aq_required |= I40EVF_FLAG_AQ_ADD_MAC_FILTER;
+	} else {
+		f->remove = false;
 	}
 
 	clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
@@ -1217,9 +1191,18 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
 {
 	int i, num_active_queues;
 
-	num_active_queues = min_t(int,
-				  adapter->vsi_res->num_queue_pairs,
-				  (int)(num_online_cpus()));
+	/* If we're in reset reallocating queues we don't actually know yet for
+	 * certain the PF gave us the number of queues we asked for but we'll
+	 * assume it did.  Once basic reset is finished we'll confirm once we
+	 * start negotiating config with PF.
+	 */
+	if (adapter->num_req_queues)
+		num_active_queues = adapter->num_req_queues;
+	else
+		num_active_queues = min_t(int,
+					  adapter->vsi_res->num_queue_pairs,
+					  (int)(num_online_cpus()));
+
 
 	adapter->tx_rings = kcalloc(num_active_queues,
 				    sizeof(struct i40e_ring), GFP_KERNEL);
@@ -1240,7 +1223,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
 		tx_ring->netdev = adapter->netdev;
 		tx_ring->dev = &adapter->pdev->dev;
 		tx_ring->count = adapter->tx_desc_count;
-		tx_ring->tx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF);
+		tx_ring->tx_itr_setting = I40E_ITR_TX_DEF;
 		if (adapter->flags & I40EVF_FLAG_WB_ON_ITR_CAPABLE)
 			tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR;
 
@@ -1249,7 +1232,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
 		rx_ring->netdev = adapter->netdev;
 		rx_ring->dev = &adapter->pdev->dev;
 		rx_ring->count = adapter->rx_desc_count;
-		rx_ring->rx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF);
+		rx_ring->rx_itr_setting = I40E_ITR_RX_DEF;
 	}
 
 	adapter->num_active_queues = num_active_queues;
@@ -1568,12 +1551,53 @@ static void i40evf_free_rss(struct i40evf_adapter *adapter)
 }
 
 /**
+ * i40evf_reinit_interrupt_scheme - Reallocate queues and vectors
+ * @adapter: board private structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int i40evf_reinit_interrupt_scheme(struct i40evf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	int err;
+
+	if (netif_running(netdev))
+		i40evf_free_traffic_irqs(adapter);
+	i40evf_free_misc_irq(adapter);
+	i40evf_reset_interrupt_capability(adapter);
+	i40evf_free_q_vectors(adapter);
+	i40evf_free_queues(adapter);
+
+	err =  i40evf_init_interrupt_scheme(adapter);
+	if (err)
+		goto err;
+
+	netif_tx_stop_all_queues(netdev);
+
+	err = i40evf_request_misc_irq(adapter);
+	if (err)
+		goto err;
+
+	set_bit(__I40E_VSI_DOWN, adapter->vsi.state);
+
+	i40evf_map_rings_to_vectors(adapter);
+
+	if (RSS_AQ(adapter))
+		adapter->aq_required |= I40EVF_FLAG_AQ_CONFIGURE_RSS;
+	else
+		err = i40evf_init_rss(adapter);
+err:
+	return err;
+}
+
+/**
  * i40evf_watchdog_timer - Periodic call-back timer
  * @data: pointer to adapter disguised as unsigned long
  **/
-static void i40evf_watchdog_timer(unsigned long data)
+static void i40evf_watchdog_timer(struct timer_list *t)
 {
-	struct i40evf_adapter *adapter = (struct i40evf_adapter *)data;
+	struct i40evf_adapter *adapter = from_timer(adapter, t,
+						    watchdog_timer);
 
 	schedule_work(&adapter->watchdog_task);
 	/* timer will be rescheduled in watchdog task */
@@ -1913,8 +1937,15 @@ continue_reset:
 	if (err)
 		dev_info(&adapter->pdev->dev, "Failed to init adminq: %d\n",
 			 err);
+	adapter->aq_required = 0;
 
-	adapter->aq_required = I40EVF_FLAG_AQ_GET_CONFIG;
+	if (adapter->flags & I40EVF_FLAG_REINIT_ITR_NEEDED) {
+		err = i40evf_reinit_interrupt_scheme(adapter);
+		if (err)
+			goto reset_err;
+	}
+
+	adapter->aq_required |= I40EVF_FLAG_AQ_GET_CONFIG;
 	adapter->aq_required |= I40EVF_FLAG_AQ_MAP_VECTORS;
 
 	/* re-add all MAC filters */
@@ -1944,6 +1975,15 @@ continue_reset:
 		if (err)
 			goto reset_err;
 
+		if (adapter->flags & I40EVF_FLAG_REINIT_ITR_NEEDED) {
+			err = i40evf_request_traffic_irqs(adapter,
+							  netdev->name);
+			if (err)
+				goto reset_err;
+
+			adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
+		}
+
 		i40evf_configure(adapter);
 
 		i40evf_up_complete(adapter);
@@ -2386,10 +2426,6 @@ out_err:
 	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
 }
 
-#define I40EVF_VLAN_FEATURES (NETIF_F_HW_VLAN_CTAG_TX |\
-			      NETIF_F_HW_VLAN_CTAG_RX |\
-			      NETIF_F_HW_VLAN_CTAG_FILTER)
-
 /**
  * i40evf_fix_features - fix up the netdev feature bits
  * @netdev: our net device
@@ -2402,9 +2438,11 @@ static netdev_features_t i40evf_fix_features(struct net_device *netdev,
 {
 	struct i40evf_adapter *adapter = netdev_priv(netdev);
 
-	features &= ~I40EVF_VLAN_FEATURES;
-	if (adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
-		features |= I40EVF_VLAN_FEATURES;
+	if (!(adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN))
+		features &= ~(NETIF_F_HW_VLAN_CTAG_TX |
+			      NETIF_F_HW_VLAN_CTAG_RX |
+			      NETIF_F_HW_VLAN_CTAG_FILTER);
+
 	return features;
 }
 
@@ -2459,9 +2497,9 @@ static int i40evf_check_reset_complete(struct i40e_hw *hw)
 int i40evf_process_config(struct i40evf_adapter *adapter)
 {
 	struct virtchnl_vf_resource *vfres = adapter->vf_res;
+	int i, num_req_queues = adapter->num_req_queues;
 	struct net_device *netdev = adapter->netdev;
 	struct i40e_vsi *vsi = &adapter->vsi;
-	int i;
 	netdev_features_t hw_enc_features;
 	netdev_features_t hw_features;
 
@@ -2475,6 +2513,23 @@ int i40evf_process_config(struct i40evf_adapter *adapter)
 		return -ENODEV;
 	}
 
+	if (num_req_queues &&
+	    num_req_queues != adapter->vsi_res->num_queue_pairs) {
+		/* Problem.  The PF gave us fewer queues than what we had
+		 * negotiated in our request.  Need a reset to see if we can't
+		 * get back to a working state.
+		 */
+		dev_err(&adapter->pdev->dev,
+			"Requested %d queues, but PF only gave us %d.\n",
+			num_req_queues,
+			adapter->vsi_res->num_queue_pairs);
+		adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED;
+		adapter->num_req_queues = adapter->vsi_res->num_queue_pairs;
+		i40evf_schedule_reset(adapter);
+		return -ENODEV;
+	}
+	adapter->num_req_queues = 0;
+
 	hw_enc_features = NETIF_F_SG			|
 			  NETIF_F_IP_CSUM		|
 			  NETIF_F_IPV6_CSUM		|
@@ -2518,9 +2573,17 @@ int i40evf_process_config(struct i40evf_adapter *adapter)
 	 */
 	hw_features = hw_enc_features;
 
+	/* Enable VLAN features if supported */
+	if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
+		hw_features |= (NETIF_F_HW_VLAN_CTAG_TX |
+				NETIF_F_HW_VLAN_CTAG_RX);
+
 	netdev->hw_features |= hw_features;
 
-	netdev->features |= hw_features | I40EVF_VLAN_FEATURES;
+	netdev->features |= hw_features;
+
+	if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
+		netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 
 	adapter->vsi.id = adapter->vsi_res->vsi_id;
 
@@ -2686,9 +2749,7 @@ static void i40evf_init_task(struct work_struct *work)
 		ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr);
 	}
 
-	init_timer(&adapter->watchdog_timer);
-	adapter->watchdog_timer.function = &i40evf_watchdog_timer;
-	adapter->watchdog_timer.data = (unsigned long)adapter;
+	timer_setup(&adapter->watchdog_timer, i40evf_watchdog_timer, 0);
 	mod_timer(&adapter->watchdog_timer, jiffies + 1);
 
 	adapter->tx_desc_count = I40EVF_DEFAULT_TXD;
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index 85876f4fb1fb..46c8b8a3907c 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -52,7 +52,7 @@ static int i40evf_send_pf_msg(struct i40evf_adapter *adapter,
 
 	err = i40e_aq_send_msg_to_pf(hw, op, 0, msg, len, NULL);
 	if (err)
-		dev_err(&adapter->pdev->dev, "Unable to send opcode %d to PF, err %s, aq_err %s\n",
+		dev_dbg(&adapter->pdev->dev, "Unable to send opcode %d to PF, err %s, aq_err %s\n",
 			op, i40evf_stat_str(hw, err),
 			i40evf_aq_str(hw, hw->aq.asq_last_status));
 	return err;
@@ -160,7 +160,8 @@ int i40evf_send_vf_config_msg(struct i40evf_adapter *adapter)
 	       VIRTCHNL_VF_OFFLOAD_WB_ON_ITR |
 	       VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 |
 	       VIRTCHNL_VF_OFFLOAD_ENCAP |
-	       VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM;
+	       VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM |
+	       VIRTCHNL_VF_OFFLOAD_REQ_QUEUES;
 
 	adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES;
 	adapter->aq_required &= ~I40EVF_FLAG_AQ_GET_CONFIG;
@@ -385,6 +386,33 @@ void i40evf_map_queues(struct i40evf_adapter *adapter)
 }
 
 /**
+ * i40evf_request_queues
+ * @adapter: adapter structure
+ * @num: number of requested queues
+ *
+ * We get a default number of queues from the PF.  This enables us to request a
+ * different number.  Returns 0 on success, negative on failure
+ **/
+int i40evf_request_queues(struct i40evf_adapter *adapter, int num)
+{
+	struct virtchnl_vf_res_request vfres;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot request queues, command %d pending\n",
+			adapter->current_op);
+		return -EBUSY;
+	}
+
+	vfres.num_queue_pairs = num;
+
+	adapter->current_op = VIRTCHNL_OP_REQUEST_QUEUES;
+	adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED;
+	return i40evf_send_pf_msg(adapter, VIRTCHNL_OP_REQUEST_QUEUES,
+				  (u8 *)&vfres, sizeof(vfres));
+}
+
+/**
  * i40evf_add_ether_addrs
  * @adapter: adapter structure
  * @addrs: the MAC address filters to add (contiguous)
@@ -1068,6 +1096,19 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
 				 "Invalid message %d from PF\n", v_opcode);
 		}
 		break;
+	case VIRTCHNL_OP_REQUEST_QUEUES: {
+		struct virtchnl_vf_res_request *vfres =
+			(struct virtchnl_vf_res_request *)msg;
+		if (vfres->num_queue_pairs != adapter->num_req_queues) {
+			dev_info(&adapter->pdev->dev,
+				 "Requested %d queues, PF can support %d\n",
+				 adapter->num_req_queues,
+				 vfres->num_queue_pairs);
+			adapter->num_req_queues = 0;
+			adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
+		}
+		}
+		break;
 	default:
 		if (adapter->current_op && (v_opcode != adapter->current_op))
 			dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n",
diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h
index 1de82f247312..83cabff1e0ab 100644
--- a/drivers/net/ethernet/intel/igb/e1000_defines.h
+++ b/drivers/net/ethernet/intel/igb/e1000_defines.h
@@ -353,7 +353,18 @@
 #define E1000_RXPBS_CFG_TS_EN           0x80000000
 
 #define I210_RXPBSIZE_DEFAULT		0x000000A2 /* RXPBSIZE default */
+#define I210_RXPBSIZE_MASK		0x0000003F
+#define I210_RXPBSIZE_PB_32KB		0x00000020
 #define I210_TXPBSIZE_DEFAULT		0x04000014 /* TXPBSIZE default */
+#define I210_TXPBSIZE_MASK		0xC0FFFFFF
+#define I210_TXPBSIZE_PB0_8KB		(8 << 0)
+#define I210_TXPBSIZE_PB1_8KB		(8 << 6)
+#define I210_TXPBSIZE_PB2_4KB		(4 << 12)
+#define I210_TXPBSIZE_PB3_4KB		(4 << 18)
+
+#define I210_DTXMXPKTSZ_DEFAULT		0x00000098
+
+#define I210_SR_QUEUES_NUM		2
 
 /* SerDes Control */
 #define E1000_SCTL_DISABLE_SERDES_LOOPBACK 0x0400
@@ -1051,4 +1062,16 @@
 #define E1000_VLAPQF_P_VALID(_n)	(0x1 << (3 + (_n) * 4))
 #define E1000_VLAPQF_QUEUE_MASK	0x03
 
+/* TX Qav Control fields */
+#define E1000_TQAVCTRL_XMIT_MODE	BIT(0)
+#define E1000_TQAVCTRL_DATAFETCHARB	BIT(4)
+#define E1000_TQAVCTRL_DATATRANARB	BIT(8)
+
+/* TX Qav Credit Control fields */
+#define E1000_TQAVCC_IDLESLOPE_MASK	0xFFFF
+#define E1000_TQAVCC_QUEUEMODE		BIT(31)
+
+/* Transmit Descriptor Control fields */
+#define E1000_TXDCTL_PRIORITY		BIT(27)
+
 #endif
diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h
index 31a3f09df9f7..568c96842f28 100644
--- a/drivers/net/ethernet/intel/igb/e1000_regs.h
+++ b/drivers/net/ethernet/intel/igb/e1000_regs.h
@@ -421,6 +421,14 @@ do { \
 
 #define E1000_I210_FLA		0x1201C
 
+#define E1000_I210_DTXMXPKTSZ	0x355C
+
+#define E1000_I210_TXDCTL(_n)	(0x0E028 + ((_n) * 0x40))
+
+#define E1000_I210_TQAVCTRL	0x3570
+#define E1000_I210_TQAVCC(_n)	(0x3004 + ((_n) * 0x40))
+#define E1000_I210_TQAVHC(_n)	(0x300C + ((_n) * 0x40))
+
 #define E1000_INVM_DATA_REG(_n)	(0x12120 + 4*(_n))
 #define E1000_INVM_SIZE		64 /* Number of INVM Data Registers */
 
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 06ffb2bc713e..92845692087a 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -281,6 +281,11 @@ struct igb_ring {
 	u16 count;			/* number of desc. in the ring */
 	u8 queue_index;			/* logical index of the ring*/
 	u8 reg_idx;			/* physical index of the ring */
+	bool cbs_enable;		/* indicates if CBS is enabled */
+	s32 idleslope;			/* idleSlope in kbps */
+	s32 sendslope;			/* sendSlope in kbps */
+	s32 hicredit;			/* hiCredit in bytes */
+	s32 locredit;			/* loCredit in bytes */
 
 	/* everything past this point are written often */
 	u16 next_to_clean;
@@ -621,6 +626,7 @@ struct igb_adapter {
 #define IGB_FLAG_EEE			BIT(14)
 #define IGB_FLAG_VLAN_PROMISC		BIT(15)
 #define IGB_FLAG_RX_LEGACY		BIT(16)
+#define IGB_FLAG_FQTSS			BIT(17)
 
 /* Media Auto Sense */
 #define IGB_MAS_ENABLE_0		0X0001
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 18b6c25d4705..e94d3c256667 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -34,6 +34,7 @@
 #include <linux/slab.h>
 #include <net/checksum.h>
 #include <net/ip6_checksum.h>
+#include <net/pkt_sched.h>
 #include <linux/net_tstamp.h>
 #include <linux/mii.h>
 #include <linux/ethtool.h>
@@ -62,6 +63,17 @@
 #define BUILD 0
 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
 __stringify(BUILD) "-k"
+
+enum queue_mode {
+	QUEUE_MODE_STRICT_PRIORITY,
+	QUEUE_MODE_STREAM_RESERVATION,
+};
+
+enum tx_queue_prio {
+	TX_QUEUE_PRIO_HIGH,
+	TX_QUEUE_PRIO_LOW,
+};
+
 char igb_driver_name[] = "igb";
 char igb_driver_version[] = DRV_VERSION;
 static const char igb_driver_string[] =
@@ -133,8 +145,8 @@ static void igb_clean_all_rx_rings(struct igb_adapter *);
 static void igb_clean_tx_ring(struct igb_ring *);
 static void igb_clean_rx_ring(struct igb_ring *);
 static void igb_set_rx_mode(struct net_device *);
-static void igb_update_phy_info(unsigned long);
-static void igb_watchdog(unsigned long);
+static void igb_update_phy_info(struct timer_list *);
+static void igb_watchdog(struct timer_list *);
 static void igb_watchdog_task(struct work_struct *);
 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
 static void igb_get_stats64(struct net_device *dev,
@@ -1271,6 +1283,12 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter,
 		ring->count = adapter->tx_ring_count;
 		ring->queue_index = txr_idx;
 
+		ring->cbs_enable = false;
+		ring->idleslope = 0;
+		ring->sendslope = 0;
+		ring->hicredit = 0;
+		ring->locredit = 0;
+
 		u64_stats_init(&ring->tx_syncp);
 		u64_stats_init(&ring->tx_syncp2);
 
@@ -1598,6 +1616,284 @@ static void igb_get_hw_control(struct igb_adapter *adapter)
 			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
 }
 
+static void enable_fqtss(struct igb_adapter *adapter, bool enable)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct e1000_hw *hw = &adapter->hw;
+
+	WARN_ON(hw->mac.type != e1000_i210);
+
+	if (enable)
+		adapter->flags |= IGB_FLAG_FQTSS;
+	else
+		adapter->flags &= ~IGB_FLAG_FQTSS;
+
+	if (netif_running(netdev))
+		schedule_work(&adapter->reset_task);
+}
+
+static bool is_fqtss_enabled(struct igb_adapter *adapter)
+{
+	return (adapter->flags & IGB_FLAG_FQTSS) ? true : false;
+}
+
+static void set_tx_desc_fetch_prio(struct e1000_hw *hw, int queue,
+				   enum tx_queue_prio prio)
+{
+	u32 val;
+
+	WARN_ON(hw->mac.type != e1000_i210);
+	WARN_ON(queue < 0 || queue > 4);
+
+	val = rd32(E1000_I210_TXDCTL(queue));
+
+	if (prio == TX_QUEUE_PRIO_HIGH)
+		val |= E1000_TXDCTL_PRIORITY;
+	else
+		val &= ~E1000_TXDCTL_PRIORITY;
+
+	wr32(E1000_I210_TXDCTL(queue), val);
+}
+
+static void set_queue_mode(struct e1000_hw *hw, int queue, enum queue_mode mode)
+{
+	u32 val;
+
+	WARN_ON(hw->mac.type != e1000_i210);
+	WARN_ON(queue < 0 || queue > 1);
+
+	val = rd32(E1000_I210_TQAVCC(queue));
+
+	if (mode == QUEUE_MODE_STREAM_RESERVATION)
+		val |= E1000_TQAVCC_QUEUEMODE;
+	else
+		val &= ~E1000_TQAVCC_QUEUEMODE;
+
+	wr32(E1000_I210_TQAVCC(queue), val);
+}
+
+/**
+ *  igb_configure_cbs - Configure Credit-Based Shaper (CBS)
+ *  @adapter: pointer to adapter struct
+ *  @queue: queue number
+ *  @enable: true = enable CBS, false = disable CBS
+ *  @idleslope: idleSlope in kbps
+ *  @sendslope: sendSlope in kbps
+ *  @hicredit: hiCredit in bytes
+ *  @locredit: loCredit in bytes
+ *
+ *  Configure CBS for a given hardware queue. When disabling, idleslope,
+ *  sendslope, hicredit, locredit arguments are ignored. Returns 0 if
+ *  success. Negative otherwise.
+ **/
+static void igb_configure_cbs(struct igb_adapter *adapter, int queue,
+			      bool enable, int idleslope, int sendslope,
+			      int hicredit, int locredit)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct e1000_hw *hw = &adapter->hw;
+	u32 tqavcc;
+	u16 value;
+
+	WARN_ON(hw->mac.type != e1000_i210);
+	WARN_ON(queue < 0 || queue > 1);
+
+	if (enable) {
+		set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_HIGH);
+		set_queue_mode(hw, queue, QUEUE_MODE_STREAM_RESERVATION);
+
+		/* According to i210 datasheet section 7.2.7.7, we should set
+		 * the 'idleSlope' field from TQAVCC register following the
+		 * equation:
+		 *
+		 * For 100 Mbps link speed:
+		 *
+		 *     value = BW * 0x7735 * 0.2                          (E1)
+		 *
+		 * For 1000Mbps link speed:
+		 *
+		 *     value = BW * 0x7735 * 2                            (E2)
+		 *
+		 * E1 and E2 can be merged into one equation as shown below.
+		 * Note that 'link-speed' is in Mbps.
+		 *
+		 *     value = BW * 0x7735 * 2 * link-speed
+		 *                           --------------               (E3)
+		 *                                1000
+		 *
+		 * 'BW' is the percentage bandwidth out of full link speed
+		 * which can be found with the following equation. Note that
+		 * idleSlope here is the parameter from this function which
+		 * is in kbps.
+		 *
+		 *     BW =     idleSlope
+		 *          -----------------                             (E4)
+		 *          link-speed * 1000
+		 *
+		 * That said, we can come up with a generic equation to
+		 * calculate the value we should set it TQAVCC register by
+		 * replacing 'BW' in E3 by E4. The resulting equation is:
+		 *
+		 * value =     idleSlope     * 0x7735 * 2 * link-speed
+		 *         -----------------            --------------    (E5)
+		 *         link-speed * 1000                 1000
+		 *
+		 * 'link-speed' is present in both sides of the fraction so
+		 * it is canceled out. The final equation is the following:
+		 *
+		 *     value = idleSlope * 61034
+		 *             -----------------                          (E6)
+		 *                  1000000
+		 */
+		value = DIV_ROUND_UP_ULL(idleslope * 61034ULL, 1000000);
+
+		tqavcc = rd32(E1000_I210_TQAVCC(queue));
+		tqavcc &= ~E1000_TQAVCC_IDLESLOPE_MASK;
+		tqavcc |= value;
+		wr32(E1000_I210_TQAVCC(queue), tqavcc);
+
+		wr32(E1000_I210_TQAVHC(queue), 0x80000000 + hicredit * 0x7735);
+	} else {
+		set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_LOW);
+		set_queue_mode(hw, queue, QUEUE_MODE_STRICT_PRIORITY);
+
+		/* Set idleSlope to zero. */
+		tqavcc = rd32(E1000_I210_TQAVCC(queue));
+		tqavcc &= ~E1000_TQAVCC_IDLESLOPE_MASK;
+		wr32(E1000_I210_TQAVCC(queue), tqavcc);
+
+		/* Set hiCredit to zero. */
+		wr32(E1000_I210_TQAVHC(queue), 0);
+	}
+
+	/* XXX: In i210 controller the sendSlope and loCredit parameters from
+	 * CBS are not configurable by software so we don't do any 'controller
+	 * configuration' in respect to these parameters.
+	 */
+
+	netdev_dbg(netdev, "CBS %s: queue %d idleslope %d sendslope %d hiCredit %d locredit %d\n",
+		   (enable) ? "enabled" : "disabled", queue,
+		   idleslope, sendslope, hicredit, locredit);
+}
+
+static int igb_save_cbs_params(struct igb_adapter *adapter, int queue,
+			       bool enable, int idleslope, int sendslope,
+			       int hicredit, int locredit)
+{
+	struct igb_ring *ring;
+
+	if (queue < 0 || queue > adapter->num_tx_queues)
+		return -EINVAL;
+
+	ring = adapter->tx_ring[queue];
+
+	ring->cbs_enable = enable;
+	ring->idleslope = idleslope;
+	ring->sendslope = sendslope;
+	ring->hicredit = hicredit;
+	ring->locredit = locredit;
+
+	return 0;
+}
+
+static bool is_any_cbs_enabled(struct igb_adapter *adapter)
+{
+	struct igb_ring *ring;
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		ring = adapter->tx_ring[i];
+
+		if (ring->cbs_enable)
+			return true;
+	}
+
+	return false;
+}
+
+static void igb_setup_tx_mode(struct igb_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct e1000_hw *hw = &adapter->hw;
+	u32 val;
+
+	/* Only i210 controller supports changing the transmission mode. */
+	if (hw->mac.type != e1000_i210)
+		return;
+
+	if (is_fqtss_enabled(adapter)) {
+		int i, max_queue;
+
+		/* Configure TQAVCTRL register: set transmit mode to 'Qav',
+		 * set data fetch arbitration to 'round robin' and set data
+		 * transfer arbitration to 'credit shaper algorithm.
+		 */
+		val = rd32(E1000_I210_TQAVCTRL);
+		val |= E1000_TQAVCTRL_XMIT_MODE | E1000_TQAVCTRL_DATATRANARB;
+		val &= ~E1000_TQAVCTRL_DATAFETCHARB;
+		wr32(E1000_I210_TQAVCTRL, val);
+
+		/* Configure Tx and Rx packet buffers sizes as described in
+		 * i210 datasheet section 7.2.7.7.
+		 */
+		val = rd32(E1000_TXPBS);
+		val &= ~I210_TXPBSIZE_MASK;
+		val |= I210_TXPBSIZE_PB0_8KB | I210_TXPBSIZE_PB1_8KB |
+			I210_TXPBSIZE_PB2_4KB | I210_TXPBSIZE_PB3_4KB;
+		wr32(E1000_TXPBS, val);
+
+		val = rd32(E1000_RXPBS);
+		val &= ~I210_RXPBSIZE_MASK;
+		val |= I210_RXPBSIZE_PB_32KB;
+		wr32(E1000_RXPBS, val);
+
+		/* Section 8.12.9 states that MAX_TPKT_SIZE from DTXMXPKTSZ
+		 * register should not exceed the buffer size programmed in
+		 * TXPBS. The smallest buffer size programmed in TXPBS is 4kB
+		 * so according to the datasheet we should set MAX_TPKT_SIZE to
+		 * 4kB / 64.
+		 *
+		 * However, when we do so, no frame from queue 2 and 3 are
+		 * transmitted.  It seems the MAX_TPKT_SIZE should not be great
+		 * or _equal_ to the buffer size programmed in TXPBS. For this
+		 * reason, we set set MAX_ TPKT_SIZE to (4kB - 1) / 64.
+		 */
+		val = (4096 - 1) / 64;
+		wr32(E1000_I210_DTXMXPKTSZ, val);
+
+		/* Since FQTSS mode is enabled, apply any CBS configuration
+		 * previously set. If no previous CBS configuration has been
+		 * done, then the initial configuration is applied, which means
+		 * CBS is disabled.
+		 */
+		max_queue = (adapter->num_tx_queues < I210_SR_QUEUES_NUM) ?
+			    adapter->num_tx_queues : I210_SR_QUEUES_NUM;
+
+		for (i = 0; i < max_queue; i++) {
+			struct igb_ring *ring = adapter->tx_ring[i];
+
+			igb_configure_cbs(adapter, i, ring->cbs_enable,
+					  ring->idleslope, ring->sendslope,
+					  ring->hicredit, ring->locredit);
+		}
+	} else {
+		wr32(E1000_RXPBS, I210_RXPBSIZE_DEFAULT);
+		wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT);
+		wr32(E1000_I210_DTXMXPKTSZ, I210_DTXMXPKTSZ_DEFAULT);
+
+		val = rd32(E1000_I210_TQAVCTRL);
+		/* According to Section 8.12.21, the other flags we've set when
+		 * enabling FQTSS are not relevant when disabling FQTSS so we
+		 * don't set they here.
+		 */
+		val &= ~E1000_TQAVCTRL_XMIT_MODE;
+		wr32(E1000_I210_TQAVCTRL, val);
+	}
+
+	netdev_dbg(netdev, "FQTSS %s\n", (is_fqtss_enabled(adapter)) ?
+		   "enabled" : "disabled");
+}
+
 /**
  *  igb_configure - configure the hardware for RX and TX
  *  @adapter: private board structure
@@ -1609,6 +1905,7 @@ static void igb_configure(struct igb_adapter *adapter)
 
 	igb_get_hw_control(adapter);
 	igb_set_rx_mode(netdev);
+	igb_setup_tx_mode(adapter);
 
 	igb_restore_vlan(adapter);
 
@@ -2150,6 +2447,55 @@ igb_features_check(struct sk_buff *skb, struct net_device *dev,
 	return features;
 }
 
+static int igb_offload_cbs(struct igb_adapter *adapter,
+			   struct tc_cbs_qopt_offload *qopt)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int err;
+
+	/* CBS offloading is only supported by i210 controller. */
+	if (hw->mac.type != e1000_i210)
+		return -EOPNOTSUPP;
+
+	/* CBS offloading is only supported by queue 0 and queue 1. */
+	if (qopt->queue < 0 || qopt->queue > 1)
+		return -EINVAL;
+
+	err = igb_save_cbs_params(adapter, qopt->queue, qopt->enable,
+				  qopt->idleslope, qopt->sendslope,
+				  qopt->hicredit, qopt->locredit);
+	if (err)
+		return err;
+
+	if (is_fqtss_enabled(adapter)) {
+		igb_configure_cbs(adapter, qopt->queue, qopt->enable,
+				  qopt->idleslope, qopt->sendslope,
+				  qopt->hicredit, qopt->locredit);
+
+		if (!is_any_cbs_enabled(adapter))
+			enable_fqtss(adapter, false);
+
+	} else {
+		enable_fqtss(adapter, true);
+	}
+
+	return 0;
+}
+
+static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type,
+			void *type_data)
+{
+	struct igb_adapter *adapter = netdev_priv(dev);
+
+	switch (type) {
+	case TC_SETUP_QDISC_CBS:
+		return igb_offload_cbs(adapter, type_data);
+
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static const struct net_device_ops igb_netdev_ops = {
 	.ndo_open		= igb_open,
 	.ndo_stop		= igb_close,
@@ -2175,6 +2521,7 @@ static const struct net_device_ops igb_netdev_ops = {
 	.ndo_set_features	= igb_set_features,
 	.ndo_fdb_add		= igb_ndo_fdb_add,
 	.ndo_features_check	= igb_features_check,
+	.ndo_setup_tc		= igb_setup_tc,
 };
 
 /**
@@ -2538,10 +2885,8 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT);
 	}
 
-	setup_timer(&adapter->watchdog_timer, igb_watchdog,
-		    (unsigned long) adapter);
-	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
-		    (unsigned long) adapter);
+	timer_setup(&adapter->watchdog_timer, igb_watchdog, 0);
+	timer_setup(&adapter->phy_info_timer, igb_update_phy_info, 0);
 
 	INIT_WORK(&adapter->reset_task, igb_reset_task);
 	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
@@ -3162,6 +3507,8 @@ static int igb_sw_init(struct igb_adapter *adapter)
 	/* Setup and initialize a copy of the hw vlan table array */
 	adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32),
 				       GFP_ATOMIC);
+	if (!adapter->shadow_vfta)
+		return -ENOMEM;
 
 	/* This call may decrease the number of queues */
 	if (igb_init_interrupt_scheme(adapter, true)) {
@@ -4423,9 +4770,9 @@ static void igb_spoof_check(struct igb_adapter *adapter)
 /* Need to wait a few seconds after link up to get diagnostic information from
  * the phy
  */
-static void igb_update_phy_info(unsigned long data)
+static void igb_update_phy_info(struct timer_list *t)
 {
-	struct igb_adapter *adapter = (struct igb_adapter *) data;
+	struct igb_adapter *adapter = from_timer(adapter, t, phy_info_timer);
 	igb_get_phy_info(&adapter->hw);
 }
 
@@ -4512,9 +4859,9 @@ static void igb_check_lvmmc(struct igb_adapter *adapter)
  *  igb_watchdog - Timer Call-back
  *  @data: pointer to adapter cast into an unsigned long
  **/
-static void igb_watchdog(unsigned long data)
+static void igb_watchdog(struct timer_list *t)
 {
-	struct igb_adapter *adapter = (struct igb_adapter *)data;
+	struct igb_adapter *adapter = from_timer(adapter, t, watchdog_timer);
 	/* Do the rest outside of interrupt context */
 	schedule_work(&adapter->watchdog_task);
 }
diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index 1ed556911b14..713e8df23744 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -1915,9 +1915,9 @@ static bool igbvf_has_link(struct igbvf_adapter *adapter)
  * igbvf_watchdog - Timer Call-back
  * @data: pointer to adapter cast into an unsigned long
  **/
-static void igbvf_watchdog(unsigned long data)
+static void igbvf_watchdog(struct timer_list *t)
 {
-	struct igbvf_adapter *adapter = (struct igbvf_adapter *)data;
+	struct igbvf_adapter *adapter = from_timer(adapter, t, watchdog_timer);
 
 	/* Do the rest outside of interrupt context */
 	schedule_work(&adapter->watchdog_task);
@@ -2878,8 +2878,7 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		       netdev->addr_len);
 	}
 
-	setup_timer(&adapter->watchdog_timer, &igbvf_watchdog,
-		    (unsigned long)adapter);
+	timer_setup(&adapter->watchdog_timer, igbvf_watchdog, 0);
 
 	INIT_WORK(&adapter->reset_task, igbvf_reset_task);
 	INIT_WORK(&adapter->watchdog_task, igbvf_watchdog_task);
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
index 5a713199653c..2353c383f0a7 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
@@ -83,7 +83,7 @@ static void ixgb_setup_rctl(struct ixgb_adapter *adapter);
 static void ixgb_clean_tx_ring(struct ixgb_adapter *adapter);
 static void ixgb_clean_rx_ring(struct ixgb_adapter *adapter);
 static void ixgb_set_multi(struct net_device *netdev);
-static void ixgb_watchdog(unsigned long data);
+static void ixgb_watchdog(struct timer_list *t);
 static netdev_tx_t ixgb_xmit_frame(struct sk_buff *skb,
 				   struct net_device *netdev);
 static int ixgb_change_mtu(struct net_device *netdev, int new_mtu);
@@ -508,9 +508,7 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	adapter->part_num = ixgb_get_ee_pba_number(&adapter->hw);
 
-	init_timer(&adapter->watchdog_timer);
-	adapter->watchdog_timer.function = ixgb_watchdog;
-	adapter->watchdog_timer.data = (unsigned long)adapter;
+	timer_setup(&adapter->watchdog_timer, ixgb_watchdog, 0);
 
 	INIT_WORK(&adapter->tx_timeout_task, ixgb_tx_timeout_task);
 
@@ -1152,9 +1150,9 @@ alloc_failed:
  **/
 
 static void
-ixgb_watchdog(unsigned long data)
+ixgb_watchdog(struct timer_list *t)
 {
-	struct ixgb_adapter *adapter = (struct ixgb_adapter *)data;
+	struct ixgb_adapter *adapter = from_timer(adapter, t, watchdog_timer);
 	struct net_device *netdev = adapter->netdev;
 	struct ixgb_desc_ring *txdr = &adapter->tx_ring;
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index dd5578756ae0..468c3555a629 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -275,6 +275,7 @@ struct ixgbe_rx_queue_stats {
 	u64 rsc_count;
 	u64 rsc_flush;
 	u64 non_eop_descs;
+	u64 alloc_rx_page;
 	u64 alloc_rx_page_failed;
 	u64 alloc_rx_buff_failed;
 	u64 csum_err;
@@ -434,8 +435,15 @@ static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring)
 }
 #define ixgbe_rx_pg_size(_ring) (PAGE_SIZE << ixgbe_rx_pg_order(_ring))
 
+#define IXGBE_ITR_ADAPTIVE_MIN_INC	2
+#define IXGBE_ITR_ADAPTIVE_MIN_USECS	10
+#define IXGBE_ITR_ADAPTIVE_MAX_USECS	126
+#define IXGBE_ITR_ADAPTIVE_LATENCY	0x80
+#define IXGBE_ITR_ADAPTIVE_BULK		0x00
+
 struct ixgbe_ring_container {
 	struct ixgbe_ring *ring;	/* pointer to linked list of rings */
+	unsigned long next_update;	/* jiffies value of last update */
 	unsigned int total_bytes;	/* total bytes processed this int */
 	unsigned int total_packets;	/* total packets processed this int */
 	u16 work_limit;			/* total work allowed per interrupt */
@@ -655,6 +663,7 @@ struct ixgbe_adapter {
 	u64 rsc_total_count;
 	u64 rsc_total_flush;
 	u64 non_eop_descs;
+	u32 alloc_rx_page;
 	u32 alloc_rx_page_failed;
 	u32 alloc_rx_buff_failed;
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 6e6ab6f6875e..9bef255f6a18 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -3781,10 +3781,10 @@ s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
 	fw_cmd.ver_build = build;
 	fw_cmd.ver_sub = sub;
 	fw_cmd.hdr.checksum = 0;
-	fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd,
-				(FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len));
 	fw_cmd.pad = 0;
 	fw_cmd.pad2 = 0;
+	fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd,
+				(FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len));
 
 	for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) {
 		ret_val = ixgbe_host_interface_command(hw, &fw_cmd,
@@ -4081,8 +4081,8 @@ bool ixgbe_mng_present(struct ixgbe_hw *hw)
 		return false;
 
 	fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM(hw));
-	fwsm &= IXGBE_FWSM_MODE_MASK;
-	return fwsm == IXGBE_FWSM_FW_MODE_PT;
+
+	return !!(fwsm & IXGBE_FWSM_FW_MODE_PT);
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index c3e7a8191128..0aad1c2a3667 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -104,6 +104,7 @@ static const struct ixgbe_stats ixgbe_gstrings_stats[] = {
 	{"tx_flow_control_xoff", IXGBE_STAT(stats.lxofftxc)},
 	{"rx_flow_control_xoff", IXGBE_STAT(stats.lxoffrxc)},
 	{"rx_csum_offload_errors", IXGBE_STAT(hw_csum_rx_error)},
+	{"alloc_rx_page", IXGBE_STAT(alloc_rx_page)},
 	{"alloc_rx_page_failed", IXGBE_STAT(alloc_rx_page_failed)},
 	{"alloc_rx_buff_failed", IXGBE_STAT(alloc_rx_buff_failed)},
 	{"rx_no_dma_resources", IXGBE_STAT(hw_rx_no_dma_resources)},
@@ -1916,8 +1917,6 @@ static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring,
 				  unsigned int size)
 {
 	union ixgbe_adv_rx_desc *rx_desc;
-	struct ixgbe_rx_buffer *rx_buffer;
-	struct ixgbe_tx_buffer *tx_buffer;
 	u16 rx_ntc, tx_ntc, count = 0;
 
 	/* initialize next to clean and descriptor values */
@@ -1925,7 +1924,38 @@ static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring,
 	tx_ntc = tx_ring->next_to_clean;
 	rx_desc = IXGBE_RX_DESC(rx_ring, rx_ntc);
 
+	while (tx_ntc != tx_ring->next_to_use) {
+		union ixgbe_adv_tx_desc *tx_desc;
+		struct ixgbe_tx_buffer *tx_buffer;
+
+		tx_desc = IXGBE_TX_DESC(tx_ring, tx_ntc);
+
+		/* if DD is not set transmit has not completed */
+		if (!(tx_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)))
+			return count;
+
+		/* unmap buffer on Tx side */
+		tx_buffer = &tx_ring->tx_buffer_info[tx_ntc];
+
+		/* Free all the Tx ring sk_buffs */
+		dev_kfree_skb_any(tx_buffer->skb);
+
+		/* unmap skb header data */
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buffer, dma),
+				 dma_unmap_len(tx_buffer, len),
+				 DMA_TO_DEVICE);
+		dma_unmap_len_set(tx_buffer, len, 0);
+
+		/* increment Tx next to clean counter */
+		tx_ntc++;
+		if (tx_ntc == tx_ring->count)
+			tx_ntc = 0;
+	}
+
 	while (rx_desc->wb.upper.length) {
+		struct ixgbe_rx_buffer *rx_buffer;
+
 		/* check Rx buffer */
 		rx_buffer = &rx_ring->rx_buffer_info[rx_ntc];
 
@@ -1938,6 +1968,8 @@ static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring,
 		/* verify contents of skb */
 		if (ixgbe_check_lbtest_frame(rx_buffer, size))
 			count++;
+		else
+			break;
 
 		/* sync Rx buffer for device write */
 		dma_sync_single_for_device(rx_ring->dev,
@@ -1945,26 +1977,10 @@ static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring,
 					   ixgbe_rx_bufsz(rx_ring),
 					   DMA_FROM_DEVICE);
 
-		/* unmap buffer on Tx side */
-		tx_buffer = &tx_ring->tx_buffer_info[tx_ntc];
-
-		/* Free all the Tx ring sk_buffs */
-		dev_kfree_skb_any(tx_buffer->skb);
-
-		/* unmap skb header data */
-		dma_unmap_single(tx_ring->dev,
-				 dma_unmap_addr(tx_buffer, dma),
-				 dma_unmap_len(tx_buffer, len),
-				 DMA_TO_DEVICE);
-		dma_unmap_len_set(tx_buffer, len, 0);
-
-		/* increment Rx/Tx next to clean counters */
+		/* increment Rx next to clean counter */
 		rx_ntc++;
 		if (rx_ntc == rx_ring->count)
 			rx_ntc = 0;
-		tx_ntc++;
-		if (tx_ntc == tx_ring->count)
-			tx_ntc = 0;
 
 		/* fetch next descriptor */
 		rx_desc = IXGBE_RX_DESC(rx_ring, rx_ntc);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
index f1bfae0c41d0..8e2a957aca18 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
@@ -806,6 +806,7 @@ static void ixgbe_add_ring(struct ixgbe_ring *ring,
 	ring->next = head->ring;
 	head->ring = ring;
 	head->count++;
+	head->next_update = jiffies + 1;
 }
 
 /**
@@ -879,8 +880,11 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
 	/* initialize work limits */
 	q_vector->tx.work_limit = adapter->tx_work_limit;
 
-	/* initialize pointer to rings */
-	ring = q_vector->ring;
+	/* Initialize setting for adaptive ITR */
+	q_vector->tx.itr = IXGBE_ITR_ADAPTIVE_MAX_USECS |
+			   IXGBE_ITR_ADAPTIVE_LATENCY;
+	q_vector->rx.itr = IXGBE_ITR_ADAPTIVE_MAX_USECS |
+			   IXGBE_ITR_ADAPTIVE_LATENCY;
 
 	/* intialize ITR */
 	if (txr_count && !rxr_count) {
@@ -897,6 +901,9 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
 			q_vector->itr = adapter->rx_itr_setting;
 	}
 
+	/* initialize pointer to rings */
+	ring = q_vector->ring;
+
 	while (txr_count) {
 		/* assign generic ring traits */
 		ring->dev = &adapter->pdev->dev;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 935a2f15b0b0..ca06c3cc2ca8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1620,6 +1620,7 @@ static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring,
 	bi->page = page;
 	bi->page_offset = ixgbe_rx_offset(rx_ring);
 	bi->pagecnt_bias = 1;
+	rx_ring->rx_stats.alloc_rx_page++;
 
 	return true;
 }
@@ -2133,6 +2134,21 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
 #if L1_CACHE_BYTES < 128
 	prefetch(xdp->data + L1_CACHE_BYTES);
 #endif
+	/* Note, we get here by enabling legacy-rx via:
+	 *
+	 *    ethtool --set-priv-flags <dev> legacy-rx on
+	 *
+	 * In this mode, we currently get 0 extra XDP headroom as
+	 * opposed to having legacy-rx off, where we process XDP
+	 * packets going to stack via ixgbe_build_skb(). The latter
+	 * provides us currently with 192 bytes of headroom.
+	 *
+	 * For ixgbe_construct_skb() mode it means that the
+	 * xdp->data_meta will always point to xdp->data, since
+	 * the helper cannot expand the head. Should this ever
+	 * change in future for legacy-rx mode on, then lets also
+	 * add xdp->data_meta handling here.
+	 */
 
 	/* allocate a skb to store the frags */
 	skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBE_RX_HDR_SIZE);
@@ -2165,6 +2181,7 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
 				       struct xdp_buff *xdp,
 				       union ixgbe_adv_rx_desc *rx_desc)
 {
+	unsigned int metasize = xdp->data - xdp->data_meta;
 #if (PAGE_SIZE < 8192)
 	unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;
 #else
@@ -2174,10 +2191,14 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
 #endif
 	struct sk_buff *skb;
 
-	/* prefetch first cache line of first page */
-	prefetch(xdp->data);
+	/* Prefetch first cache line of first page. If xdp->data_meta
+	 * is unused, this points extactly as xdp->data, otherwise we
+	 * likely have a consumer accessing first few bytes of meta
+	 * data, and then actual data.
+	 */
+	prefetch(xdp->data_meta);
 #if L1_CACHE_BYTES < 128
-	prefetch(xdp->data + L1_CACHE_BYTES);
+	prefetch(xdp->data_meta + L1_CACHE_BYTES);
 #endif
 
 	/* build an skb to around the page buffer */
@@ -2188,6 +2209,8 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
 	/* update pointers within the skb to store the data */
 	skb_reserve(skb, xdp->data - xdp->data_hard_start);
 	__skb_put(skb, xdp->data_end - xdp->data);
+	if (metasize)
+		skb_metadata_set(skb, metasize);
 
 	/* record DMA address if this is the start of a chain of buffers */
 	if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))
@@ -2326,6 +2349,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 		if (!skb) {
 			xdp.data = page_address(rx_buffer->page) +
 				   rx_buffer->page_offset;
+			xdp.data_meta = xdp.data;
 			xdp.data_hard_start = xdp.data -
 					      ixgbe_rx_offset(rx_ring);
 			xdp.data_end = xdp.data + size;
@@ -2516,50 +2540,174 @@ enum latency_range {
 static void ixgbe_update_itr(struct ixgbe_q_vector *q_vector,
 			     struct ixgbe_ring_container *ring_container)
 {
-	int bytes = ring_container->total_bytes;
-	int packets = ring_container->total_packets;
-	u32 timepassed_us;
-	u64 bytes_perint;
-	u8 itr_setting = ring_container->itr;
+	unsigned int itr = IXGBE_ITR_ADAPTIVE_MIN_USECS |
+			   IXGBE_ITR_ADAPTIVE_LATENCY;
+	unsigned int avg_wire_size, packets, bytes;
+	unsigned long next_update = jiffies;
 
-	if (packets == 0)
+	/* If we don't have any rings just leave ourselves set for maximum
+	 * possible latency so we take ourselves out of the equation.
+	 */
+	if (!ring_container->ring)
 		return;
 
-	/* simple throttlerate management
-	 *   0-10MB/s   lowest (100000 ints/s)
-	 *  10-20MB/s   low    (20000 ints/s)
-	 *  20-1249MB/s bulk   (12000 ints/s)
+	/* If we didn't update within up to 1 - 2 jiffies we can assume
+	 * that either packets are coming in so slow there hasn't been
+	 * any work, or that there is so much work that NAPI is dealing
+	 * with interrupt moderation and we don't need to do anything.
 	 */
-	/* what was last interrupt timeslice? */
-	timepassed_us = q_vector->itr >> 2;
-	if (timepassed_us == 0)
-		return;
+	if (time_after(next_update, ring_container->next_update))
+		goto clear_counts;
 
-	bytes_perint = bytes / timepassed_us; /* bytes/usec */
+	packets = ring_container->total_packets;
 
-	switch (itr_setting) {
-	case lowest_latency:
-		if (bytes_perint > 10)
-			itr_setting = low_latency;
-		break;
-	case low_latency:
-		if (bytes_perint > 20)
-			itr_setting = bulk_latency;
-		else if (bytes_perint <= 10)
-			itr_setting = lowest_latency;
+	/* We have no packets to actually measure against. This means
+	 * either one of the other queues on this vector is active or
+	 * we are a Tx queue doing TSO with too high of an interrupt rate.
+	 *
+	 * When this occurs just tick up our delay by the minimum value
+	 * and hope that this extra delay will prevent us from being called
+	 * without any work on our queue.
+	 */
+	if (!packets) {
+		itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC;
+		if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS)
+			itr = IXGBE_ITR_ADAPTIVE_MAX_USECS;
+		itr += ring_container->itr & IXGBE_ITR_ADAPTIVE_LATENCY;
+		goto clear_counts;
+	}
+
+	bytes = ring_container->total_bytes;
+
+	/* If packets are less than 4 or bytes are less than 9000 assume
+	 * insufficient data to use bulk rate limiting approach. We are
+	 * likely latency driven.
+	 */
+	if (packets < 4 && bytes < 9000) {
+		itr = IXGBE_ITR_ADAPTIVE_LATENCY;
+		goto adjust_by_size;
+	}
+
+	/* Between 4 and 48 we can assume that our current interrupt delay
+	 * is only slightly too low. As such we should increase it by a small
+	 * fixed amount.
+	 */
+	if (packets < 48) {
+		itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC;
+		if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS)
+			itr = IXGBE_ITR_ADAPTIVE_MAX_USECS;
+		goto clear_counts;
+	}
+
+	/* Between 48 and 96 is our "goldilocks" zone where we are working
+	 * out "just right". Just report that our current ITR is good for us.
+	 */
+	if (packets < 96) {
+		itr = q_vector->itr >> 2;
+		goto clear_counts;
+	}
+
+	/* If packet count is 96 or greater we are likely looking at a slight
+	 * overrun of the delay we want. Try halving our delay to see if that
+	 * will cut the number of packets in half per interrupt.
+	 */
+	if (packets < 256) {
+		itr = q_vector->itr >> 3;
+		if (itr < IXGBE_ITR_ADAPTIVE_MIN_USECS)
+			itr = IXGBE_ITR_ADAPTIVE_MIN_USECS;
+		goto clear_counts;
+	}
+
+	/* The paths below assume we are dealing with a bulk ITR since number
+	 * of packets is 256 or greater. We are just going to have to compute
+	 * a value and try to bring the count under control, though for smaller
+	 * packet sizes there isn't much we can do as NAPI polling will likely
+	 * be kicking in sooner rather than later.
+	 */
+	itr = IXGBE_ITR_ADAPTIVE_BULK;
+
+adjust_by_size:
+	/* If packet counts are 256 or greater we can assume we have a gross
+	 * overestimation of what the rate should be. Instead of trying to fine
+	 * tune it just use the formula below to try and dial in an exact value
+	 * give the current packet size of the frame.
+	 */
+	avg_wire_size = bytes / packets;
+
+	/* The following is a crude approximation of:
+	 *  wmem_default / (size + overhead) = desired_pkts_per_int
+	 *  rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+	 *  (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
+	 *
+	 * Assuming wmem_default is 212992 and overhead is 640 bytes per
+	 * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+	 * formula down to
+	 *
+	 *  (170 * (size + 24)) / (size + 640) = ITR
+	 *
+	 * We first do some math on the packet size and then finally bitshift
+	 * by 8 after rounding up. We also have to account for PCIe link speed
+	 * difference as ITR scales based on this.
+	 */
+	if (avg_wire_size <= 60) {
+		/* Start at 50k ints/sec */
+		avg_wire_size = 5120;
+	} else if (avg_wire_size <= 316) {
+		/* 50K ints/sec to 16K ints/sec */
+		avg_wire_size *= 40;
+		avg_wire_size += 2720;
+	} else if (avg_wire_size <= 1084) {
+		/* 16K ints/sec to 9.2K ints/sec */
+		avg_wire_size *= 15;
+		avg_wire_size += 11452;
+	} else if (avg_wire_size <= 1980) {
+		/* 9.2K ints/sec to 8K ints/sec */
+		avg_wire_size *= 5;
+		avg_wire_size += 22420;
+	} else {
+		/* plateau at a limit of 8K ints/sec */
+		avg_wire_size = 32256;
+	}
+
+	/* If we are in low latency mode half our delay which doubles the rate
+	 * to somewhere between 100K to 16K ints/sec
+	 */
+	if (itr & IXGBE_ITR_ADAPTIVE_LATENCY)
+		avg_wire_size >>= 1;
+
+	/* Resultant value is 256 times larger than it needs to be. This
+	 * gives us room to adjust the value as needed to either increase
+	 * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
+	 *
+	 * Use addition as we have already recorded the new latency flag
+	 * for the ITR value.
+	 */
+	switch (q_vector->adapter->link_speed) {
+	case IXGBE_LINK_SPEED_10GB_FULL:
+	case IXGBE_LINK_SPEED_100_FULL:
+	default:
+		itr += DIV_ROUND_UP(avg_wire_size,
+				    IXGBE_ITR_ADAPTIVE_MIN_INC * 256) *
+		       IXGBE_ITR_ADAPTIVE_MIN_INC;
 		break;
-	case bulk_latency:
-		if (bytes_perint <= 20)
-			itr_setting = low_latency;
+	case IXGBE_LINK_SPEED_2_5GB_FULL:
+	case IXGBE_LINK_SPEED_1GB_FULL:
+	case IXGBE_LINK_SPEED_10_FULL:
+		itr += DIV_ROUND_UP(avg_wire_size,
+				    IXGBE_ITR_ADAPTIVE_MIN_INC * 64) *
+		       IXGBE_ITR_ADAPTIVE_MIN_INC;
 		break;
 	}
 
-	/* clear work counters since we have the values we need */
+clear_counts:
+	/* write back value */
+	ring_container->itr = itr;
+
+	/* next update should occur within next jiffy */
+	ring_container->next_update = next_update + 1;
+
 	ring_container->total_bytes = 0;
 	ring_container->total_packets = 0;
-
-	/* write updated itr to ring container */
-	ring_container->itr = itr_setting;
 }
 
 /**
@@ -2601,34 +2749,19 @@ void ixgbe_write_eitr(struct ixgbe_q_vector *q_vector)
 
 static void ixgbe_set_itr(struct ixgbe_q_vector *q_vector)
 {
-	u32 new_itr = q_vector->itr;
-	u8 current_itr;
+	u32 new_itr;
 
 	ixgbe_update_itr(q_vector, &q_vector->tx);
 	ixgbe_update_itr(q_vector, &q_vector->rx);
 
-	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
+	/* use the smallest value of new ITR delay calculations */
+	new_itr = min(q_vector->rx.itr, q_vector->tx.itr);
 
-	switch (current_itr) {
-	/* counts and packets in update_itr are dependent on these numbers */
-	case lowest_latency:
-		new_itr = IXGBE_100K_ITR;
-		break;
-	case low_latency:
-		new_itr = IXGBE_20K_ITR;
-		break;
-	case bulk_latency:
-		new_itr = IXGBE_12K_ITR;
-		break;
-	default:
-		break;
-	}
+	/* Clear latency flag if set, shift into correct position */
+	new_itr &= ~IXGBE_ITR_ADAPTIVE_LATENCY;
+	new_itr <<= 2;
 
 	if (new_itr != q_vector->itr) {
-		/* do an exponential smoothing */
-		new_itr = (10 * new_itr * q_vector->itr) /
-			  ((9 * new_itr) + q_vector->itr);
-
 		/* save the algorithm value here */
 		q_vector->itr = new_itr;
 
@@ -6771,6 +6904,7 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter)
 	u32 i, missed_rx = 0, mpc, bprc, lxon, lxoff, xon_off_tot;
 	u64 non_eop_descs = 0, restart_queue = 0, tx_busy = 0;
 	u64 alloc_rx_page_failed = 0, alloc_rx_buff_failed = 0;
+	u64 alloc_rx_page = 0;
 	u64 bytes = 0, packets = 0, hw_csum_rx_error = 0;
 
 	if (test_bit(__IXGBE_DOWN, &adapter->state) ||
@@ -6791,6 +6925,7 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter)
 	for (i = 0; i < adapter->num_rx_queues; i++) {
 		struct ixgbe_ring *rx_ring = adapter->rx_ring[i];
 		non_eop_descs += rx_ring->rx_stats.non_eop_descs;
+		alloc_rx_page += rx_ring->rx_stats.alloc_rx_page;
 		alloc_rx_page_failed += rx_ring->rx_stats.alloc_rx_page_failed;
 		alloc_rx_buff_failed += rx_ring->rx_stats.alloc_rx_buff_failed;
 		hw_csum_rx_error += rx_ring->rx_stats.csum_err;
@@ -6798,6 +6933,7 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter)
 		packets += rx_ring->stats.packets;
 	}
 	adapter->non_eop_descs = non_eop_descs;
+	adapter->alloc_rx_page = alloc_rx_page;
 	adapter->alloc_rx_page_failed = alloc_rx_page_failed;
 	adapter->alloc_rx_buff_failed = alloc_rx_buff_failed;
 	adapter->hw_csum_rx_error = hw_csum_rx_error;
@@ -7554,9 +7690,9 @@ static void ixgbe_sfp_link_config_subtask(struct ixgbe_adapter *adapter)
  * ixgbe_service_timer - Timer Call-back
  * @data: pointer to adapter cast into an unsigned long
  **/
-static void ixgbe_service_timer(unsigned long data)
+static void ixgbe_service_timer(struct timer_list *t)
 {
-	struct ixgbe_adapter *adapter = (struct ixgbe_adapter *)data;
+	struct ixgbe_adapter *adapter = from_timer(adapter, t, service_timer);
 	unsigned long next_event_offset;
 
 	/* poll faster when waiting for link */
@@ -9223,13 +9359,10 @@ free_jump:
 	return err;
 }
 
-static int ixgbe_setup_tc_cls_u32(struct net_device *dev,
+static int ixgbe_setup_tc_cls_u32(struct ixgbe_adapter *adapter,
 				  struct tc_cls_u32_offload *cls_u32)
 {
-	struct ixgbe_adapter *adapter = netdev_priv(dev);
-
-	if (!is_classid_clsact_ingress(cls_u32->common.classid) ||
-	    cls_u32->common.chain_index)
+	if (cls_u32->common.chain_index)
 		return -EOPNOTSUPP;
 
 	switch (cls_u32->command) {
@@ -9248,6 +9381,43 @@ static int ixgbe_setup_tc_cls_u32(struct net_device *dev,
 	}
 }
 
+static int ixgbe_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+				   void *cb_priv)
+{
+	struct ixgbe_adapter *adapter = cb_priv;
+
+	if (!tc_can_offload(adapter->netdev))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case TC_SETUP_CLSU32:
+		return ixgbe_setup_tc_cls_u32(adapter, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int ixgbe_setup_tc_block(struct net_device *dev,
+				struct tc_block_offload *f)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+
+	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+		return -EOPNOTSUPP;
+
+	switch (f->command) {
+	case TC_BLOCK_BIND:
+		return tcf_block_cb_register(f->block, ixgbe_setup_tc_block_cb,
+					     adapter, adapter);
+	case TC_BLOCK_UNBIND:
+		tcf_block_cb_unregister(f->block, ixgbe_setup_tc_block_cb,
+					adapter);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static int ixgbe_setup_tc_mqprio(struct net_device *dev,
 				 struct tc_mqprio_qopt *mqprio)
 {
@@ -9259,9 +9429,9 @@ static int __ixgbe_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			    void *type_data)
 {
 	switch (type) {
-	case TC_SETUP_CLSU32:
-		return ixgbe_setup_tc_cls_u32(dev, type_data);
-	case TC_SETUP_MQPRIO:
+	case TC_SETUP_BLOCK:
+		return ixgbe_setup_tc_block(dev, type_data);
+	case TC_SETUP_QDISC_MQPRIO:
 		return ixgbe_setup_tc_mqprio(dev, type_data);
 	default:
 		return -EOPNOTSUPP;
@@ -9733,6 +9903,17 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
 	limit = find_last_bit(&adapter->fwd_bitmask, 32);
 	adapter->ring_feature[RING_F_VMDQ].limit = limit + 1;
 	ixgbe_fwd_ring_down(fwd_adapter->netdev, fwd_adapter);
+
+	/* go back to full RSS if we're done with our VMQs */
+	if (adapter->ring_feature[RING_F_VMDQ].limit == 1) {
+		int rss = min_t(int, ixgbe_max_rss_indices(adapter),
+				num_online_cpus());
+
+		adapter->flags &= ~IXGBE_FLAG_VMDQ_ENABLED;
+		adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED;
+		adapter->ring_feature[RING_F_RSS].limit = rss;
+	}
+
 	ixgbe_setup_tc(pdev, netdev_get_num_tc(pdev));
 	netdev_dbg(pdev, "pool %i:%i queues %i:%i VSI bitmask %lx\n",
 		   fwd_adapter->pool, adapter->num_rx_pools,
@@ -9823,7 +10004,7 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
 	return 0;
 }
 
-static int ixgbe_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
 
@@ -9932,7 +10113,7 @@ static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_udp_tunnel_add	= ixgbe_add_udp_tunnel_port,
 	.ndo_udp_tunnel_del	= ixgbe_del_udp_tunnel_port,
 	.ndo_features_check	= ixgbe_features_check,
-	.ndo_xdp		= ixgbe_xdp,
+	.ndo_bpf		= ixgbe_xdp,
 	.ndo_xdp_xmit		= ixgbe_xdp_xmit,
 	.ndo_xdp_flush		= ixgbe_xdp_flush,
 };
@@ -10355,8 +10536,7 @@ skip_sriov:
 	ether_addr_copy(hw->mac.addr, hw->mac.perm_addr);
 	ixgbe_mac_set_default_filter(adapter);
 
-	setup_timer(&adapter->service_timer, &ixgbe_service_timer,
-		    (unsigned long) adapter);
+	timer_setup(&adapter->service_timer, ixgbe_service_timer, 0);
 
 	if (ixgbe_removed(hw->hw_addr)) {
 		err = -EIO;
@@ -10712,6 +10892,9 @@ skip_bad_vf_detection:
 	if (!test_bit(__IXGBE_SERVICE_INITED, &adapter->state))
 		return PCI_ERS_RESULT_DISCONNECT;
 
+	if (!netif_device_present(netdev))
+		return PCI_ERS_RESULT_DISCONNECT;
+
 	rtnl_lock();
 	netif_device_detach(netdev);
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
index 6ea0d6a5fb90..b8c5fd2a2115 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
@@ -619,12 +619,6 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask)
 		usleep_range(5000, 10000);
 	}
 
-	/* Failed to get SW only semaphore */
-	if (swmask == IXGBE_GSSR_SW_MNG_SM) {
-		hw_dbg(hw, "Failed to get SW only semaphore\n");
-		return IXGBE_ERR_SWFW_SYNC;
-	}
-
 	/* If the resource is not released by the FW/HW the SW can assume that
 	 * the FW/HW malfunctions. In that case the SW should set the SW bit(s)
 	 * of the requested resource(s) while ignoring the corresponding FW/HW
@@ -647,7 +641,8 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask)
 	 */
 	if (swfw_sync & swmask) {
 		u32 rmask = IXGBE_GSSR_EEP_SM | IXGBE_GSSR_PHY0_SM |
-			    IXGBE_GSSR_PHY1_SM | IXGBE_GSSR_MAC_CSR_SM;
+			    IXGBE_GSSR_PHY1_SM | IXGBE_GSSR_MAC_CSR_SM |
+			    IXGBE_GSSR_SW_MNG_SM;
 
 		if (swi2c_mask)
 			rmask |= IXGBE_GSSR_I2C_MASK;
@@ -763,6 +758,8 @@ static void ixgbe_release_swfw_sync_semaphore(struct ixgbe_hw *hw)
  **/
 void ixgbe_init_swfw_sync_X540(struct ixgbe_hw *hw)
 {
+	u32 rmask;
+
 	/* First try to grab the semaphore but we don't need to bother
 	 * looking to see whether we got the lock or not since we do
 	 * the same thing regardless of whether we got the lock or not.
@@ -771,6 +768,14 @@ void ixgbe_init_swfw_sync_X540(struct ixgbe_hw *hw)
 	 */
 	ixgbe_get_swfw_sync_semaphore(hw);
 	ixgbe_release_swfw_sync_semaphore(hw);
+
+	/* Acquire and release all software resources. */
+	rmask = IXGBE_GSSR_EEP_SM | IXGBE_GSSR_PHY0_SM |
+		IXGBE_GSSR_PHY1_SM | IXGBE_GSSR_MAC_CSR_SM |
+		IXGBE_GSSR_SW_MNG_SM | IXGBE_GSSR_I2C_MASK;
+
+	ixgbe_acquire_swfw_sync_X540(hw, rmask);
+	ixgbe_release_swfw_sync_X540(hw, rmask);
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 19fbb2f28ea4..cb7da5f9c4da 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -900,6 +900,8 @@ static s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw,
 		/* convert offset from words to bytes */
 		buffer.address = cpu_to_be32((offset + current_word) * 2);
 		buffer.length = cpu_to_be16(words_to_read * 2);
+		buffer.pad2 = 0;
+		buffer.pad3 = 0;
 
 		status = ixgbe_hic_unlocked(hw, (u32 *)&buffer, sizeof(buffer),
 					    IXGBE_HI_COMMAND_TIMEOUT);
@@ -3192,6 +3194,9 @@ static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw)
 
 	/* Identify the PHY or SFP module */
 	ret_val = phy->ops.identify(hw);
+	if (ret_val == IXGBE_ERR_SFP_NOT_SUPPORTED ||
+	    ret_val == IXGBE_ERR_PHY_ADDR_INVALID)
+		return ret_val;
 
 	/* Setup function pointers based on detected hardware */
 	ixgbe_init_mac_link_ops_X550em(hw);
@@ -3394,9 +3399,10 @@ static s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw)
 	ixgbe_clear_tx_pending(hw);
 
 	/* PHY ops must be identified and initialized prior to reset */
-
-	/* Identify PHY and related function pointers */
 	status = hw->phy.ops.init(hw);
+	if (status == IXGBE_ERR_SFP_NOT_SUPPORTED ||
+	    status == IXGBE_ERR_PHY_ADDR_INVALID)
+		return status;
 
 	/* start the external PHY */
 	if (hw->phy.type == ixgbe_phy_x550em_ext_t) {
@@ -3884,7 +3890,7 @@ static const struct ixgbe_mac_operations mac_ops_X550EM_x_fw = {
 	.write_iosf_sb_reg	= ixgbe_write_iosf_sb_reg_x550,
 };
 
-static struct ixgbe_mac_operations mac_ops_x550em_a = {
+static const struct ixgbe_mac_operations mac_ops_x550em_a = {
 	X550_COMMON_MAC
 	.led_on			= ixgbe_led_on_t_x550em,
 	.led_off		= ixgbe_led_off_t_x550em,
@@ -3905,7 +3911,7 @@ static struct ixgbe_mac_operations mac_ops_x550em_a = {
 	.write_iosf_sb_reg	= ixgbe_write_iosf_sb_reg_x550a,
 };
 
-static struct ixgbe_mac_operations mac_ops_x550em_a_fw = {
+static const struct ixgbe_mac_operations mac_ops_x550em_a_fw = {
 	X550_COMMON_MAC
 	.led_on			= ixgbe_led_on_generic,
 	.led_off		= ixgbe_led_off_generic,
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index cacb30682434..feed11bc9ddf 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -2747,9 +2747,10 @@ void ixgbevf_update_stats(struct ixgbevf_adapter *adapter)
  * ixgbevf_service_timer - Timer Call-back
  * @data: pointer to adapter cast into an unsigned long
  **/
-static void ixgbevf_service_timer(unsigned long data)
+static void ixgbevf_service_timer(struct timer_list *t)
 {
-	struct ixgbevf_adapter *adapter = (struct ixgbevf_adapter *)data;
+	struct ixgbevf_adapter *adapter = from_timer(adapter, t,
+						     service_timer);
 
 	/* Reset the timer */
 	mod_timer(&adapter->service_timer, (HZ * 2) + jiffies);
@@ -4120,8 +4121,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto err_sw_init;
 	}
 
-	setup_timer(&adapter->service_timer, &ixgbevf_service_timer,
-		    (unsigned long)adapter);
+	timer_setup(&adapter->service_timer, ixgbevf_service_timer, 0);
 
 	INIT_WORK(&adapter->service_task, ixgbevf_service_task);
 	set_bit(__IXGBEVF_SERVICE_INITED, &adapter->state);
diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index 3c0a6451273d..ae195f8adff5 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -4,6 +4,7 @@
  *  Copyright 2004 IDT Inc. (rischelp@idt.com)
  *  Copyright 2006 Felix Fietkau <nbd@openwrt.org>
  *  Copyright 2008 Florian Fainelli <florian@openwrt.org>
+ *  Copyright 2017 Roman Yeryomin <roman@advem.lv>
  *
  *  This program is free software; you can redistribute  it and/or modify it
  *  under  the terms of  the GNU General  Public License as published by the
@@ -64,9 +65,9 @@
 #include <asm/mach-rc32434/eth.h>
 #include <asm/mach-rc32434/dma_v.h>
 
-#define DRV_NAME        "korina"
-#define DRV_VERSION     "0.10"
-#define DRV_RELDATE     "04Mar2008"
+#define DRV_NAME	"korina"
+#define DRV_VERSION	"0.20"
+#define DRV_RELDATE	"15Sep2017"
 
 #define STATION_ADDRESS_HIGH(dev) (((dev)->dev_addr[0] << 8) | \
 				   ((dev)->dev_addr[1]))
@@ -75,7 +76,7 @@
 				   ((dev)->dev_addr[4] << 8)  | \
 				   ((dev)->dev_addr[5]))
 
-#define MII_CLOCK 1250000 	/* no more than 2.5MHz */
+#define MII_CLOCK	1250000 /* no more than 2.5MHz */
 
 /* the following must be powers of two */
 #define KORINA_NUM_RDS	64  /* number of receive descriptors */
@@ -87,15 +88,19 @@
 #define KORINA_RBSIZE	1536 /* size of one resource buffer = Ether MTU */
 #define KORINA_RDS_MASK	(KORINA_NUM_RDS - 1)
 #define KORINA_TDS_MASK	(KORINA_NUM_TDS - 1)
-#define RD_RING_SIZE 	(KORINA_NUM_RDS * sizeof(struct dma_desc))
+#define RD_RING_SIZE	(KORINA_NUM_RDS * sizeof(struct dma_desc))
 #define TD_RING_SIZE	(KORINA_NUM_TDS * sizeof(struct dma_desc))
 
-#define TX_TIMEOUT 	(6000 * HZ / 1000)
+#define TX_TIMEOUT	(6000 * HZ / 1000)
 
-enum chain_status { desc_filled, desc_empty };
-#define IS_DMA_FINISHED(X)   (((X) & (DMA_DESC_FINI)) != 0)
-#define IS_DMA_DONE(X)   (((X) & (DMA_DESC_DONE)) != 0)
-#define RCVPKT_LENGTH(X)     (((X) & ETH_RX_LEN) >> ETH_RX_LEN_BIT)
+enum chain_status {
+	desc_filled,
+	desc_empty
+};
+
+#define IS_DMA_FINISHED(X)	(((X) & (DMA_DESC_FINI)) != 0)
+#define IS_DMA_DONE(X)		(((X) & (DMA_DESC_DONE)) != 0)
+#define RCVPKT_LENGTH(X)	(((X) & ETH_RX_LEN) >> ETH_RX_LEN_BIT)
 
 /* Information that need to be kept for each board. */
 struct korina_private {
@@ -122,10 +127,8 @@ struct korina_private {
 
 	int rx_irq;
 	int tx_irq;
-	int ovr_irq;
-	int und_irq;
 
-	spinlock_t lock;        /* NIC xmit lock */
+	spinlock_t lock;	/* NIC xmit lock */
 
 	int dma_halt_cnt;
 	int dma_run_cnt;
@@ -148,17 +151,17 @@ static inline void korina_start_dma(struct dma_reg *ch, u32 dma_addr)
 static inline void korina_abort_dma(struct net_device *dev,
 					struct dma_reg *ch)
 {
-       if (readl(&ch->dmac) & DMA_CHAN_RUN_BIT) {
-	       writel(0x10, &ch->dmac);
+	if (readl(&ch->dmac) & DMA_CHAN_RUN_BIT) {
+		writel(0x10, &ch->dmac);
 
-	       while (!(readl(&ch->dmas) & DMA_STAT_HALT))
-		       netif_trans_update(dev);
+		while (!(readl(&ch->dmas) & DMA_STAT_HALT))
+			netif_trans_update(dev);
 
-	       writel(0, &ch->dmas);
-       }
+		writel(0, &ch->dmas);
+	}
 
-       writel(0, &ch->dmadptr);
-       writel(0, &ch->dmandptr);
+	writel(0, &ch->dmadptr);
+	writel(0, &ch->dmandptr);
 }
 
 static inline void korina_chain_dma(struct dma_reg *ch, u32 dma_addr)
@@ -365,59 +368,60 @@ static int korina_rx(struct net_device *dev, int limit)
 		if ((KORINA_RBSIZE - (u32)DMA_COUNT(rd->control)) == 0)
 			break;
 
-		/* Update statistics counters */
-		if (devcs & ETH_RX_CRC)
-			dev->stats.rx_crc_errors++;
-		if (devcs & ETH_RX_LOR)
-			dev->stats.rx_length_errors++;
-		if (devcs & ETH_RX_LE)
-			dev->stats.rx_length_errors++;
-		if (devcs & ETH_RX_OVR)
-			dev->stats.rx_fifo_errors++;
-		if (devcs & ETH_RX_CV)
-			dev->stats.rx_frame_errors++;
-		if (devcs & ETH_RX_CES)
-			dev->stats.rx_length_errors++;
-		if (devcs & ETH_RX_MP)
-			dev->stats.multicast++;
+		/* check that this is a whole packet
+		 * WARNING: DMA_FD bit incorrectly set
+		 * in Rc32434 (errata ref #077) */
+		if (!(devcs & ETH_RX_LD))
+			goto next;
 
-		if ((devcs & ETH_RX_LD) != ETH_RX_LD) {
-			/* check that this is a whole packet
-			 * WARNING: DMA_FD bit incorrectly set
-			 * in Rc32434 (errata ref #077) */
+		if (!(devcs & ETH_RX_ROK)) {
+			/* Update statistics counters */
 			dev->stats.rx_errors++;
 			dev->stats.rx_dropped++;
-		} else if ((devcs & ETH_RX_ROK)) {
-			pkt_len = RCVPKT_LENGTH(devcs);
+			if (devcs & ETH_RX_CRC)
+				dev->stats.rx_crc_errors++;
+			if (devcs & ETH_RX_LE)
+				dev->stats.rx_length_errors++;
+			if (devcs & ETH_RX_OVR)
+				dev->stats.rx_fifo_errors++;
+			if (devcs & ETH_RX_CV)
+				dev->stats.rx_frame_errors++;
+			if (devcs & ETH_RX_CES)
+				dev->stats.rx_frame_errors++;
+
+			goto next;
+		}
 
-			/* must be the (first and) last
-			 * descriptor then */
-			pkt_buf = (u8 *)lp->rx_skb[lp->rx_next_done]->data;
+		pkt_len = RCVPKT_LENGTH(devcs);
 
-			/* invalidate the cache */
-			dma_cache_inv((unsigned long)pkt_buf, pkt_len - 4);
+		/* must be the (first and) last
+		 * descriptor then */
+		pkt_buf = (u8 *)lp->rx_skb[lp->rx_next_done]->data;
 
-			/* Malloc up new buffer. */
-			skb_new = netdev_alloc_skb_ip_align(dev, KORINA_RBSIZE);
+		/* invalidate the cache */
+		dma_cache_inv((unsigned long)pkt_buf, pkt_len - 4);
 
-			if (!skb_new)
-				break;
-			/* Do not count the CRC */
-			skb_put(skb, pkt_len - 4);
-			skb->protocol = eth_type_trans(skb, dev);
+		/* Malloc up new buffer. */
+		skb_new = netdev_alloc_skb_ip_align(dev, KORINA_RBSIZE);
 
-			/* Pass the packet to upper layers */
-			netif_receive_skb(skb);
-			dev->stats.rx_packets++;
-			dev->stats.rx_bytes += pkt_len;
+		if (!skb_new)
+			break;
+		/* Do not count the CRC */
+		skb_put(skb, pkt_len - 4);
+		skb->protocol = eth_type_trans(skb, dev);
 
-			/* Update the mcast stats */
-			if (devcs & ETH_RX_MP)
-				dev->stats.multicast++;
+		/* Pass the packet to upper layers */
+		napi_gro_receive(&lp->napi, skb);
+		dev->stats.rx_packets++;
+		dev->stats.rx_bytes += pkt_len;
 
-			lp->rx_skb[lp->rx_next_done] = skb_new;
-		}
+		/* Update the mcast stats */
+		if (devcs & ETH_RX_MP)
+			dev->stats.multicast++;
+
+		lp->rx_skb[lp->rx_next_done] = skb_new;
 
+next:
 		rd->devcs = 0;
 
 		/* Restore descriptor's curr_addr */
@@ -649,10 +653,10 @@ static void korina_check_media(struct net_device *dev, unsigned int init_media)
 						&lp->eth_regs->ethmac2);
 }
 
-static void korina_poll_media(unsigned long data)
+static void korina_poll_media(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *) data;
-	struct korina_private *lp = netdev_priv(dev);
+	struct korina_private *lp = from_timer(lp, t, media_check_timer);
+	struct net_device *dev = lp->dev;
 
 	korina_check_media(dev, 0);
 	mod_timer(&lp->media_check_timer, jiffies + HZ);
@@ -686,7 +690,7 @@ static int korina_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 
 /* ethtool helpers */
 static void netdev_get_drvinfo(struct net_device *dev,
-			struct ethtool_drvinfo *info)
+				struct ethtool_drvinfo *info)
 {
 	struct korina_private *lp = netdev_priv(dev);
 
@@ -729,10 +733,10 @@ static u32 netdev_get_link(struct net_device *dev)
 }
 
 static const struct ethtool_ops netdev_ethtool_ops = {
-	.get_drvinfo            = netdev_get_drvinfo,
-	.get_link               = netdev_get_link,
-	.get_link_ksettings     = netdev_get_link_ksettings,
-	.set_link_ksettings     = netdev_set_link_ksettings,
+	.get_drvinfo		= netdev_get_drvinfo,
+	.get_link		= netdev_get_link,
+	.get_link_ksettings	= netdev_get_link_ksettings,
+	.set_link_ksettings	= netdev_set_link_ksettings,
 };
 
 static int korina_alloc_ring(struct net_device *dev)
@@ -864,7 +868,7 @@ static int korina_init(struct net_device *dev)
 	/* Management Clock Prescaler Divisor
 	 * Clock independent setting */
 	writel(((idt_cpu_freq) / MII_CLOCK + 1) & ~1,
-		       &lp->eth_regs->ethmcp);
+			&lp->eth_regs->ethmcp);
 
 	/* don't transmit until fifo contains 48b */
 	writel(48, &lp->eth_regs->ethfifott);
@@ -891,8 +895,6 @@ static void korina_restart_task(struct work_struct *work)
 	 */
 	disable_irq(lp->rx_irq);
 	disable_irq(lp->tx_irq);
-	disable_irq(lp->ovr_irq);
-	disable_irq(lp->und_irq);
 
 	writel(readl(&lp->tx_dma_regs->dmasm) |
 				DMA_STAT_FINI | DMA_STAT_ERR,
@@ -911,40 +913,10 @@ static void korina_restart_task(struct work_struct *work)
 	}
 	korina_multicast_list(dev);
 
-	enable_irq(lp->und_irq);
-	enable_irq(lp->ovr_irq);
 	enable_irq(lp->tx_irq);
 	enable_irq(lp->rx_irq);
 }
 
-static void korina_clear_and_restart(struct net_device *dev, u32 value)
-{
-	struct korina_private *lp = netdev_priv(dev);
-
-	netif_stop_queue(dev);
-	writel(value, &lp->eth_regs->ethintfc);
-	schedule_work(&lp->restart_task);
-}
-
-/* Ethernet Tx Underflow interrupt */
-static irqreturn_t korina_und_interrupt(int irq, void *dev_id)
-{
-	struct net_device *dev = dev_id;
-	struct korina_private *lp = netdev_priv(dev);
-	unsigned int und;
-
-	spin_lock(&lp->lock);
-
-	und = readl(&lp->eth_regs->ethintfc);
-
-	if (und & ETH_INT_FC_UND)
-		korina_clear_and_restart(dev, und & ~ETH_INT_FC_UND);
-
-	spin_unlock(&lp->lock);
-
-	return IRQ_HANDLED;
-}
-
 static void korina_tx_timeout(struct net_device *dev)
 {
 	struct korina_private *lp = netdev_priv(dev);
@@ -952,25 +924,6 @@ static void korina_tx_timeout(struct net_device *dev)
 	schedule_work(&lp->restart_task);
 }
 
-/* Ethernet Rx Overflow interrupt */
-static irqreturn_t
-korina_ovr_interrupt(int irq, void *dev_id)
-{
-	struct net_device *dev = dev_id;
-	struct korina_private *lp = netdev_priv(dev);
-	unsigned int ovr;
-
-	spin_lock(&lp->lock);
-	ovr = readl(&lp->eth_regs->ethintfc);
-
-	if (ovr & ETH_INT_FC_OVR)
-		korina_clear_and_restart(dev, ovr & ~ETH_INT_FC_OVR);
-
-	spin_unlock(&lp->lock);
-
-	return IRQ_HANDLED;
-}
-
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void korina_poll_controller(struct net_device *dev)
 {
@@ -993,48 +946,26 @@ static int korina_open(struct net_device *dev)
 	}
 
 	/* Install the interrupt handler
-	 * that handles the Done Finished
-	 * Ovr and Und Events */
+	 * that handles the Done Finished */
 	ret = request_irq(lp->rx_irq, korina_rx_dma_interrupt,
 			0, "Korina ethernet Rx", dev);
 	if (ret < 0) {
 		printk(KERN_ERR "%s: unable to get Rx DMA IRQ %d\n",
-		    dev->name, lp->rx_irq);
+			dev->name, lp->rx_irq);
 		goto err_release;
 	}
 	ret = request_irq(lp->tx_irq, korina_tx_dma_interrupt,
 			0, "Korina ethernet Tx", dev);
 	if (ret < 0) {
 		printk(KERN_ERR "%s: unable to get Tx DMA IRQ %d\n",
-		    dev->name, lp->tx_irq);
+			dev->name, lp->tx_irq);
 		goto err_free_rx_irq;
 	}
 
-	/* Install handler for overrun error. */
-	ret = request_irq(lp->ovr_irq, korina_ovr_interrupt,
-			0, "Ethernet Overflow", dev);
-	if (ret < 0) {
-		printk(KERN_ERR "%s: unable to get OVR IRQ %d\n",
-		    dev->name, lp->ovr_irq);
-		goto err_free_tx_irq;
-	}
-
-	/* Install handler for underflow error. */
-	ret = request_irq(lp->und_irq, korina_und_interrupt,
-			0, "Ethernet Underflow", dev);
-	if (ret < 0) {
-		printk(KERN_ERR "%s: unable to get UND IRQ %d\n",
-		    dev->name, lp->und_irq);
-		goto err_free_ovr_irq;
-	}
 	mod_timer(&lp->media_check_timer, jiffies + 1);
 out:
 	return ret;
 
-err_free_ovr_irq:
-	free_irq(lp->ovr_irq, dev);
-err_free_tx_irq:
-	free_irq(lp->tx_irq, dev);
 err_free_rx_irq:
 	free_irq(lp->rx_irq, dev);
 err_release:
@@ -1052,8 +983,6 @@ static int korina_close(struct net_device *dev)
 	/* Disable interrupts */
 	disable_irq(lp->rx_irq);
 	disable_irq(lp->tx_irq);
-	disable_irq(lp->ovr_irq);
-	disable_irq(lp->und_irq);
 
 	korina_abort_tx(dev);
 	tmp = readl(&lp->tx_dma_regs->dmasm);
@@ -1073,8 +1002,6 @@ static int korina_close(struct net_device *dev)
 
 	free_irq(lp->rx_irq, dev);
 	free_irq(lp->tx_irq, dev);
-	free_irq(lp->ovr_irq, dev);
-	free_irq(lp->und_irq, dev);
 
 	return 0;
 }
@@ -1113,8 +1040,6 @@ static int korina_probe(struct platform_device *pdev)
 
 	lp->rx_irq = platform_get_irq_byname(pdev, "korina_rx");
 	lp->tx_irq = platform_get_irq_byname(pdev, "korina_tx");
-	lp->ovr_irq = platform_get_irq_byname(pdev, "korina_ovr");
-	lp->und_irq = platform_get_irq_byname(pdev, "korina_und");
 
 	r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "korina_regs");
 	dev->base_addr = r->start;
@@ -1162,7 +1087,7 @@ static int korina_probe(struct platform_device *pdev)
 	dev->netdev_ops = &korina_netdev_ops;
 	dev->ethtool_ops = &netdev_ethtool_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
-	netif_napi_add(dev, &lp->napi, korina_poll, 64);
+	netif_napi_add(dev, &lp->napi, korina_poll, NAPI_POLL_WEIGHT);
 
 	lp->phy_addr = (((lp->rx_irq == 0x2c? 1:0) << 8) | 0x05);
 	lp->mii_if.dev = dev;
@@ -1178,7 +1103,7 @@ static int korina_probe(struct platform_device *pdev)
 			": cannot register net device: %d\n", rc);
 		goto probe_err_register;
 	}
-	setup_timer(&lp->media_check_timer, korina_poll_media, (unsigned long) dev);
+	timer_setup(&lp->media_check_timer, korina_poll_media, 0);
 
 	INIT_WORK(&lp->restart_task, korina_restart_task);
 
@@ -1226,5 +1151,6 @@ module_platform_driver(korina_driver);
 MODULE_AUTHOR("Philip Rischel <rischelp@idt.com>");
 MODULE_AUTHOR("Felix Fietkau <nbd@openwrt.org>");
 MODULE_AUTHOR("Florian Fainelli <florian@openwrt.org>");
+MODULE_AUTHOR("Roman Yeryomin <roman@advem.lv>");
 MODULE_DESCRIPTION("IDT RC32434 (Korina) Ethernet driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 64a04975bcf8..bc93b69cfd1e 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -816,11 +816,14 @@ static void mvneta_txq_pend_desc_add(struct mvneta_port *pp,
 {
 	u32 val;
 
-	/* Only 255 descriptors can be added at once ; Assume caller
-	 * process TX desriptors in quanta less than 256
-	 */
-	val = pend_desc + txq->pending;
-	mvreg_write(pp, MVNETA_TXQ_UPDATE_REG(txq->id), val);
+	pend_desc += txq->pending;
+
+	/* Only 255 Tx descriptors can be added at once */
+	do {
+		val = min(pend_desc, 255);
+		mvreg_write(pp, MVNETA_TXQ_UPDATE_REG(txq->id), val);
+		pend_desc -= val;
+	} while (pend_desc > 0);
 	txq->pending = 0;
 }
 
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index fcf9ba5eb8d1..6c20e811f973 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -38,11 +38,12 @@
 #include <net/ipv6.h>
 #include <net/tso.h>
 
-/* RX Fifo Registers */
+/* Fifo Registers */
 #define MVPP2_RX_DATA_FIFO_SIZE_REG(port)	(0x00 + 4 * (port))
 #define MVPP2_RX_ATTR_FIFO_SIZE_REG(port)	(0x20 + 4 * (port))
 #define MVPP2_RX_MIN_PKT_SIZE_REG		0x60
 #define MVPP2_RX_FIFO_INIT_REG			0x64
+#define MVPP22_TX_FIFO_SIZE_REG(port)		(0x8860 + 4 * (port))
 
 /* RX DMA Top Registers */
 #define MVPP2_RX_CTRL_REG(port)			(0x140 + 4 * (port))
@@ -82,6 +83,16 @@
 #define MVPP2_PRS_TCAM_CTRL_REG			0x1230
 #define     MVPP2_PRS_TCAM_EN_MASK		BIT(0)
 
+/* RSS Registers */
+#define MVPP22_RSS_INDEX			0x1500
+#define     MVPP22_RSS_INDEX_TABLE_ENTRY(idx)	((idx) << 8)
+#define     MVPP22_RSS_INDEX_TABLE(idx)		((idx) << 8)
+#define     MVPP22_RSS_INDEX_QUEUE(idx)		((idx) << 16)
+#define MVPP22_RSS_TABLE_ENTRY			0x1508
+#define MVPP22_RSS_TABLE			0x1510
+#define     MVPP22_RSS_TABLE_POINTER(p)		(p)
+#define MVPP22_RSS_WIDTH			0x150c
+
 /* Classifier Registers */
 #define MVPP2_CLS_MODE_REG			0x1800
 #define     MVPP2_CLS_MODE_ACTIVE_MASK		BIT(0)
@@ -482,6 +493,13 @@
 /* Maximum number of TXQs used by single port */
 #define MVPP2_MAX_TXQ			8
 
+/* MVPP2_MAX_TSO_SEGS is the maximum number of fragments to allow in the GSO
+ * skb. As we need a maxium of two descriptors per fragments (1 header, 1 data),
+ * multiply this value by two to count the maximum number of skb descs needed.
+ */
+#define MVPP2_MAX_TSO_SEGS		300
+#define MVPP2_MAX_SKB_DESCS		(MVPP2_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
+
 /* Dfault number of RXQs in use */
 #define MVPP2_DEFAULT_RXQ		4
 
@@ -504,9 +522,17 @@
 #define MVPP2_TX_DESC_ALIGN		(MVPP2_DESC_ALIGNED_SIZE - 1)
 
 /* RX FIFO constants */
-#define MVPP2_RX_FIFO_PORT_DATA_SIZE	0x2000
-#define MVPP2_RX_FIFO_PORT_ATTR_SIZE	0x80
-#define MVPP2_RX_FIFO_PORT_MIN_PKT	0x80
+#define MVPP2_RX_FIFO_PORT_DATA_SIZE_32KB	0x8000
+#define MVPP2_RX_FIFO_PORT_DATA_SIZE_8KB	0x2000
+#define MVPP2_RX_FIFO_PORT_DATA_SIZE_4KB	0x1000
+#define MVPP2_RX_FIFO_PORT_ATTR_SIZE_32KB	0x200
+#define MVPP2_RX_FIFO_PORT_ATTR_SIZE_8KB	0x80
+#define MVPP2_RX_FIFO_PORT_ATTR_SIZE_4KB	0x40
+#define MVPP2_RX_FIFO_PORT_MIN_PKT		0x80
+
+/* TX FIFO constants */
+#define MVPP22_TX_FIFO_DATA_SIZE_10KB		0xa
+#define MVPP22_TX_FIFO_DATA_SIZE_3KB		0x3
 
 /* RX buffer constants */
 #define MVPP2_SKB_SHINFO_SIZE \
@@ -737,6 +763,10 @@ enum mvpp2_prs_l3_cast {
 #define MVPP2_CLS_FLOWS_TBL_SIZE	512
 #define MVPP2_CLS_FLOWS_TBL_DATA_WORDS	3
 #define MVPP2_CLS_LKP_TBL_SIZE		64
+#define MVPP2_CLS_RX_QUEUES		256
+
+/* RSS constants */
+#define MVPP22_RSS_TABLE_ENTRIES	32
 
 /* BM constants */
 #define MVPP2_BM_POOLS_NUM		8
@@ -769,6 +799,42 @@ enum mvpp2_bm_type {
 	MVPP2_BM_SWF_SHORT
 };
 
+/* GMAC MIB Counters register definitions */
+#define MVPP21_MIB_COUNTERS_OFFSET		0x1000
+#define MVPP21_MIB_COUNTERS_PORT_SZ		0x400
+#define MVPP22_MIB_COUNTERS_OFFSET		0x0
+#define MVPP22_MIB_COUNTERS_PORT_SZ		0x100
+
+#define MVPP2_MIB_GOOD_OCTETS_RCVD		0x0
+#define MVPP2_MIB_BAD_OCTETS_RCVD		0x8
+#define MVPP2_MIB_CRC_ERRORS_SENT		0xc
+#define MVPP2_MIB_UNICAST_FRAMES_RCVD		0x10
+#define MVPP2_MIB_BROADCAST_FRAMES_RCVD		0x18
+#define MVPP2_MIB_MULTICAST_FRAMES_RCVD		0x1c
+#define MVPP2_MIB_FRAMES_64_OCTETS		0x20
+#define MVPP2_MIB_FRAMES_65_TO_127_OCTETS	0x24
+#define MVPP2_MIB_FRAMES_128_TO_255_OCTETS	0x28
+#define MVPP2_MIB_FRAMES_256_TO_511_OCTETS	0x2c
+#define MVPP2_MIB_FRAMES_512_TO_1023_OCTETS	0x30
+#define MVPP2_MIB_FRAMES_1024_TO_MAX_OCTETS	0x34
+#define MVPP2_MIB_GOOD_OCTETS_SENT		0x38
+#define MVPP2_MIB_UNICAST_FRAMES_SENT		0x40
+#define MVPP2_MIB_MULTICAST_FRAMES_SENT		0x48
+#define MVPP2_MIB_BROADCAST_FRAMES_SENT		0x4c
+#define MVPP2_MIB_FC_SENT			0x54
+#define MVPP2_MIB_FC_RCVD			0x58
+#define MVPP2_MIB_RX_FIFO_OVERRUN		0x5c
+#define MVPP2_MIB_UNDERSIZE_RCVD		0x60
+#define MVPP2_MIB_FRAGMENTS_RCVD		0x64
+#define MVPP2_MIB_OVERSIZE_RCVD			0x68
+#define MVPP2_MIB_JABBER_RCVD			0x6c
+#define MVPP2_MIB_MAC_RCV_ERROR			0x70
+#define MVPP2_MIB_BAD_CRC_EVENT			0x74
+#define MVPP2_MIB_COLLISION			0x78
+#define MVPP2_MIB_LATE_COLLISION		0x7c
+
+#define MVPP2_MIB_COUNTERS_STATS_DELAY		(1 * HZ)
+
 /* Definitions */
 
 /* Shared Packet Processor resources */
@@ -796,6 +862,7 @@ struct mvpp2 {
 	struct clk *axi_clk;
 
 	/* List of pointers to port structures */
+	int port_count;
 	struct mvpp2_port **port_list;
 
 	/* Aggregated TXQs */
@@ -817,6 +884,10 @@ struct mvpp2 {
 
 	/* Maximum number of RXQs per port */
 	unsigned int max_port_rxqs;
+
+	/* Workqueue to gather hardware statistics */
+	char queue_name[30];
+	struct workqueue_struct *stats_queue;
 };
 
 struct mvpp2_pcpu_stats {
@@ -861,6 +932,7 @@ struct mvpp2_port {
 
 	/* Per-port registers' base address */
 	void __iomem *base;
+	void __iomem *stats_base;
 
 	struct mvpp2_rx_queue **rxqs;
 	unsigned int nrxqs;
@@ -879,6 +951,11 @@ struct mvpp2_port {
 	u16 tx_ring_size;
 	u16 rx_ring_size;
 	struct mvpp2_pcpu_stats __percpu *stats;
+	u64 *ethtool_stats;
+
+	/* Per-port work and its lock to gather hardware statistics */
+	struct mutex gather_stats_lock;
+	struct delayed_work stats_work;
 
 	phy_interface_t phy_interface;
 	struct device_node *phy_node;
@@ -1022,6 +1099,9 @@ struct mvpp2_txq_pcpu {
 	 */
 	int count;
 
+	int wake_threshold;
+	int stop_threshold;
+
 	/* Number of Tx DMA descriptors reserved for each CPU */
 	int reserved_num;
 
@@ -1257,13 +1337,20 @@ static void mvpp2_txdesc_dma_addr_set(struct mvpp2_port *port,
 				      struct mvpp2_tx_desc *tx_desc,
 				      dma_addr_t dma_addr)
 {
+	dma_addr_t addr, offset;
+
+	addr = dma_addr & ~MVPP2_TX_DESC_ALIGN;
+	offset = dma_addr & MVPP2_TX_DESC_ALIGN;
+
 	if (port->priv->hw_version == MVPP21) {
-		tx_desc->pp21.buf_dma_addr = dma_addr;
+		tx_desc->pp21.buf_dma_addr = addr;
+		tx_desc->pp21.packet_offset = offset;
 	} else {
-		u64 val = (u64)dma_addr;
+		u64 val = (u64)addr;
 
 		tx_desc->pp22.buf_dma_addr_ptp &= ~GENMASK_ULL(40, 0);
 		tx_desc->pp22.buf_dma_addr_ptp |= val;
+		tx_desc->pp22.packet_offset = offset;
 	}
 }
 
@@ -1306,16 +1393,6 @@ static void mvpp2_txdesc_cmd_set(struct mvpp2_port *port,
 		tx_desc->pp22.command = command;
 }
 
-static void mvpp2_txdesc_offset_set(struct mvpp2_port *port,
-				    struct mvpp2_tx_desc *tx_desc,
-				    unsigned int offset)
-{
-	if (port->priv->hw_version == MVPP21)
-		tx_desc->pp21.packet_offset = offset;
-	else
-		tx_desc->pp22.packet_offset = offset;
-}
-
 static unsigned int mvpp2_txdesc_offset_get(struct mvpp2_port *port,
 					    struct mvpp2_tx_desc *tx_desc)
 {
@@ -4748,9 +4825,131 @@ static void mvpp2_port_loopback_set(struct mvpp2_port *port)
 	writel(val, port->base + MVPP2_GMAC_CTRL_1_REG);
 }
 
+struct mvpp2_ethtool_counter {
+	unsigned int offset;
+	const char string[ETH_GSTRING_LEN];
+	bool reg_is_64b;
+};
+
+static u64 mvpp2_read_count(struct mvpp2_port *port,
+			    const struct mvpp2_ethtool_counter *counter)
+{
+	u64 val;
+
+	val = readl(port->stats_base + counter->offset);
+	if (counter->reg_is_64b)
+		val += (u64)readl(port->stats_base + counter->offset + 4) << 32;
+
+	return val;
+}
+
+/* Due to the fact that software statistics and hardware statistics are, by
+ * design, incremented at different moments in the chain of packet processing,
+ * it is very likely that incoming packets could have been dropped after being
+ * counted by hardware but before reaching software statistics (most probably
+ * multicast packets), and in the oppposite way, during transmission, FCS bytes
+ * are added in between as well as TSO skb will be split and header bytes added.
+ * Hence, statistics gathered from userspace with ifconfig (software) and
+ * ethtool (hardware) cannot be compared.
+ */
+static const struct mvpp2_ethtool_counter mvpp2_ethtool_regs[] = {
+	{ MVPP2_MIB_GOOD_OCTETS_RCVD, "good_octets_received", true },
+	{ MVPP2_MIB_BAD_OCTETS_RCVD, "bad_octets_received" },
+	{ MVPP2_MIB_CRC_ERRORS_SENT, "crc_errors_sent" },
+	{ MVPP2_MIB_UNICAST_FRAMES_RCVD, "unicast_frames_received" },
+	{ MVPP2_MIB_BROADCAST_FRAMES_RCVD, "broadcast_frames_received" },
+	{ MVPP2_MIB_MULTICAST_FRAMES_RCVD, "multicast_frames_received" },
+	{ MVPP2_MIB_FRAMES_64_OCTETS, "frames_64_octets" },
+	{ MVPP2_MIB_FRAMES_65_TO_127_OCTETS, "frames_65_to_127_octet" },
+	{ MVPP2_MIB_FRAMES_128_TO_255_OCTETS, "frames_128_to_255_octet" },
+	{ MVPP2_MIB_FRAMES_256_TO_511_OCTETS, "frames_256_to_511_octet" },
+	{ MVPP2_MIB_FRAMES_512_TO_1023_OCTETS, "frames_512_to_1023_octet" },
+	{ MVPP2_MIB_FRAMES_1024_TO_MAX_OCTETS, "frames_1024_to_max_octet" },
+	{ MVPP2_MIB_GOOD_OCTETS_SENT, "good_octets_sent", true },
+	{ MVPP2_MIB_UNICAST_FRAMES_SENT, "unicast_frames_sent" },
+	{ MVPP2_MIB_MULTICAST_FRAMES_SENT, "multicast_frames_sent" },
+	{ MVPP2_MIB_BROADCAST_FRAMES_SENT, "broadcast_frames_sent" },
+	{ MVPP2_MIB_FC_SENT, "fc_sent" },
+	{ MVPP2_MIB_FC_RCVD, "fc_received" },
+	{ MVPP2_MIB_RX_FIFO_OVERRUN, "rx_fifo_overrun" },
+	{ MVPP2_MIB_UNDERSIZE_RCVD, "undersize_received" },
+	{ MVPP2_MIB_FRAGMENTS_RCVD, "fragments_received" },
+	{ MVPP2_MIB_OVERSIZE_RCVD, "oversize_received" },
+	{ MVPP2_MIB_JABBER_RCVD, "jabber_received" },
+	{ MVPP2_MIB_MAC_RCV_ERROR, "mac_receive_error" },
+	{ MVPP2_MIB_BAD_CRC_EVENT, "bad_crc_event" },
+	{ MVPP2_MIB_COLLISION, "collision" },
+	{ MVPP2_MIB_LATE_COLLISION, "late_collision" },
+};
+
+static void mvpp2_ethtool_get_strings(struct net_device *netdev, u32 sset,
+				      u8 *data)
+{
+	if (sset == ETH_SS_STATS) {
+		int i;
+
+		for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++)
+			memcpy(data + i * ETH_GSTRING_LEN,
+			       &mvpp2_ethtool_regs[i].string, ETH_GSTRING_LEN);
+	}
+}
+
+static void mvpp2_gather_hw_statistics(struct work_struct *work)
+{
+	struct delayed_work *del_work = to_delayed_work(work);
+	struct mvpp2_port *port = container_of(del_work, struct mvpp2_port,
+					       stats_work);
+	u64 *pstats;
+	int i;
+
+	mutex_lock(&port->gather_stats_lock);
+
+	pstats = port->ethtool_stats;
+	for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++)
+		*pstats++ += mvpp2_read_count(port, &mvpp2_ethtool_regs[i]);
+
+	/* No need to read again the counters right after this function if it
+	 * was called asynchronously by the user (ie. use of ethtool).
+	 */
+	cancel_delayed_work(&port->stats_work);
+	queue_delayed_work(port->priv->stats_queue, &port->stats_work,
+			   MVPP2_MIB_COUNTERS_STATS_DELAY);
+
+	mutex_unlock(&port->gather_stats_lock);
+}
+
+static void mvpp2_ethtool_get_stats(struct net_device *dev,
+				    struct ethtool_stats *stats, u64 *data)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+
+	/* Update statistics for the given port, then take the lock to avoid
+	 * concurrent accesses on the ethtool_stats structure during its copy.
+	 */
+	mvpp2_gather_hw_statistics(&port->stats_work.work);
+
+	mutex_lock(&port->gather_stats_lock);
+	memcpy(data, port->ethtool_stats,
+	       sizeof(u64) * ARRAY_SIZE(mvpp2_ethtool_regs));
+	mutex_unlock(&port->gather_stats_lock);
+}
+
+static int mvpp2_ethtool_get_sset_count(struct net_device *dev, int sset)
+{
+	if (sset == ETH_SS_STATS)
+		return ARRAY_SIZE(mvpp2_ethtool_regs);
+
+	return -EOPNOTSUPP;
+}
+
 static void mvpp2_port_reset(struct mvpp2_port *port)
 {
 	u32 val;
+	unsigned int i;
+
+	/* Read the GOP statistics to reset the hardware counters */
+	for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++)
+		mvpp2_read_count(port, &mvpp2_ethtool_regs[i]);
 
 	val = readl(port->base + MVPP2_GMAC_CTRL_2_REG) &
 		    ~MVPP2_GMAC_PORT_RESET_MASK;
@@ -5022,7 +5221,7 @@ static void mvpp2_aggr_txq_pend_desc_add(struct mvpp2_port *port, int pending)
 static int mvpp2_aggr_desc_num_check(struct mvpp2 *priv,
 				     struct mvpp2_tx_queue *aggr_txq, int num)
 {
-	if ((aggr_txq->count + num) > aggr_txq->size) {
+	if ((aggr_txq->count + num) > MVPP2_AGGR_TXQ_SIZE) {
 		/* Update number of occupied aggregated Tx descriptors */
 		int cpu = smp_processor_id();
 		u32 val = mvpp2_read(priv, MVPP2_AGGR_TXQ_STATUS_REG(cpu));
@@ -5030,7 +5229,7 @@ static int mvpp2_aggr_desc_num_check(struct mvpp2 *priv,
 		aggr_txq->count = val & MVPP2_AGGR_TXQ_PENDING_MASK;
 	}
 
-	if ((aggr_txq->count + num) > aggr_txq->size)
+	if ((aggr_txq->count + num) > MVPP2_AGGR_TXQ_SIZE)
 		return -ENOMEM;
 
 	return 0;
@@ -5370,7 +5569,7 @@ static void mvpp2_txq_done(struct mvpp2_port *port, struct mvpp2_tx_queue *txq,
 	txq_pcpu->count -= tx_done;
 
 	if (netif_tx_queue_stopped(nq))
-		if (txq_pcpu->size - txq_pcpu->count >= MAX_SKB_FRAGS + 1)
+		if (txq_pcpu->count <= txq_pcpu->wake_threshold)
 			netif_tx_wake_queue(nq);
 }
 
@@ -5414,7 +5613,7 @@ static int mvpp2_aggr_txq_init(struct platform_device *pdev,
 	if (!aggr_txq->descs)
 		return -ENOMEM;
 
-	aggr_txq->last_desc = aggr_txq->size - 1;
+	aggr_txq->last_desc = MVPP2_AGGR_TXQ_SIZE - 1;
 
 	/* Aggr TXQ no reset WA */
 	aggr_txq->next_desc_to_proc = mvpp2_read(priv,
@@ -5613,6 +5812,9 @@ static int mvpp2_txq_init(struct mvpp2_port *port,
 		txq_pcpu->txq_put_index = 0;
 		txq_pcpu->txq_get_index = 0;
 
+		txq_pcpu->stop_threshold = txq->size - MVPP2_MAX_SKB_DESCS;
+		txq_pcpu->wake_threshold = txq_pcpu->stop_threshold / 2;
+
 		txq_pcpu->tso_headers =
 			dma_alloc_coherent(port->dev->dev.parent,
 					   txq_pcpu->size * TSO_HEADER_SIZE,
@@ -6256,10 +6458,7 @@ static int mvpp2_tx_frag_process(struct mvpp2_port *port, struct sk_buff *skb,
 			goto cleanup;
 		}
 
-		mvpp2_txdesc_offset_set(port, tx_desc,
-					buf_dma_addr & MVPP2_TX_DESC_ALIGN);
-		mvpp2_txdesc_dma_addr_set(port, tx_desc,
-					  buf_dma_addr & ~MVPP2_TX_DESC_ALIGN);
+		mvpp2_txdesc_dma_addr_set(port, tx_desc, buf_dma_addr);
 
 		if (i == (skb_shinfo(skb)->nr_frags - 1)) {
 			/* Last descriptor */
@@ -6302,8 +6501,7 @@ static inline void mvpp2_tso_put_hdr(struct sk_buff *skb,
 
 	addr = txq_pcpu->tso_headers_dma +
 	       txq_pcpu->txq_put_index * TSO_HEADER_SIZE;
-	mvpp2_txdesc_offset_set(port, tx_desc, addr & MVPP2_TX_DESC_ALIGN);
-	mvpp2_txdesc_dma_addr_set(port, tx_desc, addr & ~MVPP2_TX_DESC_ALIGN);
+	mvpp2_txdesc_dma_addr_set(port, tx_desc, addr);
 
 	mvpp2_txdesc_cmd_set(port, tx_desc, mvpp2_skb_tx_csum(port, skb) |
 					    MVPP2_TXD_F_DESC |
@@ -6332,10 +6530,7 @@ static inline int mvpp2_tso_put_data(struct sk_buff *skb,
 		return -ENOMEM;
 	}
 
-	mvpp2_txdesc_offset_set(port, tx_desc,
-				buf_dma_addr & MVPP2_TX_DESC_ALIGN);
-	mvpp2_txdesc_dma_addr_set(port, tx_desc,
-				  buf_dma_addr & ~MVPP2_TX_DESC_ALIGN);
+	mvpp2_txdesc_dma_addr_set(port, tx_desc, buf_dma_addr);
 
 	if (!left) {
 		mvpp2_txdesc_cmd_set(port, tx_desc, MVPP2_TXD_L_DESC);
@@ -6447,10 +6642,7 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
 		goto out;
 	}
 
-	mvpp2_txdesc_offset_set(port, tx_desc,
-				buf_dma_addr & MVPP2_TX_DESC_ALIGN);
-	mvpp2_txdesc_dma_addr_set(port, tx_desc,
-				  buf_dma_addr & ~MVPP2_TX_DESC_ALIGN);
+	mvpp2_txdesc_dma_addr_set(port, tx_desc, buf_dma_addr);
 
 	tx_cmd = mvpp2_skb_tx_csum(port, skb);
 
@@ -6469,7 +6661,6 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
 		if (mvpp2_tx_frag_process(port, skb, aggr_txq, txq)) {
 			tx_desc_unmap_put(port, txq, tx_desc);
 			frags = 0;
-			goto out;
 		}
 	}
 
@@ -6486,7 +6677,7 @@ out:
 		wmb();
 		mvpp2_aggr_txq_pend_desc_add(port, frags);
 
-		if (txq_pcpu->size - txq_pcpu->count < MAX_SKB_FRAGS + 1)
+		if (txq_pcpu->count >= txq_pcpu->stop_threshold)
 			netif_tx_stop_queue(nq);
 
 		u64_stats_update_begin(&stats->syncp);
@@ -6784,6 +6975,39 @@ static void mvpp2_irqs_deinit(struct mvpp2_port *port)
 	}
 }
 
+static void mvpp22_init_rss(struct mvpp2_port *port)
+{
+	struct mvpp2 *priv = port->priv;
+	int i;
+
+	/* Set the table width: replace the whole classifier Rx queue number
+	 * with the ones configured in RSS table entries.
+	 */
+	mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_TABLE(0));
+	mvpp2_write(priv, MVPP22_RSS_WIDTH, 8);
+
+	/* Loop through the classifier Rx Queues and map them to a RSS table.
+	 * Map them all to the first table (0) by default.
+	 */
+	for (i = 0; i < MVPP2_CLS_RX_QUEUES; i++) {
+		mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_QUEUE(i));
+		mvpp2_write(priv, MVPP22_RSS_TABLE,
+			    MVPP22_RSS_TABLE_POINTER(0));
+	}
+
+	/* Configure the first table to evenly distribute the packets across
+	 * real Rx Queues. The table entries map a hash to an port Rx Queue.
+	 */
+	for (i = 0; i < MVPP22_RSS_TABLE_ENTRIES; i++) {
+		u32 sel = MVPP22_RSS_INDEX_TABLE(0) |
+			  MVPP22_RSS_INDEX_TABLE_ENTRY(i);
+		mvpp2_write(priv, MVPP22_RSS_INDEX, sel);
+
+		mvpp2_write(priv, MVPP22_RSS_TABLE_ENTRY, i % port->nrxqs);
+	}
+
+}
+
 static int mvpp2_open(struct net_device *dev)
 {
 	struct mvpp2_port *port = netdev_priv(dev);
@@ -6858,6 +7082,13 @@ static int mvpp2_open(struct net_device *dev)
 
 	mvpp2_start_dev(port);
 
+	if (priv->hw_version == MVPP22)
+		mvpp22_init_rss(port);
+
+	/* Start hardware statistics gathering */
+	queue_delayed_work(priv->stats_queue, &port->stats_work,
+			   MVPP2_MIB_COUNTERS_STATS_DELAY);
+
 	return 0;
 
 err_free_link_irq:
@@ -6902,6 +7133,8 @@ static int mvpp2_stop(struct net_device *dev)
 	mvpp2_cleanup_rxqs(port);
 	mvpp2_cleanup_txqs(port);
 
+	cancel_delayed_work_sync(&port->stats_work);
+
 	return 0;
 }
 
@@ -7213,6 +7446,9 @@ static const struct ethtool_ops mvpp2_eth_tool_ops = {
 	.get_drvinfo	= mvpp2_ethtool_get_drvinfo,
 	.get_ringparam	= mvpp2_ethtool_get_ringparam,
 	.set_ringparam	= mvpp2_ethtool_set_ringparam,
+	.get_strings	= mvpp2_ethtool_get_strings,
+	.get_ethtool_stats = mvpp2_ethtool_get_stats,
+	.get_sset_count	= mvpp2_ethtool_get_sset_count,
 	.get_link_ksettings = phy_ethtool_get_link_ksettings,
 	.set_link_ksettings = phy_ethtool_set_link_ksettings,
 };
@@ -7616,6 +7852,10 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 			err = PTR_ERR(port->base);
 			goto err_free_irq;
 		}
+
+		port->stats_base = port->priv->lms_base +
+				   MVPP21_MIB_COUNTERS_OFFSET +
+				   port->gop_id * MVPP21_MIB_COUNTERS_PORT_SZ;
 	} else {
 		if (of_property_read_u32(port_node, "gop-port-id",
 					 &port->gop_id)) {
@@ -7625,15 +7865,29 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 		}
 
 		port->base = priv->iface_base + MVPP22_GMAC_BASE(port->gop_id);
+		port->stats_base = port->priv->iface_base +
+				   MVPP22_MIB_COUNTERS_OFFSET +
+				   port->gop_id * MVPP22_MIB_COUNTERS_PORT_SZ;
 	}
 
-	/* Alloc per-cpu stats */
+	/* Alloc per-cpu and ethtool stats */
 	port->stats = netdev_alloc_pcpu_stats(struct mvpp2_pcpu_stats);
 	if (!port->stats) {
 		err = -ENOMEM;
 		goto err_free_irq;
 	}
 
+	port->ethtool_stats = devm_kcalloc(&pdev->dev,
+					   ARRAY_SIZE(mvpp2_ethtool_regs),
+					   sizeof(u64), GFP_KERNEL);
+	if (!port->ethtool_stats) {
+		err = -ENOMEM;
+		goto err_free_stats;
+	}
+
+	mutex_init(&port->gather_stats_lock);
+	INIT_DELAYED_WORK(&port->stats_work, mvpp2_gather_hw_statistics);
+
 	mvpp2_port_copy_mac_addr(dev, priv, port_node, &mac_from);
 
 	port->tx_ring_size = MVPP2_MAX_TXD;
@@ -7678,6 +7932,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 	dev->features = features | NETIF_F_RXCSUM;
 	dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO;
 	dev->vlan_features |= features;
+	dev->gso_max_segs = MVPP2_MAX_TSO_SEGS;
 
 	/* MTU range: 68 - 9676 */
 	dev->min_mtu = ETH_MIN_MTU;
@@ -7769,9 +8024,9 @@ static void mvpp2_rx_fifo_init(struct mvpp2 *priv)
 
 	for (port = 0; port < MVPP2_MAX_PORTS; port++) {
 		mvpp2_write(priv, MVPP2_RX_DATA_FIFO_SIZE_REG(port),
-			    MVPP2_RX_FIFO_PORT_DATA_SIZE);
+			    MVPP2_RX_FIFO_PORT_DATA_SIZE_4KB);
 		mvpp2_write(priv, MVPP2_RX_ATTR_FIFO_SIZE_REG(port),
-			    MVPP2_RX_FIFO_PORT_ATTR_SIZE);
+			    MVPP2_RX_FIFO_PORT_ATTR_SIZE_4KB);
 	}
 
 	mvpp2_write(priv, MVPP2_RX_MIN_PKT_SIZE_REG,
@@ -7779,6 +8034,49 @@ static void mvpp2_rx_fifo_init(struct mvpp2 *priv)
 	mvpp2_write(priv, MVPP2_RX_FIFO_INIT_REG, 0x1);
 }
 
+static void mvpp22_rx_fifo_init(struct mvpp2 *priv)
+{
+	int port;
+
+	/* The FIFO size parameters are set depending on the maximum speed a
+	 * given port can handle:
+	 * - Port 0: 10Gbps
+	 * - Port 1: 2.5Gbps
+	 * - Ports 2 and 3: 1Gbps
+	 */
+
+	mvpp2_write(priv, MVPP2_RX_DATA_FIFO_SIZE_REG(0),
+		    MVPP2_RX_FIFO_PORT_DATA_SIZE_32KB);
+	mvpp2_write(priv, MVPP2_RX_ATTR_FIFO_SIZE_REG(0),
+		    MVPP2_RX_FIFO_PORT_ATTR_SIZE_32KB);
+
+	mvpp2_write(priv, MVPP2_RX_DATA_FIFO_SIZE_REG(1),
+		    MVPP2_RX_FIFO_PORT_DATA_SIZE_8KB);
+	mvpp2_write(priv, MVPP2_RX_ATTR_FIFO_SIZE_REG(1),
+		    MVPP2_RX_FIFO_PORT_ATTR_SIZE_8KB);
+
+	for (port = 2; port < MVPP2_MAX_PORTS; port++) {
+		mvpp2_write(priv, MVPP2_RX_DATA_FIFO_SIZE_REG(port),
+			    MVPP2_RX_FIFO_PORT_DATA_SIZE_4KB);
+		mvpp2_write(priv, MVPP2_RX_ATTR_FIFO_SIZE_REG(port),
+			    MVPP2_RX_FIFO_PORT_ATTR_SIZE_4KB);
+	}
+
+	mvpp2_write(priv, MVPP2_RX_MIN_PKT_SIZE_REG,
+		    MVPP2_RX_FIFO_PORT_MIN_PKT);
+	mvpp2_write(priv, MVPP2_RX_FIFO_INIT_REG, 0x1);
+}
+
+/* Initialize Tx FIFO's */
+static void mvpp22_tx_fifo_init(struct mvpp2 *priv)
+{
+	int port;
+
+	for (port = 0; port < MVPP2_MAX_PORTS; port++)
+		mvpp2_write(priv, MVPP22_TX_FIFO_SIZE_REG(port),
+			    MVPP22_TX_FIFO_DATA_SIZE_3KB);
+}
+
 static void mvpp2_axi_init(struct mvpp2 *priv)
 {
 	u32 val, rdval, wrval;
@@ -7874,8 +8172,13 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv)
 			return err;
 	}
 
-	/* Rx Fifo Init */
-	mvpp2_rx_fifo_init(priv);
+	/* Fifo Init */
+	if (priv->hw_version == MVPP21) {
+		mvpp2_rx_fifo_init(priv);
+	} else {
+		mvpp22_rx_fifo_init(priv);
+		mvpp22_tx_fifo_init(priv);
+	}
 
 	if (priv->hw_version == MVPP21)
 		writel(MVPP2_EXT_GLOBAL_CTRL_DEFAULT,
@@ -7907,7 +8210,7 @@ static int mvpp2_probe(struct platform_device *pdev)
 	struct mvpp2 *priv;
 	struct resource *res;
 	void __iomem *base;
-	int port_count, i;
+	int i;
 	int err;
 
 	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
@@ -8022,14 +8325,14 @@ static int mvpp2_probe(struct platform_device *pdev)
 		goto err_mg_clk;
 	}
 
-	port_count = of_get_available_child_count(dn);
-	if (port_count == 0) {
+	priv->port_count = of_get_available_child_count(dn);
+	if (priv->port_count == 0) {
 		dev_err(&pdev->dev, "no ports enabled\n");
 		err = -ENODEV;
 		goto err_mg_clk;
 	}
 
-	priv->port_list = devm_kcalloc(&pdev->dev, port_count,
+	priv->port_list = devm_kcalloc(&pdev->dev, priv->port_count,
 				       sizeof(*priv->port_list),
 				       GFP_KERNEL);
 	if (!priv->port_list) {
@@ -8046,6 +8349,21 @@ static int mvpp2_probe(struct platform_device *pdev)
 		i++;
 	}
 
+	/* Statistics must be gathered regularly because some of them (like
+	 * packets counters) are 32-bit registers and could overflow quite
+	 * quickly. For instance, a 10Gb link used at full bandwidth with the
+	 * smallest packets (64B) will overflow a 32-bit counter in less than
+	 * 30 seconds. Then, use a workqueue to fill 64-bit counters.
+	 */
+	snprintf(priv->queue_name, sizeof(priv->queue_name),
+		 "stats-wq-%s%s", netdev_name(priv->port_list[0]->dev),
+		 priv->port_count > 1 ? "+" : "");
+	priv->stats_queue = create_singlethread_workqueue(priv->queue_name);
+	if (!priv->stats_queue) {
+		err = -ENOMEM;
+		goto err_mg_clk;
+	}
+
 	platform_set_drvdata(pdev, priv);
 	return 0;
 
@@ -8067,9 +8385,14 @@ static int mvpp2_remove(struct platform_device *pdev)
 	struct device_node *port_node;
 	int i = 0;
 
+	flush_workqueue(priv->stats_queue);
+	destroy_workqueue(priv->stats_queue);
+
 	for_each_available_child_of_node(dn, port_node) {
-		if (priv->port_list[i])
+		if (priv->port_list[i]) {
+			mutex_destroy(&priv->port_list[i]->gather_stats_lock);
 			mvpp2_port_remove(priv->port_list[i]);
+		}
 		i++;
 	}
 
diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index 993724959a7c..91b1c154fd29 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c
@@ -1496,9 +1496,8 @@ static int pxa168_eth_probe(struct platform_device *pdev)
 	netif_napi_add(dev, &pep->napi, pxa168_rx_poll, pep->rx_ring_size);
 
 	memset(&pep->timeout, 0, sizeof(struct timer_list));
-	init_timer(&pep->timeout);
-	pep->timeout.function = rxq_refill_timer_wrapper;
-	pep->timeout.data = (unsigned long)pep;
+	setup_timer(&pep->timeout, rxq_refill_timer_wrapper,
+		    (unsigned long)pep);
 
 	pep->smi_bus = mdiobus_alloc();
 	if (!pep->smi_bus) {
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 5e81a7263654..54adfd967858 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1817,7 +1817,7 @@ static int mtk_open(struct net_device *dev)
 	struct mtk_eth *eth = mac->hw;
 
 	/* we run 2 netdevs on the same dma ring so we only bring it up once */
-	if (!atomic_read(&eth->dma_refcnt)) {
+	if (!refcount_read(&eth->dma_refcnt)) {
 		int err = mtk_start_dma(eth);
 
 		if (err)
@@ -1827,8 +1827,10 @@ static int mtk_open(struct net_device *dev)
 		napi_enable(&eth->rx_napi);
 		mtk_tx_irq_enable(eth, MTK_TX_DONE_INT);
 		mtk_rx_irq_enable(eth, MTK_RX_DONE_INT);
+		refcount_set(&eth->dma_refcnt, 1);
 	}
-	atomic_inc(&eth->dma_refcnt);
+	else
+		refcount_inc(&eth->dma_refcnt);
 
 	phy_start(dev->phydev);
 	netif_start_queue(dev);
@@ -1868,7 +1870,7 @@ static int mtk_stop(struct net_device *dev)
 	phy_stop(dev->phydev);
 
 	/* only shutdown DMA if this is the last user */
-	if (!atomic_dec_and_test(&eth->dma_refcnt))
+	if (!refcount_dec_and_test(&eth->dma_refcnt))
 		return 0;
 
 	mtk_tx_irq_disable(eth, MTK_TX_DONE_INT);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 3d3c24a28112..a3af4660de81 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -15,6 +15,8 @@
 #ifndef MTK_ETH_H
 #define MTK_ETH_H
 
+#include <linux/refcount.h>
+
 #define MTK_QDMA_PAGE_SIZE	2048
 #define	MTK_MAX_RX_LENGTH	1536
 #define MTK_TX_DMA_BUF_LEN	0x3fff
@@ -632,7 +634,7 @@ struct mtk_eth {
 	struct regmap			*pctl;
 	u32				chip_id;
 	bool				hwlro;
-	atomic_t			dma_refcnt;
+	refcount_t			dma_refcnt;
 	struct mtk_tx_ring		tx_ring;
 	struct mtk_rx_ring		rx_ring[MTK_MAX_RX_RING_NUM];
 	struct mtk_rx_ring		rx_ring_qdma;
diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig
index 22b1cc012bc9..36054e6fb9d3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig
@@ -38,3 +38,11 @@ config MLX4_DEBUG
 	  mlx4_core driver.  The output can be turned on via the
 	  debug_level module parameter (which can also be set after
 	  the driver is loaded through sysfs).
+
+config MLX4_CORE_GEN2
+	bool "Support for old gen2 Mellanox PCI IDs" if (MLX4_CORE)
+	depends on MLX4_CORE
+	default y
+	---help---
+	  Say Y here if you want to use old gen2 Mellanox devices in the
+	  driver.
diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c
index 53daa6ca5d83..e2b6b0cac1ac 100644
--- a/drivers/net/ethernet/mellanox/mlx4/catas.c
+++ b/drivers/net/ethernet/mellanox/mlx4/catas.c
@@ -231,10 +231,10 @@ static void dump_err_buf(struct mlx4_dev *dev)
 			 i, swab32(readl(priv->catas_err.map + i)));
 }
 
-static void poll_catas(unsigned long dev_ptr)
+static void poll_catas(struct timer_list *t)
 {
-	struct mlx4_dev *dev = (struct mlx4_dev *) dev_ptr;
-	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_priv *priv = from_timer(priv, t, catas_err.timer);
+	struct mlx4_dev *dev = &priv->dev;
 	u32 slave_read;
 
 	if (mlx4_is_slave(dev)) {
@@ -277,7 +277,7 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev)
 	phys_addr_t addr;
 
 	INIT_LIST_HEAD(&priv->catas_err.list);
-	init_timer(&priv->catas_err.timer);
+	timer_setup(&priv->catas_err.timer, poll_catas, 0);
 	priv->catas_err.map = NULL;
 
 	if (!mlx4_is_slave(dev)) {
@@ -293,8 +293,6 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev)
 		}
 	}
 
-	priv->catas_err.timer.data     = (unsigned long) dev;
-	priv->catas_err.timer.function = poll_catas;
 	priv->catas_err.timer.expires  =
 		round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL);
 	add_timer(&priv->catas_err.timer);
diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index 72eb50cd5ecd..d8e9a323122e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
@@ -69,7 +69,7 @@ void mlx4_cq_tasklet_cb(unsigned long data)
 	list_for_each_entry_safe(mcq, temp, &ctx->process_list, tasklet_ctx.list) {
 		list_del_init(&mcq->tasklet_ctx.list);
 		mcq->tasklet_ctx.comp(mcq);
-		if (atomic_dec_and_test(&mcq->refcount))
+		if (refcount_dec_and_test(&mcq->refcount))
 			complete(&mcq->free);
 		if (time_after(jiffies, end))
 			break;
@@ -92,7 +92,7 @@ static void mlx4_add_cq_to_tasklet(struct mlx4_cq *cq)
 	 * still arrive.
 	 */
 	if (list_empty_careful(&cq->tasklet_ctx.list)) {
-		atomic_inc(&cq->refcount);
+		refcount_inc(&cq->refcount);
 		kick = list_empty(&tasklet_ctx->list);
 		list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list);
 		if (kick)
@@ -344,7 +344,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
 	cq->cons_index = 0;
 	cq->arm_sn     = 1;
 	cq->uar        = uar;
-	atomic_set(&cq->refcount, 1);
+	refcount_set(&cq->refcount, 1);
 	init_completion(&cq->free);
 	cq->comp = mlx4_add_cq_to_tasklet;
 	cq->tasklet_ctx.priv =
@@ -386,7 +386,7 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
 	    priv->eq_table.eq[MLX4_EQ_ASYNC].irq)
 		synchronize_irq(priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
 
-	if (atomic_dec_and_test(&cq->refcount))
+	if (refcount_dec_and_test(&cq->refcount))
 		complete(&cq->free);
 	wait_for_completion(&cq->free);
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 3d4e4a5d00d1..bf1f04164885 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1742,13 +1742,18 @@ static int mlx4_en_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
 	return err;
 }
 
+static int mlx4_en_get_max_num_rx_rings(struct net_device *dev)
+{
+	return min_t(int, num_online_cpus(), MAX_RX_RINGS);
+}
+
 static void mlx4_en_get_channels(struct net_device *dev,
 				 struct ethtool_channels *channel)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 
-	channel->max_rx = MAX_RX_RINGS;
-	channel->max_tx = MLX4_EN_MAX_TX_RING_P_UP;
+	channel->max_rx = mlx4_en_get_max_num_rx_rings(dev);
+	channel->max_tx = priv->mdev->profile.max_num_tx_rings_p_up;
 
 	channel->rx_count = priv->rx_ring_num;
 	channel->tx_count = priv->tx_ring_num[TX] /
@@ -1777,7 +1782,7 @@ static int mlx4_en_set_channels(struct net_device *dev,
 	mutex_lock(&mdev->state_lock);
 	xdp_count = priv->tx_ring_num[TX_XDP] ? channel->rx_count : 0;
 	if (channel->tx_count * priv->prof->num_up + xdp_count >
-	    MAX_TX_RINGS) {
+	    priv->mdev->profile.max_num_tx_rings_p_up * priv->prof->num_up) {
 		err = -EINVAL;
 		en_err(priv,
 		       "Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n",
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c
index 686e18de9a97..2c2965497ed3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c
@@ -153,7 +153,7 @@ static void mlx4_en_get_profile(struct mlx4_en_dev *mdev)
 	int i;
 
 	params->udp_rss = udp_rss;
-	params->num_tx_rings_p_up = mlx4_low_memory_profile() ?
+	params->max_num_tx_rings_p_up = mlx4_low_memory_profile() ?
 		MLX4_EN_MIN_TX_RING_P_UP :
 		min_t(int, num_online_cpus(), MLX4_EN_MAX_TX_RING_P_UP);
 
@@ -170,8 +170,8 @@ static void mlx4_en_get_profile(struct mlx4_en_dev *mdev)
 		params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE;
 		params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE;
 		params->prof[i].num_up = MLX4_EN_NUM_UP_LOW;
-		params->prof[i].num_tx_rings_p_up = params->num_tx_rings_p_up;
-		params->prof[i].tx_ring_num[TX] = params->num_tx_rings_p_up *
+		params->prof[i].num_tx_rings_p_up = params->max_num_tx_rings_p_up;
+		params->prof[i].tx_ring_num[TX] = params->max_num_tx_rings_p_up *
 			params->prof[i].num_up;
 		params->prof[i].rss_rings = 0;
 		params->prof[i].inline_thold = inline_thold;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 9c218f1cfc6c..99051a294fa6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -135,7 +135,7 @@ static int __mlx4_en_setup_tc(struct net_device *dev, enum tc_setup_type type,
 {
 	struct tc_mqprio_qopt *mqprio = type_data;
 
-	if (type != TC_SETUP_MQPRIO)
+	if (type != TC_SETUP_QDISC_MQPRIO)
 		return -EOPNOTSUPP;
 
 	if (mqprio->num_tc && mqprio->num_tc != MLX4_EN_NUM_UP_HIGH)
@@ -1752,6 +1752,7 @@ int mlx4_en_start_port(struct net_device *dev)
 				mlx4_en_arm_cq(priv, cq);
 
 			} else {
+				mlx4_en_init_tx_xdp_ring_descs(priv, tx_ring);
 				mlx4_en_init_recycle_ring(priv, i);
 				/* XDP TX CQ should never be armed */
 			}
@@ -2915,7 +2916,7 @@ static u32 mlx4_xdp_query(struct net_device *dev)
 	return prog_id;
 }
 
-static int mlx4_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+static int mlx4_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 {
 	switch (xdp->command) {
 	case XDP_SETUP_PROG:
@@ -2957,7 +2958,7 @@ static const struct net_device_ops mlx4_netdev_ops = {
 	.ndo_udp_tunnel_del	= mlx4_en_del_vxlan_port,
 	.ndo_features_check	= mlx4_en_features_check,
 	.ndo_set_tx_maxrate	= mlx4_en_set_tx_maxrate,
-	.ndo_xdp		= mlx4_xdp,
+	.ndo_bpf		= mlx4_xdp,
 };
 
 static const struct net_device_ops mlx4_netdev_ops_master = {
@@ -2994,7 +2995,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = {
 	.ndo_udp_tunnel_del	= mlx4_en_del_vxlan_port,
 	.ndo_features_check	= mlx4_en_features_check,
 	.ndo_set_tx_maxrate	= mlx4_en_set_tx_maxrate,
-	.ndo_xdp		= mlx4_xdp,
+	.ndo_bpf		= mlx4_xdp,
 };
 
 struct mlx4_en_bond {
@@ -3305,7 +3306,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 	priv->pflags = MLX4_EN_PRIV_FLAGS_BLUEFLAME;
 	priv->ctrl_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE |
 			MLX4_WQE_CTRL_SOLICITED);
-	priv->num_tx_rings_p_up = mdev->profile.num_tx_rings_p_up;
+	priv->num_tx_rings_p_up = mdev->profile.max_num_tx_rings_p_up;
 	priv->tx_work_limit = MLX4_EN_DEFAULT_TX_WORK;
 	netdev_rss_key_fill(priv->rss_key, sizeof(priv->rss_key));
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
index 5a47f9669621..6883ac75d37f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
@@ -53,7 +53,7 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
 	if (is_tx) {
 		context->sq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4);
 		if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP)
-			context->params2 |= MLX4_QP_BIT_FPP;
+			context->params2 |= cpu_to_be32(MLX4_QP_BIT_FPP);
 
 	} else {
 		context->sq_size_stride = ilog2(TXBB_SIZE) - 4;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index b97a55c827eb..85e28efcda33 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -193,7 +193,7 @@ static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv)
 
 			if (mlx4_en_prepare_rx_desc(priv, ring,
 						    ring->actual_size,
-						    GFP_KERNEL | __GFP_COLD)) {
+						    GFP_KERNEL)) {
 				if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) {
 					en_err(priv, "Failed to allocate enough rx buffers\n");
 					return -ENOMEM;
@@ -254,8 +254,7 @@ void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev)
 					 DEF_RX_RINGS));
 
 		num_rx_rings = mlx4_low_memory_profile() ? MIN_RX_RINGS :
-			min_t(int, num_of_eqs,
-			      netif_get_num_default_rss_queues());
+			min_t(int, num_of_eqs, num_online_cpus());
 		mdev->profile.prof[i].rx_ring_num =
 			rounddown_pow_of_two(num_rx_rings);
 	}
@@ -552,8 +551,7 @@ static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
 	do {
 		if (mlx4_en_prepare_rx_desc(priv, ring,
 					    ring->prod & ring->size_mask,
-					    GFP_ATOMIC | __GFP_COLD |
-					    __GFP_MEMALLOC))
+					    GFP_ATOMIC | __GFP_MEMALLOC))
 			break;
 		ring->prod++;
 	} while (likely(--missing));
@@ -762,6 +760,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 
 			xdp.data_hard_start = va - frags[0].page_offset;
 			xdp.data = va;
+			xdp_set_data_meta_invalid(&xdp);
 			xdp.data_end = xdp.data + length;
 			orig_data = xdp.data;
 
@@ -778,7 +777,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 			case XDP_PASS:
 				break;
 			case XDP_TX:
-				if (likely(!mlx4_en_xmit_frame(ring, frags, dev,
+				if (likely(!mlx4_en_xmit_frame(ring, frags, priv,
 							length, cq_ring,
 							&doorbell_pending))) {
 					frags[0].page = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 3541a7f9d12e..6b6853773848 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -718,7 +718,7 @@ void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring)
 #else
 	iowrite32be(
 #endif
-		  ring->doorbell_qpn,
+		  (__force u32)ring->doorbell_qpn,
 		  ring->bf.uar->map + MLX4_SEND_DOORBELL);
 }
 
@@ -1085,13 +1085,35 @@ tx_drop:
 #define MLX4_EN_XDP_TX_REAL_SZ (((CTRL_SIZE + MLX4_EN_XDP_TX_NRTXBB * DS_SIZE) \
 				 / 16) & 0x3f)
 
+void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv,
+				    struct mlx4_en_tx_ring *ring)
+{
+	int i;
+
+	for (i = 0; i < ring->size; i++) {
+		struct mlx4_en_tx_info *tx_info = &ring->tx_info[i];
+		struct mlx4_en_tx_desc *tx_desc = ring->buf +
+			(i << LOG_TXBB_SIZE);
+
+		tx_info->map0_byte_count = PAGE_SIZE;
+		tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB;
+		tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data);
+		tx_info->ts_requested = 0;
+		tx_info->nr_maps = 1;
+		tx_info->linear = 1;
+		tx_info->inl = 0;
+
+		tx_desc->data.lkey = ring->mr_key;
+		tx_desc->ctrl.qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ;
+		tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
+	}
+}
+
 netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
 			       struct mlx4_en_rx_alloc *frame,
-			       struct net_device *dev, unsigned int length,
+			       struct mlx4_en_priv *priv, unsigned int length,
 			       int tx_ind, bool *doorbell_pending)
 {
-	struct mlx4_en_priv *priv = netdev_priv(dev);
-	union mlx4_wqe_qpn_vlan	qpn_vlan = {};
 	struct mlx4_en_tx_desc *tx_desc;
 	struct mlx4_en_tx_info *tx_info;
 	struct mlx4_wqe_data_seg *data;
@@ -1123,25 +1145,16 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
 	tx_info->page = frame->page;
 	frame->page = NULL;
 	tx_info->map0_dma = dma;
-	tx_info->map0_byte_count = PAGE_SIZE;
-	tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB;
 	tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN);
-	tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data);
-	tx_info->ts_requested = 0;
-	tx_info->nr_maps = 1;
-	tx_info->linear = 1;
-	tx_info->inl = 0;
 
 	dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset,
 					 length, PCI_DMA_TODEVICE);
 
 	data->addr = cpu_to_be64(dma + frame->page_offset);
-	data->lkey = ring->mr_key;
 	dma_wmb();
 	data->byte_count = cpu_to_be32(length);
 
 	/* tx completion can avoid cache line miss for common cases */
-	tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
 
 	op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
 		((ring->prod & ring->size) ?
@@ -1152,10 +1165,13 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
 
 	ring->prod += MLX4_EN_XDP_TX_NRTXBB;
 
-	qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ;
+	/* Ensure new descriptor hits memory
+	 * before setting ownership of this descriptor to HW
+	 */
+	dma_wmb();
+	tx_desc->ctrl.owner_opcode = op_own;
+	ring->xmit_more++;
 
-	mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, 0,
-			      op_own, false, false);
 	*doorbell_pending = true;
 
 	return NETDEV_TX_OK;
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 16c09949afd5..634f603f941c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -57,12 +57,12 @@ MODULE_PARM_DESC(enable_qos, "Enable Enhanced QoS support (default: off)");
 #define MLX4_GET(dest, source, offset)				      \
 	do {							      \
 		void *__p = (char *) (source) + (offset);	      \
-		u64 val;                                              \
-		switch (sizeof(dest)) {			      \
+		__be64 val;                                           \
+		switch (sizeof(dest)) {				      \
 		case 1: (dest) = *(u8 *) __p;	    break;	      \
 		case 2: (dest) = be16_to_cpup(__p); break;	      \
 		case 4: (dest) = be32_to_cpup(__p); break;	      \
-		case 8: val = get_unaligned((u64 *)__p);              \
+		case 8: val = get_unaligned((__be64 *)__p);           \
 			(dest) = be64_to_cpu(val);  break;            \
 		default: __buggy_use_of_MLX4_GET();		      \
 		}						      \
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index e61c99ef741d..4d84cab77105 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -4066,6 +4066,7 @@ int mlx4_restart_one(struct pci_dev *pdev)
 #define MLX_GN(id) { PCI_VDEVICE(MELLANOX, id), 0 }
 
 static const struct pci_device_id mlx4_pci_table[] = {
+#ifdef CONFIG_MLX4_CORE_GEN2
 	/* MT25408 "Hermon" */
 	MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_SDR),	/* SDR */
 	MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_DDR),	/* DDR */
@@ -4085,6 +4086,7 @@ static const struct pci_device_id mlx4_pci_table[] = {
 	MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX2),
 	/* MT25400 Family [ConnectX-2] */
 	MLX_VF(0x1002),					/* Virtual Function */
+#endif /* CONFIG_MLX4_CORE_GEN2 */
 	/* MT27500 Family [ConnectX-3] */
 	MLX_GN(PCI_DEVICE_ID_MELLANOX_CONNECTX3),
 	MLX_VF(0x1004),					/* Virtual Function */
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index fdb3ad0cbe54..1856e279a7e0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -399,7 +399,7 @@ struct mlx4_en_profile {
 	u32 active_ports;
 	u32 small_pkt_int;
 	u8 no_reset;
-	u8 num_tx_rings_p_up;
+	u8 max_num_tx_rings_p_up;
 	struct mlx4_en_port_profile prof[MLX4_MAX_PORTS + 1];
 };
 
@@ -693,7 +693,7 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
 netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
 netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
 			       struct mlx4_en_rx_alloc *frame,
-			       struct net_device *dev, unsigned int length,
+			       struct mlx4_en_priv *priv, unsigned int length,
 			       int tx_ind, bool *doorbell_pending);
 void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring);
 bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring,
@@ -705,6 +705,8 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 			   int node, int queue_index);
 void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
 			     struct mlx4_en_tx_ring **pring);
+void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv,
+				    struct mlx4_en_tx_ring *ring);
 int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
 			     struct mlx4_en_tx_ring *ring,
 			     int cq, int user_prio);
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 728a2fb1f5c0..769598f7b6c8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -55,7 +55,7 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type)
 
 	qp = __mlx4_qp_lookup(dev, qpn);
 	if (qp)
-		atomic_inc(&qp->refcount);
+		refcount_inc(&qp->refcount);
 
 	spin_unlock(&qp_table->lock);
 
@@ -66,7 +66,7 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type)
 
 	qp->event(qp, event_type);
 
-	if (atomic_dec_and_test(&qp->refcount))
+	if (refcount_dec_and_test(&qp->refcount))
 		complete(&qp->free);
 }
 
@@ -420,7 +420,7 @@ int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp)
 	if (err)
 		goto err_icm;
 
-	atomic_set(&qp->refcount, 1);
+	refcount_set(&qp->refcount, 1);
 	init_completion(&qp->free);
 
 	return 0;
@@ -520,7 +520,7 @@ EXPORT_SYMBOL_GPL(mlx4_qp_remove);
 
 void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp)
 {
-	if (atomic_dec_and_test(&qp->refcount))
+	if (refcount_dec_and_test(&qp->refcount))
 		complete(&qp->free);
 	wait_for_completion(&qp->free);
 
@@ -925,7 +925,7 @@ int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 		context->flags &= cpu_to_be32(~(0xf << 28));
 		context->flags |= cpu_to_be32(states[i + 1] << 28);
 		if (states[i + 1] != MLX4_QP_STATE_RTR)
-			context->params2 &= ~MLX4_QP_BIT_FPP;
+			context->params2 &= ~cpu_to_be32(MLX4_QP_BIT_FPP);
 		err = mlx4_qp_modify(dev, mtt, states[i], states[i + 1],
 				     context, 0, 0, qp);
 		if (err) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index fabb53379727..04304dd894c6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -3185,7 +3185,7 @@ static int verify_qp_parameters(struct mlx4_dev *dev,
 	optpar	= be32_to_cpu(*(__be32 *) inbox->buf);
 
 	if (slave != mlx4_master_func_num(dev)) {
-		qp_ctx->params2 &= ~MLX4_QP_BIT_FPP;
+		qp_ctx->params2 &= ~cpu_to_be32(MLX4_QP_BIT_FPP);
 		/* setting QP rate-limit is disallowed for VFs */
 		if (qp_ctx->rate_limit_params)
 			return -EPERM;
diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c
index bedf52126824..cbe4d9746ddf 100644
--- a/drivers/net/ethernet/mellanox/mlx4/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/srq.c
@@ -49,7 +49,7 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type)
 	srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1));
 	rcu_read_unlock();
 	if (srq)
-		atomic_inc(&srq->refcount);
+		refcount_inc(&srq->refcount);
 	else {
 		mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn);
 		return;
@@ -57,7 +57,7 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type)
 
 	srq->event(srq, event_type);
 
-	if (atomic_dec_and_test(&srq->refcount))
+	if (refcount_dec_and_test(&srq->refcount))
 		complete(&srq->free);
 }
 
@@ -203,7 +203,7 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd,
 	if (err)
 		goto err_radix;
 
-	atomic_set(&srq->refcount, 1);
+	refcount_set(&srq->refcount, 1);
 	init_completion(&srq->free);
 
 	return 0;
@@ -232,7 +232,7 @@ void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq)
 	radix_tree_delete(&srq_table->tree, srq->srqn);
 	spin_unlock_irq(&srq_table->lock);
 
-	if (atomic_dec_and_test(&srq->refcount))
+	if (refcount_dec_and_test(&srq->refcount))
 		complete(&srq->free);
 	wait_for_completion(&srq->free);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index fdaef00465d7..25deaa5a534c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -6,6 +6,7 @@ config MLX5_CORE
 	tristate "Mellanox Technologies ConnectX-4 and Connect-IB core driver"
 	depends on MAY_USE_DEVLINK
 	depends on PCI
+	imply PTP_1588_CLOCK
 	default n
 	---help---
 	  Core driver for low level functionality of the ConnectX-4 and
@@ -29,7 +30,6 @@ config MLX5_CORE_EN
 	bool "Mellanox Technologies ConnectX-4 Ethernet support"
 	depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE
 	depends on IPV6=y || IPV6=n || MLX5_CORE=m
-	imply PTP_1588_CLOCK
 	default n
 	---help---
 	  Ethernet support in Mellanox Technologies ConnectX-4 NIC.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 714dd0dc5eef..19b21b40ab07 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -5,7 +5,7 @@ subdir-ccflags-y += -I$(src)
 mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
 		mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
-		fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o \
+		fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o lib/clock.o \
 		diag/fs_tracepoint.o
 
 mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o
@@ -14,7 +14,7 @@ mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \
 		fpga/ipsec.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
-		en_tx.o en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \
+		en_tx.o en_rx.o en_rx_am.o en_txrx.o en_stats.o vxlan.o \
 		en_arfs.o en_fs_ethtool.o en_selftest.o
 
 mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
@@ -23,7 +23,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o en_rep.o en_tc.
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) +=  en_dcbnl.o
 
-mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o
+mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib_vlan.o
 
 mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \
 		en_accel/ipsec_stats.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index 336d4738b807..1016e05c7ec7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -58,7 +58,7 @@ void mlx5_cq_tasklet_cb(unsigned long data)
 				 tasklet_ctx.list) {
 		list_del_init(&mcq->tasklet_ctx.list);
 		mcq->tasklet_ctx.comp(mcq);
-		if (atomic_dec_and_test(&mcq->refcount))
+		if (refcount_dec_and_test(&mcq->refcount))
 			complete(&mcq->free);
 		if (time_after(jiffies, end))
 			break;
@@ -80,7 +80,7 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq)
 	 * still arrive.
 	 */
 	if (list_empty_careful(&cq->tasklet_ctx.list)) {
-		atomic_inc(&cq->refcount);
+		refcount_inc(&cq->refcount);
 		list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list);
 	}
 	spin_unlock_irqrestore(&tasklet_ctx->lock, flags);
@@ -94,7 +94,7 @@ void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn)
 	spin_lock(&table->lock);
 	cq = radix_tree_lookup(&table->tree, cqn);
 	if (likely(cq))
-		atomic_inc(&cq->refcount);
+		refcount_inc(&cq->refcount);
 	spin_unlock(&table->lock);
 
 	if (!cq) {
@@ -106,7 +106,7 @@ void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn)
 
 	cq->comp(cq);
 
-	if (atomic_dec_and_test(&cq->refcount))
+	if (refcount_dec_and_test(&cq->refcount))
 		complete(&cq->free);
 }
 
@@ -119,7 +119,7 @@ void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type)
 
 	cq = radix_tree_lookup(&table->tree, cqn);
 	if (cq)
-		atomic_inc(&cq->refcount);
+		refcount_inc(&cq->refcount);
 
 	spin_unlock(&table->lock);
 
@@ -130,7 +130,7 @@ void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type)
 
 	cq->event(cq, event_type);
 
-	if (atomic_dec_and_test(&cq->refcount))
+	if (refcount_dec_and_test(&cq->refcount))
 		complete(&cq->free);
 }
 
@@ -159,7 +159,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 	cq->cqn = MLX5_GET(create_cq_out, out, cqn);
 	cq->cons_index = 0;
 	cq->arm_sn     = 0;
-	atomic_set(&cq->refcount, 1);
+	refcount_set(&cq->refcount, 1);
 	init_completion(&cq->free);
 	if (!cq->comp)
 		cq->comp = mlx5_add_cq_to_tasklet;
@@ -222,7 +222,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
 	synchronize_irq(cq->irqn);
 
 	mlx5_debug_cq_remove(dev, cq);
-	if (atomic_dec_and_test(&cq->refcount))
+	if (refcount_dec_and_test(&cq->refcount))
 		complete(&cq->free);
 	wait_for_completion(&cq->free);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 13b5ef9d8703..c0872b3284cb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -57,6 +57,7 @@
 #define MLX5E_HW2SW_MTU(priv, hwmtu) ((hwmtu) - ((priv)->hard_mtu))
 #define MLX5E_SW2HW_MTU(priv, swmtu) ((swmtu) + ((priv)->hard_mtu))
 
+#define MLX5E_MAX_DSCP          64
 #define MLX5E_MAX_NUM_TC	8
 
 #define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE                0x6
@@ -105,6 +106,7 @@
 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3
 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS      0x20
 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC      0x10
+#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE 0x10
 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS      0x20
 #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES                0x80
 #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW            0x2
@@ -126,6 +128,16 @@
 
 #define MLX5E_NUM_MAIN_GROUPS 9
 
+#define MLX5E_MSG_LEVEL			NETIF_MSG_LINK
+
+#define mlx5e_dbg(mlevel, priv, format, ...)                    \
+do {                                                            \
+	if (NETIF_MSG_##mlevel & (priv)->msglevel)              \
+		netdev_warn(priv->netdev, format,               \
+			    ##__VA_ARGS__);                     \
+} while (0)
+
+
 static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size)
 {
 	switch (wq_type) {
@@ -187,12 +199,14 @@ extern const char mlx5e_self_tests[][ETH_GSTRING_LEN];
 
 static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = {
 	"rx_cqe_moder",
+	"tx_cqe_moder",
 	"rx_cqe_compress",
 };
 
 enum mlx5e_priv_flag {
 	MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0),
-	MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 1),
+	MLX5E_PFLAG_TX_CQE_BASED_MODER = (1 << 1),
+	MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 2),
 };
 
 #define MLX5E_SET_PFLAG(params, pflag, enable)			\
@@ -212,6 +226,7 @@ enum mlx5e_priv_flag {
 struct mlx5e_cq_moder {
 	u16 usec;
 	u16 pkts;
+	u8 cq_period_mode;
 };
 
 struct mlx5e_params {
@@ -223,7 +238,6 @@ struct mlx5e_params {
 	u8  log_rq_size;
 	u16 num_channels;
 	u8  num_tc;
-	u8  rx_cq_period_mode;
 	bool rx_cqe_compress_def;
 	struct mlx5e_cq_moder rx_cq_moderation;
 	struct mlx5e_cq_moder tx_cq_moderation;
@@ -260,34 +274,18 @@ enum {
 struct mlx5e_dcbx {
 	enum mlx5_dcbx_oper_mode   mode;
 	struct mlx5e_cee_config    cee_cfg; /* pending configuration */
+	u8                         dscp_app_cnt;
 
 	/* The only setting that cannot be read from FW */
 	u8                         tc_tsa[IEEE_8021QAZ_MAX_TCS];
 	u8                         cap;
 };
-#endif
 
-#define MAX_PIN_NUM	8
-struct mlx5e_pps {
-	u8                         pin_caps[MAX_PIN_NUM];
-	struct work_struct         out_work;
-	u64                        start[MAX_PIN_NUM];
-	u8                         enabled;
-};
-
-struct mlx5e_tstamp {
-	rwlock_t                   lock;
-	struct cyclecounter        cycles;
-	struct timecounter         clock;
-	struct hwtstamp_config     hwtstamp_config;
-	u32                        nominal_c_mult;
-	unsigned long              overflow_period;
-	struct delayed_work        overflow_work;
-	struct mlx5_core_dev      *mdev;
-	struct ptp_clock          *ptp;
-	struct ptp_clock_info      ptp_info;
-	struct mlx5e_pps           pps_info;
+struct mlx5e_dcbx_dp {
+	u8                         dscp2prio[MLX5E_MAX_DSCP];
+	u8                         trust_state;
 };
+#endif
 
 enum {
 	MLX5E_RQ_STATE_ENABLED,
@@ -375,9 +373,10 @@ struct mlx5e_txqsq {
 	u8                         min_inline_mode;
 	u16                        edge;
 	struct device             *pdev;
-	struct mlx5e_tstamp       *tstamp;
 	__be32                     mkey_be;
 	unsigned long              state;
+	struct hwtstamp_config    *tstamp;
+	struct mlx5_clock         *clock;
 
 	/* control path */
 	struct mlx5_wq_ctrl        wq_ctrl;
@@ -543,10 +542,11 @@ struct mlx5e_rq {
 	struct mlx5e_channel  *channel;
 	struct device         *pdev;
 	struct net_device     *netdev;
-	struct mlx5e_tstamp   *tstamp;
 	struct mlx5e_rq_stats  stats;
 	struct mlx5e_cq        cq;
 	struct mlx5e_page_cache page_cache;
+	struct hwtstamp_config *tstamp;
+	struct mlx5_clock      *clock;
 
 	mlx5e_fp_handle_rx_cqe handle_rx_cqe;
 	mlx5e_fp_post_rx_wqes  post_wqes;
@@ -588,7 +588,7 @@ struct mlx5e_channel {
 	/* control */
 	struct mlx5e_priv         *priv;
 	struct mlx5_core_dev      *mdev;
-	struct mlx5e_tstamp       *tstamp;
+	struct hwtstamp_config    *tstamp;
 	int                        ix;
 };
 
@@ -655,12 +655,14 @@ struct mlx5e_tc_table {
 
 struct mlx5e_vlan_table {
 	struct mlx5e_flow_table		ft;
-	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
-	struct mlx5_flow_handle	*active_vlans_rule[VLAN_N_VID];
+	DECLARE_BITMAP(active_cvlans, VLAN_N_VID);
+	DECLARE_BITMAP(active_svlans, VLAN_N_VID);
+	struct mlx5_flow_handle	*active_cvlans_rule[VLAN_N_VID];
+	struct mlx5_flow_handle	*active_svlans_rule[VLAN_N_VID];
 	struct mlx5_flow_handle	*untagged_rule;
 	struct mlx5_flow_handle	*any_cvlan_rule;
 	struct mlx5_flow_handle	*any_svlan_rule;
-	bool			filter_disabled;
+	bool			cvlan_filter_disabled;
 };
 
 struct mlx5e_l2_table {
@@ -762,8 +764,12 @@ struct mlx5e_priv {
 	/* priv data path fields - start */
 	struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC];
 	int channel_tc2txq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC];
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+	struct mlx5e_dcbx_dp       dcbx_dp;
+#endif
 	/* priv data path fields - end */
 
+	u32                        msglevel;
 	unsigned long              state;
 	struct mutex               state_lock; /* Protects Interface state */
 	struct mlx5e_rq            drop_rq;
@@ -789,7 +795,7 @@ struct mlx5e_priv {
 	struct mlx5_core_dev      *mdev;
 	struct net_device         *netdev;
 	struct mlx5e_stats         stats;
-	struct mlx5e_tstamp        tstamp;
+	struct hwtstamp_config     tstamp;
 	u16 q_counter;
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 	struct mlx5e_dcbx          dcbx;
@@ -820,6 +826,8 @@ struct mlx5e_profile {
 		mlx5e_fp_handle_rx_cqe handle_rx_cqe;
 		mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe;
 	} rx_handlers;
+	void	(*netdev_registered_init)(struct mlx5e_priv *priv);
+	void    (*netdev_registered_remove)(struct mlx5e_priv *priv);
 	int	max_tc;
 };
 
@@ -873,12 +881,6 @@ void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv);
 void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv);
 void mlx5e_set_rx_mode_work(struct work_struct *work);
 
-void mlx5e_fill_hwstamp(struct mlx5e_tstamp *clock, u64 timestamp,
-			struct skb_shared_hwtstamps *hwts);
-void mlx5e_timestamp_init(struct mlx5e_priv *priv);
-void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv);
-void mlx5e_pps_event_handler(struct mlx5e_priv *priv,
-			     struct ptp_clock_event *event);
 int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr);
 int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr);
 int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val);
@@ -887,8 +889,9 @@ int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto,
 			  u16 vid);
 int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto,
 			   u16 vid);
-void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv);
-void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv);
+void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv);
+void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv);
+void mlx5e_timestamp_set(struct mlx5e_priv *priv);
 
 struct mlx5e_redirect_rqt_param {
 	bool is_rss;
@@ -928,6 +931,8 @@ void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
 				   int num_channels);
 int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
 
+void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params,
+				 u8 cq_period_mode);
 void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
 				 u8 cq_period_mode);
 void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
@@ -993,6 +998,8 @@ extern const struct ethtool_ops mlx5e_ethtool_ops;
 extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops;
 int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets);
 void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv);
+void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv);
+void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv);
 #endif
 
 #ifndef CONFIG_RFS_ACCEL
@@ -1045,6 +1052,9 @@ void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt);
 int mlx5e_create_ttc_table(struct mlx5e_priv *priv);
 void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv);
 
+int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv);
+void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv);
+
 int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc,
 		     u32 underlay_qpn, u32 *tisn);
 void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn);
@@ -1081,6 +1091,9 @@ int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
 int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
 			       struct ethtool_flash *flash);
 
+int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+			    void *cb_priv);
+
 /* mlx5e generic netdev management API */
 struct net_device*
 mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile,
@@ -1091,5 +1104,5 @@ void mlx5e_destroy_netdev(struct mlx5e_priv *priv);
 void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
 			    struct mlx5e_params *params,
 			    u16 max_channels);
-
+u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev);
 #endif /* __MLX5_EN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index 4614ddfa91eb..6a7c8b04447e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -256,7 +256,7 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
 			goto drop;
 		}
 	mdata = mlx5e_ipsec_add_metadata(skb);
-	if (unlikely(IS_ERR(mdata))) {
+	if (IS_ERR(mdata)) {
 		atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata);
 		goto drop;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index 12d3ced61114..610d485c4b03 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -92,7 +92,7 @@ static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type)
 
 static int arfs_disable(struct mlx5e_priv *priv)
 {
-	struct mlx5_flow_destination dest;
+	struct mlx5_flow_destination dest = {};
 	struct mlx5e_tir *tir = priv->indir_tir;
 	int err = 0;
 	int tt;
@@ -126,7 +126,7 @@ int mlx5e_arfs_disable(struct mlx5e_priv *priv)
 
 int mlx5e_arfs_enable(struct mlx5e_priv *priv)
 {
-	struct mlx5_flow_destination dest;
+	struct mlx5_flow_destination dest = {};
 	int err = 0;
 	int tt;
 	int i;
@@ -175,7 +175,7 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
 {
 	struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type];
 	struct mlx5e_tir *tir = priv->indir_tir;
-	struct mlx5_flow_destination dest;
+	struct mlx5_flow_destination dest = {};
 	MLX5_DECLARE_FLOW_ACT(flow_act);
 	struct mlx5_flow_spec *spec;
 	enum mlx5e_traffic_types tt;
@@ -466,7 +466,7 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv,
 	struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
 	struct arfs_tuple *tuple = &arfs_rule->tuple;
 	struct mlx5_flow_handle *rule = NULL;
-	struct mlx5_flow_destination dest;
+	struct mlx5_flow_destination dest = {};
 	MLX5_DECLARE_FLOW_ACT(flow_act);
 	struct arfs_table *arfs_table;
 	struct mlx5_flow_spec *spec;
@@ -557,7 +557,7 @@ out:
 static void arfs_modify_rule_rq(struct mlx5e_priv *priv,
 				struct mlx5_flow_handle *rule, u16 rxq)
 {
-	struct mlx5_flow_destination dst;
+	struct mlx5_flow_destination dst = {};
 	int err = 0;
 
 	dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
deleted file mode 100644
index 84dd63e74041..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+++ /dev/null
@@ -1,619 +0,0 @@
-/*
- * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/clocksource.h>
-#include "en.h"
-
-enum {
-	MLX5E_CYCLES_SHIFT	= 23
-};
-
-enum {
-	MLX5E_PIN_MODE_IN		= 0x0,
-	MLX5E_PIN_MODE_OUT		= 0x1,
-};
-
-enum {
-	MLX5E_OUT_PATTERN_PULSE		= 0x0,
-	MLX5E_OUT_PATTERN_PERIODIC	= 0x1,
-};
-
-enum {
-	MLX5E_EVENT_MODE_DISABLE	= 0x0,
-	MLX5E_EVENT_MODE_REPETETIVE	= 0x1,
-	MLX5E_EVENT_MODE_ONCE_TILL_ARM	= 0x2,
-};
-
-enum {
-	MLX5E_MTPPS_FS_ENABLE			= BIT(0x0),
-	MLX5E_MTPPS_FS_PATTERN			= BIT(0x2),
-	MLX5E_MTPPS_FS_PIN_MODE			= BIT(0x3),
-	MLX5E_MTPPS_FS_TIME_STAMP		= BIT(0x4),
-	MLX5E_MTPPS_FS_OUT_PULSE_DURATION	= BIT(0x5),
-	MLX5E_MTPPS_FS_ENH_OUT_PER_ADJ		= BIT(0x7),
-};
-
-void mlx5e_fill_hwstamp(struct mlx5e_tstamp *tstamp, u64 timestamp,
-			struct skb_shared_hwtstamps *hwts)
-{
-	u64 nsec;
-
-	read_lock(&tstamp->lock);
-	nsec = timecounter_cyc2time(&tstamp->clock, timestamp);
-	read_unlock(&tstamp->lock);
-
-	hwts->hwtstamp = ns_to_ktime(nsec);
-}
-
-static u64 mlx5e_read_internal_timer(const struct cyclecounter *cc)
-{
-	struct mlx5e_tstamp *tstamp = container_of(cc, struct mlx5e_tstamp,
-						   cycles);
-
-	return mlx5_read_internal_timer(tstamp->mdev) & cc->mask;
-}
-
-static void mlx5e_pps_out(struct work_struct *work)
-{
-	struct mlx5e_pps *pps_info = container_of(work, struct mlx5e_pps,
-						  out_work);
-	struct mlx5e_tstamp *tstamp = container_of(pps_info, struct mlx5e_tstamp,
-						   pps_info);
-	u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
-	unsigned long flags;
-	int i;
-
-	for (i = 0; i < tstamp->ptp_info.n_pins; i++) {
-		u64 tstart;
-
-		write_lock_irqsave(&tstamp->lock, flags);
-		tstart = tstamp->pps_info.start[i];
-		tstamp->pps_info.start[i] = 0;
-		write_unlock_irqrestore(&tstamp->lock, flags);
-		if (!tstart)
-			continue;
-
-		MLX5_SET(mtpps_reg, in, pin, i);
-		MLX5_SET64(mtpps_reg, in, time_stamp, tstart);
-		MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_TIME_STAMP);
-		mlx5_set_mtpps(tstamp->mdev, in, sizeof(in));
-	}
-}
-
-static void mlx5e_timestamp_overflow(struct work_struct *work)
-{
-	struct delayed_work *dwork = to_delayed_work(work);
-	struct mlx5e_tstamp *tstamp = container_of(dwork, struct mlx5e_tstamp,
-						   overflow_work);
-	struct mlx5e_priv *priv = container_of(tstamp, struct mlx5e_priv, tstamp);
-	unsigned long flags;
-
-	write_lock_irqsave(&tstamp->lock, flags);
-	timecounter_read(&tstamp->clock);
-	write_unlock_irqrestore(&tstamp->lock, flags);
-	queue_delayed_work(priv->wq, &tstamp->overflow_work,
-			   msecs_to_jiffies(tstamp->overflow_period * 1000));
-}
-
-int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
-{
-	struct hwtstamp_config config;
-	int err;
-
-	if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz))
-		return -EOPNOTSUPP;
-
-	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
-		return -EFAULT;
-
-	/* TX HW timestamp */
-	switch (config.tx_type) {
-	case HWTSTAMP_TX_OFF:
-	case HWTSTAMP_TX_ON:
-		break;
-	default:
-		return -ERANGE;
-	}
-
-	mutex_lock(&priv->state_lock);
-	/* RX HW timestamp */
-	switch (config.rx_filter) {
-	case HWTSTAMP_FILTER_NONE:
-		/* Reset CQE compression to Admin default */
-		mlx5e_modify_rx_cqe_compression_locked(priv, priv->channels.params.rx_cqe_compress_def);
-		break;
-	case HWTSTAMP_FILTER_ALL:
-	case HWTSTAMP_FILTER_SOME:
-	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
-	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
-	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
-	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
-	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
-	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
-	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
-	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
-	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
-	case HWTSTAMP_FILTER_PTP_V2_EVENT:
-	case HWTSTAMP_FILTER_PTP_V2_SYNC:
-	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
-	case HWTSTAMP_FILTER_NTP_ALL:
-		/* Disable CQE compression */
-		netdev_warn(priv->netdev, "Disabling cqe compression");
-		err = mlx5e_modify_rx_cqe_compression_locked(priv, false);
-		if (err) {
-			netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err);
-			mutex_unlock(&priv->state_lock);
-			return err;
-		}
-		config.rx_filter = HWTSTAMP_FILTER_ALL;
-		break;
-	default:
-		mutex_unlock(&priv->state_lock);
-		return -ERANGE;
-	}
-
-	memcpy(&priv->tstamp.hwtstamp_config, &config, sizeof(config));
-	mutex_unlock(&priv->state_lock);
-
-	return copy_to_user(ifr->ifr_data, &config,
-			    sizeof(config)) ? -EFAULT : 0;
-}
-
-int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr)
-{
-	struct hwtstamp_config *cfg = &priv->tstamp.hwtstamp_config;
-
-	if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz))
-		return -EOPNOTSUPP;
-
-	return copy_to_user(ifr->ifr_data, cfg, sizeof(*cfg)) ? -EFAULT : 0;
-}
-
-static int mlx5e_ptp_settime(struct ptp_clock_info *ptp,
-			     const struct timespec64 *ts)
-{
-	struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
-						   ptp_info);
-	u64 ns = timespec64_to_ns(ts);
-	unsigned long flags;
-
-	write_lock_irqsave(&tstamp->lock, flags);
-	timecounter_init(&tstamp->clock, &tstamp->cycles, ns);
-	write_unlock_irqrestore(&tstamp->lock, flags);
-
-	return 0;
-}
-
-static int mlx5e_ptp_gettime(struct ptp_clock_info *ptp,
-			     struct timespec64 *ts)
-{
-	struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
-						   ptp_info);
-	u64 ns;
-	unsigned long flags;
-
-	write_lock_irqsave(&tstamp->lock, flags);
-	ns = timecounter_read(&tstamp->clock);
-	write_unlock_irqrestore(&tstamp->lock, flags);
-
-	*ts = ns_to_timespec64(ns);
-
-	return 0;
-}
-
-static int mlx5e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
-{
-	struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
-						   ptp_info);
-	unsigned long flags;
-
-	write_lock_irqsave(&tstamp->lock, flags);
-	timecounter_adjtime(&tstamp->clock, delta);
-	write_unlock_irqrestore(&tstamp->lock, flags);
-
-	return 0;
-}
-
-static int mlx5e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
-{
-	u64 adj;
-	u32 diff;
-	unsigned long flags;
-	int neg_adj = 0;
-	struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
-						  ptp_info);
-
-	if (delta < 0) {
-		neg_adj = 1;
-		delta = -delta;
-	}
-
-	adj = tstamp->nominal_c_mult;
-	adj *= delta;
-	diff = div_u64(adj, 1000000000ULL);
-
-	write_lock_irqsave(&tstamp->lock, flags);
-	timecounter_read(&tstamp->clock);
-	tstamp->cycles.mult = neg_adj ? tstamp->nominal_c_mult - diff :
-					tstamp->nominal_c_mult + diff;
-	write_unlock_irqrestore(&tstamp->lock, flags);
-
-	return 0;
-}
-
-static int mlx5e_extts_configure(struct ptp_clock_info *ptp,
-				 struct ptp_clock_request *rq,
-				 int on)
-{
-	struct mlx5e_tstamp *tstamp =
-		container_of(ptp, struct mlx5e_tstamp, ptp_info);
-	struct mlx5e_priv *priv =
-		container_of(tstamp, struct mlx5e_priv, tstamp);
-	u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
-	u32 field_select = 0;
-	u8 pin_mode = 0;
-	u8 pattern = 0;
-	int pin = -1;
-	int err = 0;
-
-	if (!MLX5_PPS_CAP(priv->mdev))
-		return -EOPNOTSUPP;
-
-	if (rq->extts.index >= tstamp->ptp_info.n_pins)
-		return -EINVAL;
-
-	if (on) {
-		pin = ptp_find_pin(tstamp->ptp, PTP_PF_EXTTS, rq->extts.index);
-		if (pin < 0)
-			return -EBUSY;
-		pin_mode = MLX5E_PIN_MODE_IN;
-		pattern = !!(rq->extts.flags & PTP_FALLING_EDGE);
-		field_select = MLX5E_MTPPS_FS_PIN_MODE |
-			       MLX5E_MTPPS_FS_PATTERN |
-			       MLX5E_MTPPS_FS_ENABLE;
-	} else {
-		pin = rq->extts.index;
-		field_select = MLX5E_MTPPS_FS_ENABLE;
-	}
-
-	MLX5_SET(mtpps_reg, in, pin, pin);
-	MLX5_SET(mtpps_reg, in, pin_mode, pin_mode);
-	MLX5_SET(mtpps_reg, in, pattern, pattern);
-	MLX5_SET(mtpps_reg, in, enable, on);
-	MLX5_SET(mtpps_reg, in, field_select, field_select);
-
-	err = mlx5_set_mtpps(priv->mdev, in, sizeof(in));
-	if (err)
-		return err;
-
-	return mlx5_set_mtppse(priv->mdev, pin, 0,
-			       MLX5E_EVENT_MODE_REPETETIVE & on);
-}
-
-static int mlx5e_perout_configure(struct ptp_clock_info *ptp,
-				  struct ptp_clock_request *rq,
-				  int on)
-{
-	struct mlx5e_tstamp *tstamp =
-		container_of(ptp, struct mlx5e_tstamp, ptp_info);
-	struct mlx5e_priv *priv =
-		container_of(tstamp, struct mlx5e_priv, tstamp);
-	u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
-	u64 nsec_now, nsec_delta, time_stamp = 0;
-	u64 cycles_now, cycles_delta;
-	struct timespec64 ts;
-	unsigned long flags;
-	u32 field_select = 0;
-	u8 pin_mode = 0;
-	u8 pattern = 0;
-	int pin = -1;
-	int err = 0;
-	s64 ns;
-
-	if (!MLX5_PPS_CAP(priv->mdev))
-		return -EOPNOTSUPP;
-
-	if (rq->perout.index >= tstamp->ptp_info.n_pins)
-		return -EINVAL;
-
-	if (on) {
-		pin = ptp_find_pin(tstamp->ptp, PTP_PF_PEROUT,
-				   rq->perout.index);
-		if (pin < 0)
-			return -EBUSY;
-
-		pin_mode = MLX5E_PIN_MODE_OUT;
-		pattern = MLX5E_OUT_PATTERN_PERIODIC;
-		ts.tv_sec = rq->perout.period.sec;
-		ts.tv_nsec = rq->perout.period.nsec;
-		ns = timespec64_to_ns(&ts);
-
-		if ((ns >> 1) != 500000000LL)
-			return -EINVAL;
-
-		ts.tv_sec = rq->perout.start.sec;
-		ts.tv_nsec = rq->perout.start.nsec;
-		ns = timespec64_to_ns(&ts);
-		cycles_now = mlx5_read_internal_timer(tstamp->mdev);
-		write_lock_irqsave(&tstamp->lock, flags);
-		nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now);
-		nsec_delta = ns - nsec_now;
-		cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift,
-					 tstamp->cycles.mult);
-		write_unlock_irqrestore(&tstamp->lock, flags);
-		time_stamp = cycles_now + cycles_delta;
-		field_select = MLX5E_MTPPS_FS_PIN_MODE |
-			       MLX5E_MTPPS_FS_PATTERN |
-			       MLX5E_MTPPS_FS_ENABLE |
-			       MLX5E_MTPPS_FS_TIME_STAMP;
-	} else {
-		pin = rq->perout.index;
-		field_select = MLX5E_MTPPS_FS_ENABLE;
-	}
-
-	MLX5_SET(mtpps_reg, in, pin, pin);
-	MLX5_SET(mtpps_reg, in, pin_mode, pin_mode);
-	MLX5_SET(mtpps_reg, in, pattern, pattern);
-	MLX5_SET(mtpps_reg, in, enable, on);
-	MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp);
-	MLX5_SET(mtpps_reg, in, field_select, field_select);
-
-	err = mlx5_set_mtpps(priv->mdev, in, sizeof(in));
-	if (err)
-		return err;
-
-	return mlx5_set_mtppse(priv->mdev, pin, 0,
-			       MLX5E_EVENT_MODE_REPETETIVE & on);
-}
-
-static int mlx5e_pps_configure(struct ptp_clock_info *ptp,
-			       struct ptp_clock_request *rq,
-			       int on)
-{
-	struct mlx5e_tstamp *tstamp =
-		container_of(ptp, struct mlx5e_tstamp, ptp_info);
-
-	tstamp->pps_info.enabled = !!on;
-	return 0;
-}
-
-static int mlx5e_ptp_enable(struct ptp_clock_info *ptp,
-			    struct ptp_clock_request *rq,
-			    int on)
-{
-	switch (rq->type) {
-	case PTP_CLK_REQ_EXTTS:
-		return mlx5e_extts_configure(ptp, rq, on);
-	case PTP_CLK_REQ_PEROUT:
-		return mlx5e_perout_configure(ptp, rq, on);
-	case PTP_CLK_REQ_PPS:
-		return mlx5e_pps_configure(ptp, rq, on);
-	default:
-		return -EOPNOTSUPP;
-	}
-	return 0;
-}
-
-static int mlx5e_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
-			    enum ptp_pin_function func, unsigned int chan)
-{
-	return (func == PTP_PF_PHYSYNC) ? -EOPNOTSUPP : 0;
-}
-
-static const struct ptp_clock_info mlx5e_ptp_clock_info = {
-	.owner		= THIS_MODULE,
-	.max_adj	= 100000000,
-	.n_alarm	= 0,
-	.n_ext_ts	= 0,
-	.n_per_out	= 0,
-	.n_pins		= 0,
-	.pps		= 0,
-	.adjfreq	= mlx5e_ptp_adjfreq,
-	.adjtime	= mlx5e_ptp_adjtime,
-	.gettime64	= mlx5e_ptp_gettime,
-	.settime64	= mlx5e_ptp_settime,
-	.enable		= NULL,
-	.verify		= NULL,
-};
-
-static void mlx5e_timestamp_init_config(struct mlx5e_tstamp *tstamp)
-{
-	tstamp->hwtstamp_config.tx_type = HWTSTAMP_TX_OFF;
-	tstamp->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
-}
-
-static int mlx5e_init_pin_config(struct mlx5e_tstamp *tstamp)
-{
-	int i;
-
-	tstamp->ptp_info.pin_config =
-		kzalloc(sizeof(*tstamp->ptp_info.pin_config) *
-			       tstamp->ptp_info.n_pins, GFP_KERNEL);
-	if (!tstamp->ptp_info.pin_config)
-		return -ENOMEM;
-	tstamp->ptp_info.enable = mlx5e_ptp_enable;
-	tstamp->ptp_info.verify = mlx5e_ptp_verify;
-	tstamp->ptp_info.pps = 1;
-
-	for (i = 0; i < tstamp->ptp_info.n_pins; i++) {
-		snprintf(tstamp->ptp_info.pin_config[i].name,
-			 sizeof(tstamp->ptp_info.pin_config[i].name),
-			 "mlx5_pps%d", i);
-		tstamp->ptp_info.pin_config[i].index = i;
-		tstamp->ptp_info.pin_config[i].func = PTP_PF_NONE;
-		tstamp->ptp_info.pin_config[i].chan = i;
-	}
-
-	return 0;
-}
-
-static void mlx5e_get_pps_caps(struct mlx5e_priv *priv,
-			       struct mlx5e_tstamp *tstamp)
-{
-	u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
-
-	mlx5_query_mtpps(priv->mdev, out, sizeof(out));
-
-	tstamp->ptp_info.n_pins = MLX5_GET(mtpps_reg, out,
-					   cap_number_of_pps_pins);
-	tstamp->ptp_info.n_ext_ts = MLX5_GET(mtpps_reg, out,
-					     cap_max_num_of_pps_in_pins);
-	tstamp->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out,
-					      cap_max_num_of_pps_out_pins);
-
-	tstamp->pps_info.pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode);
-	tstamp->pps_info.pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode);
-	tstamp->pps_info.pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode);
-	tstamp->pps_info.pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode);
-	tstamp->pps_info.pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode);
-	tstamp->pps_info.pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode);
-	tstamp->pps_info.pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode);
-	tstamp->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode);
-}
-
-void mlx5e_pps_event_handler(struct mlx5e_priv *priv,
-			     struct ptp_clock_event *event)
-{
-	struct net_device *netdev = priv->netdev;
-	struct mlx5e_tstamp *tstamp = &priv->tstamp;
-	struct timespec64 ts;
-	u64 nsec_now, nsec_delta;
-	u64 cycles_now, cycles_delta;
-	int pin = event->index;
-	s64 ns;
-	unsigned long flags;
-
-	switch (tstamp->ptp_info.pin_config[pin].func) {
-	case PTP_PF_EXTTS:
-		if (tstamp->pps_info.enabled) {
-			event->type = PTP_CLOCK_PPSUSR;
-			event->pps_times.ts_real = ns_to_timespec64(event->timestamp);
-		} else {
-			event->type = PTP_CLOCK_EXTTS;
-		}
-		ptp_clock_event(tstamp->ptp, event);
-		break;
-	case PTP_PF_PEROUT:
-		mlx5e_ptp_gettime(&tstamp->ptp_info, &ts);
-		cycles_now = mlx5_read_internal_timer(tstamp->mdev);
-		ts.tv_sec += 1;
-		ts.tv_nsec = 0;
-		ns = timespec64_to_ns(&ts);
-		write_lock_irqsave(&tstamp->lock, flags);
-		nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now);
-		nsec_delta = ns - nsec_now;
-		cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift,
-					 tstamp->cycles.mult);
-		tstamp->pps_info.start[pin] = cycles_now + cycles_delta;
-		queue_work(priv->wq, &tstamp->pps_info.out_work);
-		write_unlock_irqrestore(&tstamp->lock, flags);
-		break;
-	default:
-		netdev_err(netdev, "%s: Unhandled event\n", __func__);
-	}
-}
-
-void mlx5e_timestamp_init(struct mlx5e_priv *priv)
-{
-	struct mlx5e_tstamp *tstamp = &priv->tstamp;
-	u64 ns;
-	u64 frac = 0;
-	u32 dev_freq;
-
-	mlx5e_timestamp_init_config(tstamp);
-	dev_freq = MLX5_CAP_GEN(priv->mdev, device_frequency_khz);
-	if (!dev_freq) {
-		mlx5_core_warn(priv->mdev, "invalid device_frequency_khz, aborting HW clock init\n");
-		return;
-	}
-	rwlock_init(&tstamp->lock);
-	tstamp->cycles.read = mlx5e_read_internal_timer;
-	tstamp->cycles.shift = MLX5E_CYCLES_SHIFT;
-	tstamp->cycles.mult = clocksource_khz2mult(dev_freq,
-						   tstamp->cycles.shift);
-	tstamp->nominal_c_mult = tstamp->cycles.mult;
-	tstamp->cycles.mask = CLOCKSOURCE_MASK(41);
-	tstamp->mdev = priv->mdev;
-
-	timecounter_init(&tstamp->clock, &tstamp->cycles,
-			 ktime_to_ns(ktime_get_real()));
-
-	/* Calculate period in seconds to call the overflow watchdog - to make
-	 * sure counter is checked at least once every wrap around.
-	 */
-	ns = cyclecounter_cyc2ns(&tstamp->cycles, tstamp->cycles.mask,
-				 frac, &frac);
-	do_div(ns, NSEC_PER_SEC / 2 / HZ);
-	tstamp->overflow_period = ns;
-
-	INIT_WORK(&tstamp->pps_info.out_work, mlx5e_pps_out);
-	INIT_DELAYED_WORK(&tstamp->overflow_work, mlx5e_timestamp_overflow);
-	if (tstamp->overflow_period)
-		queue_delayed_work(priv->wq, &tstamp->overflow_work, 0);
-	else
-		mlx5_core_warn(priv->mdev, "invalid overflow period, overflow_work is not scheduled\n");
-
-	/* Configure the PHC */
-	tstamp->ptp_info = mlx5e_ptp_clock_info;
-	snprintf(tstamp->ptp_info.name, 16, "mlx5 ptp");
-
-	/* Initialize 1PPS data structures */
-	if (MLX5_PPS_CAP(priv->mdev))
-		mlx5e_get_pps_caps(priv, tstamp);
-	if (tstamp->ptp_info.n_pins)
-		mlx5e_init_pin_config(tstamp);
-
-	tstamp->ptp = ptp_clock_register(&tstamp->ptp_info,
-					 &priv->mdev->pdev->dev);
-	if (IS_ERR(tstamp->ptp)) {
-		mlx5_core_warn(priv->mdev, "ptp_clock_register failed %ld\n",
-			       PTR_ERR(tstamp->ptp));
-		tstamp->ptp = NULL;
-	}
-}
-
-void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv)
-{
-	struct mlx5e_tstamp *tstamp = &priv->tstamp;
-
-	if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz))
-		return;
-
-	if (priv->tstamp.ptp) {
-		ptp_clock_unregister(priv->tstamp.ptp);
-		priv->tstamp.ptp = NULL;
-	}
-
-	cancel_work_sync(&tstamp->pps_info.out_work);
-	cancel_delayed_work_sync(&tstamp->overflow_work);
-	kfree(tstamp->ptp_info.pin_config);
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index ece3fb147e3e..784e282803db 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -134,6 +134,7 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev)
 	mlx5_core_destroy_mkey(mdev, &res->mkey);
 	mlx5_core_dealloc_transport_domain(mdev, res->td.tdn);
 	mlx5_core_dealloc_pd(mdev, res->pdn);
+	memset(res, 0, sizeof(*res));
 }
 
 int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb)
@@ -170,3 +171,15 @@ out:
 
 	return err;
 }
+
+u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev)
+{
+	u8 min_inline_mode;
+
+	mlx5_query_min_inline(mdev, &min_inline_mode);
+	if (min_inline_mode == MLX5_INLINE_MODE_NONE &&
+	    !MLX5_CAP_ETH(mdev, wqe_vlan_insert))
+		min_inline_mode = MLX5_INLINE_MODE_L2;
+
+	return min_inline_mode;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index 51c4cc00a186..c6d90b6dd80e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -46,6 +46,13 @@ enum {
 	MLX5E_LOWEST_PRIO_GROUP   = 0,
 };
 
+#define MLX5_DSCP_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, qcam_reg)  && \
+				   MLX5_CAP_QCAM_REG(mdev, qpts) && \
+				   MLX5_CAP_QCAM_REG(mdev, qpdpm))
+
+static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state);
+static int mlx5e_set_dscp2prio(struct mlx5e_priv *priv, u8 dscp, u8 prio);
+
 /* If dcbx mode is non-host set the dcbx mode to host.
  */
 static int mlx5e_dcbnl_set_dcbx_mode(struct mlx5e_priv *priv,
@@ -234,7 +241,7 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
 	u8 tc_tx_bw[IEEE_8021QAZ_MAX_TCS];
 	u8 tc_group[IEEE_8021QAZ_MAX_TCS];
 	int max_tc = mlx5_max_tc(mdev);
-	int err;
+	int err, i;
 
 	mlx5e_build_tc_group(ets, tc_group, max_tc);
 	mlx5e_build_tc_tx_bw(ets, tc_tx_bw, tc_group, max_tc);
@@ -253,6 +260,14 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
 		return err;
 
 	memcpy(priv->dcbx.tc_tsa, ets->tc_tsa, sizeof(ets->tc_tsa));
+
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+		mlx5e_dbg(HW, priv, "%s: prio_%d <=> tc_%d\n",
+			  __func__, i, ets->prio_tc[i]);
+		mlx5e_dbg(HW, priv, "%s: tc_%d <=> tx_bw_%d%%, group_%d\n",
+			  __func__, i, tc_tx_bw[i], tc_group[i]);
+	}
+
 	return err;
 }
 
@@ -338,6 +353,11 @@ static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev,
 	ret = mlx5_set_port_pfc(mdev, pfc->pfc_en, pfc->pfc_en);
 	mlx5_toggle_port_link(mdev);
 
+	if (!ret) {
+		mlx5e_dbg(HW, priv,
+			  "%s: PFC per priority bit mask: 0x%x\n",
+			  __func__, pfc->pfc_en);
+	}
 	return ret;
 }
 
@@ -381,6 +401,113 @@ static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode)
 	return 0;
 }
 
+static int mlx5e_dcbnl_ieee_setapp(struct net_device *dev, struct dcb_app *app)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct dcb_app temp;
+	bool is_new;
+	int err;
+
+	if (app->selector != IEEE_8021QAZ_APP_SEL_DSCP)
+		return -EINVAL;
+
+	if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager))
+		return -EINVAL;
+
+	if (!MLX5_DSCP_SUPPORTED(priv->mdev))
+		return -EINVAL;
+
+	if (app->protocol >= MLX5E_MAX_DSCP)
+		return -EINVAL;
+
+	/* Save the old entry info */
+	temp.selector = IEEE_8021QAZ_APP_SEL_DSCP;
+	temp.protocol = app->protocol;
+	temp.priority = priv->dcbx_dp.dscp2prio[app->protocol];
+
+	/* Check if need to switch to dscp trust state */
+	if (!priv->dcbx.dscp_app_cnt) {
+		err =  mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_DSCP);
+		if (err)
+			return err;
+	}
+
+	/* Skip the fw command if new and old mapping are the same */
+	if (app->priority != priv->dcbx_dp.dscp2prio[app->protocol]) {
+		err = mlx5e_set_dscp2prio(priv, app->protocol, app->priority);
+		if (err)
+			goto fw_err;
+	}
+
+	/* Delete the old entry if exists */
+	is_new = false;
+	err = dcb_ieee_delapp(dev, &temp);
+	if (err)
+		is_new = true;
+
+	/* Add new entry and update counter */
+	err = dcb_ieee_setapp(dev, app);
+	if (err)
+		return err;
+
+	if (is_new)
+		priv->dcbx.dscp_app_cnt++;
+
+	return err;
+
+fw_err:
+	mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP);
+	return err;
+}
+
+static int mlx5e_dcbnl_ieee_delapp(struct net_device *dev, struct dcb_app *app)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	int err;
+
+	if (app->selector != IEEE_8021QAZ_APP_SEL_DSCP)
+		return -EINVAL;
+
+	if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager))
+		return -EINVAL;
+
+	if (!MLX5_DSCP_SUPPORTED(priv->mdev))
+		return -EINVAL;
+
+	if (app->protocol >= MLX5E_MAX_DSCP)
+		return -EINVAL;
+
+	/* Skip if no dscp app entry */
+	if (!priv->dcbx.dscp_app_cnt)
+		return -ENOENT;
+
+	/* Check if the entry matches fw setting */
+	if (app->priority != priv->dcbx_dp.dscp2prio[app->protocol])
+		return -ENOENT;
+
+	/* Delete the app entry */
+	err = dcb_ieee_delapp(dev, app);
+	if (err)
+		return err;
+
+	/* Reset the priority mapping back to zero */
+	err = mlx5e_set_dscp2prio(priv, app->protocol, 0);
+	if (err)
+		goto fw_err;
+
+	priv->dcbx.dscp_app_cnt--;
+
+	/* Check if need to switch to pcp trust state */
+	if (!priv->dcbx.dscp_app_cnt)
+		err = mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP);
+
+	return err;
+
+fw_err:
+	mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP);
+	return err;
+}
+
 static int mlx5e_dcbnl_ieee_getmaxrate(struct net_device *netdev,
 				       struct ieee_maxrate *maxrate)
 {
@@ -446,6 +573,11 @@ static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev,
 		}
 	}
 
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+		mlx5e_dbg(HW, priv, "%s: tc_%d <=> max_bw %d Gbps\n",
+			  __func__, i, max_bw_value[i]);
+	}
+
 	return mlx5_modify_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit);
 }
 
@@ -471,6 +603,10 @@ static u8 mlx5e_dcbnl_setall(struct net_device *netdev)
 		ets.tc_rx_bw[i] = cee_cfg->pg_bw_pct[i];
 		ets.tc_tsa[i]   = IEEE_8021QAZ_TSA_ETS;
 		ets.prio_tc[i]  = cee_cfg->prio_to_pg_map[i];
+		mlx5e_dbg(HW, priv,
+			  "%s: Priority group %d: tx_bw %d, rx_bw %d, prio_tc %d\n",
+			  __func__, i, ets.tc_tx_bw[i], ets.tc_rx_bw[i],
+			  ets.prio_tc[i]);
 	}
 
 	err = mlx5e_dbcnl_validate_ets(netdev, &ets);
@@ -740,6 +876,8 @@ const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = {
 	.ieee_setmaxrate = mlx5e_dcbnl_ieee_setmaxrate,
 	.ieee_getpfc	= mlx5e_dcbnl_ieee_getpfc,
 	.ieee_setpfc	= mlx5e_dcbnl_ieee_setpfc,
+	.ieee_setapp    = mlx5e_dcbnl_ieee_setapp,
+	.ieee_delapp    = mlx5e_dcbnl_ieee_delapp,
 	.getdcbx	= mlx5e_dcbnl_getdcbx,
 	.setdcbx	= mlx5e_dcbnl_setdcbx,
 
@@ -801,10 +939,135 @@ static void mlx5e_ets_init(struct mlx5e_priv *priv)
 	mlx5e_dcbnl_ieee_setets_core(priv, &ets);
 }
 
+enum {
+	INIT,
+	DELETE,
+};
+
+static void mlx5e_dcbnl_dscp_app(struct mlx5e_priv *priv, int action)
+{
+	struct dcb_app temp;
+	int i;
+
+	if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager))
+		return;
+
+	if (!MLX5_DSCP_SUPPORTED(priv->mdev))
+		return;
+
+	/* No SEL_DSCP entry in non DSCP state */
+	if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_DSCP)
+		return;
+
+	temp.selector = IEEE_8021QAZ_APP_SEL_DSCP;
+	for (i = 0; i < MLX5E_MAX_DSCP; i++) {
+		temp.protocol = i;
+		temp.priority = priv->dcbx_dp.dscp2prio[i];
+		if (action == INIT)
+			dcb_ieee_setapp(priv->netdev, &temp);
+		else
+			dcb_ieee_delapp(priv->netdev, &temp);
+	}
+
+	priv->dcbx.dscp_app_cnt = (action == INIT) ? MLX5E_MAX_DSCP : 0;
+}
+
+void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv)
+{
+	mlx5e_dcbnl_dscp_app(priv, INIT);
+}
+
+void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv)
+{
+	mlx5e_dcbnl_dscp_app(priv, DELETE);
+}
+
+static void mlx5e_trust_update_tx_min_inline_mode(struct mlx5e_priv *priv,
+						  struct mlx5e_params *params)
+{
+	params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(priv->mdev);
+	if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP &&
+	    params->tx_min_inline_mode == MLX5_INLINE_MODE_L2)
+		params->tx_min_inline_mode = MLX5_INLINE_MODE_IP;
+}
+
+static void mlx5e_trust_update_sq_inline_mode(struct mlx5e_priv *priv)
+{
+	struct mlx5e_channels new_channels = {};
+
+	mutex_lock(&priv->state_lock);
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+		goto out;
+
+	new_channels.params = priv->channels.params;
+	mlx5e_trust_update_tx_min_inline_mode(priv, &new_channels.params);
+
+	/* Skip if tx_min_inline is the same */
+	if (new_channels.params.tx_min_inline_mode ==
+	    priv->channels.params.tx_min_inline_mode)
+		goto out;
+
+	if (mlx5e_open_channels(priv, &new_channels))
+		goto out;
+	mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+
+out:
+	mutex_unlock(&priv->state_lock);
+}
+
+static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state)
+{
+	int err;
+
+	err =  mlx5_set_trust_state(priv->mdev, trust_state);
+	if (err)
+		return err;
+	priv->dcbx_dp.trust_state = trust_state;
+	mlx5e_trust_update_sq_inline_mode(priv);
+
+	return err;
+}
+
+static int mlx5e_set_dscp2prio(struct mlx5e_priv *priv, u8 dscp, u8 prio)
+{
+	int err;
+
+	err = mlx5_set_dscp2prio(priv->mdev, dscp, prio);
+	if (err)
+		return err;
+
+	priv->dcbx_dp.dscp2prio[dscp] = prio;
+	return err;
+}
+
+static int mlx5e_trust_initialize(struct mlx5e_priv *priv)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	int err;
+
+	if (!MLX5_DSCP_SUPPORTED(mdev))
+		return 0;
+
+	err = mlx5_query_trust_state(priv->mdev, &priv->dcbx_dp.trust_state);
+	if (err)
+		return err;
+
+	mlx5e_trust_update_tx_min_inline_mode(priv, &priv->channels.params);
+
+	err = mlx5_query_dscp2prio(priv->mdev, priv->dcbx_dp.dscp2prio);
+	if (err)
+		return err;
+
+	return 0;
+}
+
 void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv)
 {
 	struct mlx5e_dcbx *dcbx = &priv->dcbx;
 
+	mlx5e_trust_initialize(priv);
+
 	if (!MLX5_CAP_GEN(priv->mdev, qos))
 		return;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index d12e9fc0d76b..23425f028405 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -31,7 +31,6 @@
  */
 
 #include "en.h"
-#include "en_accel/ipsec.h"
 
 void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
 			       struct ethtool_drvinfo *drvinfo)
@@ -136,59 +135,15 @@ void mlx5e_build_ptys2ethtool_map(void)
 				       ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT);
 }
 
-static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv)
-{
-	struct mlx5_core_dev *mdev = priv->mdev;
-	u8 pfc_en_tx;
-	u8 pfc_en_rx;
-	int err;
-
-	if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
-		return 0;
-
-	err = mlx5_query_port_pfc(mdev, &pfc_en_tx, &pfc_en_rx);
-
-	return err ? 0 : pfc_en_tx | pfc_en_rx;
-}
-
-static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv)
-{
-	struct mlx5_core_dev *mdev = priv->mdev;
-	u32 rx_pause;
-	u32 tx_pause;
-	int err;
-
-	if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
-		return false;
-
-	err = mlx5_query_port_pause(mdev, &rx_pause, &tx_pause);
-
-	return err ? false : rx_pause | tx_pause;
-}
-
-#define MLX5E_NUM_Q_CNTRS(priv) (NUM_Q_COUNTERS * (!!priv->q_counter))
-#define MLX5E_NUM_RQ_STATS(priv) (NUM_RQ_STATS * (priv)->channels.num)
-#define MLX5E_NUM_SQ_STATS(priv) \
-	(NUM_SQ_STATS * (priv)->channels.num * (priv)->channels.params.num_tc)
-#define MLX5E_NUM_PFC_COUNTERS(priv) \
-	((mlx5e_query_global_pause_combined(priv) + hweight8(mlx5e_query_pfc_combined(priv))) * \
-	  NUM_PPORT_PER_PRIO_PFC_COUNTERS)
-
 int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset)
 {
+	int i, num_stats = 0;
+
 	switch (sset) {
 	case ETH_SS_STATS:
-		return NUM_SW_COUNTERS +
-		       MLX5E_NUM_Q_CNTRS(priv) +
-		       NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS(priv) +
-		       NUM_PCIE_COUNTERS(priv) +
-		       MLX5E_NUM_RQ_STATS(priv) +
-		       MLX5E_NUM_SQ_STATS(priv) +
-		       MLX5E_NUM_PFC_COUNTERS(priv) +
-		       ARRAY_SIZE(mlx5e_pme_status_desc) +
-		       ARRAY_SIZE(mlx5e_pme_error_desc) +
-		       mlx5e_ipsec_get_count(priv);
-
+		for (i = 0; i < mlx5e_num_stats_grps; i++)
+			num_stats += mlx5e_stats_grps[i].get_num_stats(priv);
+		return num_stats;
 	case ETH_SS_PRIV_FLAGS:
 		return ARRAY_SIZE(mlx5e_priv_flags);
 	case ETH_SS_TEST:
@@ -208,104 +163,10 @@ static int mlx5e_get_sset_count(struct net_device *dev, int sset)
 
 static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data)
 {
-	int i, j, tc, prio, idx = 0;
-	unsigned long pfc_combined;
-
-	/* SW counters */
-	for (i = 0; i < NUM_SW_COUNTERS; i++)
-		strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].format);
-
-	/* Q counters */
-	for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++)
-		strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].format);
-
-	/* VPORT counters */
-	for (i = 0; i < NUM_VPORT_COUNTERS; i++)
-		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       vport_stats_desc[i].format);
-
-	/* PPORT counters */
-	for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++)
-		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       pport_802_3_stats_desc[i].format);
-
-	for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++)
-		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       pport_2863_stats_desc[i].format);
-
-	for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++)
-		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       pport_2819_stats_desc[i].format);
-
-	for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv); i++)
-		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       pport_phy_statistical_stats_desc[i].format);
-
-	for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS(priv); i++)
-		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       pport_eth_ext_stats_desc[i].format);
-
-	for (i = 0; i < NUM_PCIE_PERF_COUNTERS(priv); i++)
-		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       pcie_perf_stats_desc[i].format);
-
-	for (i = 0; i < NUM_PCIE_PERF_COUNTERS64(priv); i++)
-		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       pcie_perf_stats_desc64[i].format);
-
-	for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS(priv); i++)
-		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       pcie_perf_stall_stats_desc[i].format);
-
-	for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
-		for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++)
-			sprintf(data + (idx++) * ETH_GSTRING_LEN,
-				pport_per_prio_traffic_stats_desc[i].format, prio);
-	}
-
-	pfc_combined = mlx5e_query_pfc_combined(priv);
-	for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) {
-		for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
-			char pfc_string[ETH_GSTRING_LEN];
-
-			snprintf(pfc_string, sizeof(pfc_string), "prio%d", prio);
-			sprintf(data + (idx++) * ETH_GSTRING_LEN,
-				pport_per_prio_pfc_stats_desc[i].format, pfc_string);
-		}
-	}
-
-	if (mlx5e_query_global_pause_combined(priv)) {
-		for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
-			sprintf(data + (idx++) * ETH_GSTRING_LEN,
-				pport_per_prio_pfc_stats_desc[i].format, "global");
-		}
-	}
-
-	/* port module event counters */
-	for (i = 0; i < ARRAY_SIZE(mlx5e_pme_status_desc); i++)
-		strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_status_desc[i].format);
-
-	for (i = 0; i < ARRAY_SIZE(mlx5e_pme_error_desc); i++)
-		strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_error_desc[i].format);
-
-	/* IPSec counters */
-	idx += mlx5e_ipsec_get_strings(priv, data + idx * ETH_GSTRING_LEN);
-
-	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
-		return;
-
-	/* per channel counters */
-	for (i = 0; i < priv->channels.num; i++)
-		for (j = 0; j < NUM_RQ_STATS; j++)
-			sprintf(data + (idx++) * ETH_GSTRING_LEN,
-				rq_stats_desc[j].format, i);
+	int i, idx = 0;
 
-	for (tc = 0; tc < priv->channels.params.num_tc; tc++)
-		for (i = 0; i < priv->channels.num; i++)
-			for (j = 0; j < NUM_SQ_STATS; j++)
-				sprintf(data + (idx++) * ETH_GSTRING_LEN,
-					sq_stats_desc[j].format,
-					priv->channel_tc2txq[i][tc]);
+	for (i = 0; i < mlx5e_num_stats_grps; i++)
+		idx = mlx5e_stats_grps[i].fill_strings(priv, data, idx);
 }
 
 void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data)
@@ -340,10 +201,7 @@ static void mlx5e_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv,
 				     struct ethtool_stats *stats, u64 *data)
 {
-	struct mlx5e_channels *channels;
-	struct mlx5_priv *mlx5_priv;
-	int i, j, tc, prio, idx = 0;
-	unsigned long pfc_combined;
+	int i, idx = 0;
 
 	if (!data)
 		return;
@@ -351,102 +209,10 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv,
 	mutex_lock(&priv->state_lock);
 	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
 		mlx5e_update_stats(priv, true);
-	channels = &priv->channels;
 	mutex_unlock(&priv->state_lock);
 
-	for (i = 0; i < NUM_SW_COUNTERS; i++)
-		data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.sw,
-						   sw_stats_desc, i);
-
-	for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++)
-		data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt,
-						   q_stats_desc, i);
-
-	for (i = 0; i < NUM_VPORT_COUNTERS; i++)
-		data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out,
-						  vport_stats_desc, i);
-
-	for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++)
-		data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.IEEE_802_3_counters,
-						  pport_802_3_stats_desc, i);
-
-	for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++)
-		data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2863_counters,
-						  pport_2863_stats_desc, i);
-
-	for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++)
-		data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters,
-						  pport_2819_stats_desc, i);
-
-	for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv); i++)
-		data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters,
-						  pport_phy_statistical_stats_desc, i);
-
-	for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS(priv); i++)
-		data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.eth_ext_counters,
-						  pport_eth_ext_stats_desc, i);
-
-	for (i = 0; i < NUM_PCIE_PERF_COUNTERS(priv); i++)
-		data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters,
-						  pcie_perf_stats_desc, i);
-
-	for (i = 0; i < NUM_PCIE_PERF_COUNTERS64(priv); i++)
-		data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pcie.pcie_perf_counters,
-						  pcie_perf_stats_desc64, i);
-
-	for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS(priv); i++)
-		data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters,
-						  pcie_perf_stall_stats_desc, i);
-
-	for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
-		for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++)
-			data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio],
-						 pport_per_prio_traffic_stats_desc, i);
-	}
-
-	pfc_combined = mlx5e_query_pfc_combined(priv);
-	for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) {
-		for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
-			data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio],
-							  pport_per_prio_pfc_stats_desc, i);
-		}
-	}
-
-	if (mlx5e_query_global_pause_combined(priv)) {
-		for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
-			data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0],
-							  pport_per_prio_pfc_stats_desc, i);
-		}
-	}
-
-	/* port module event counters */
-	mlx5_priv =  &priv->mdev->priv;
-	for (i = 0; i < ARRAY_SIZE(mlx5e_pme_status_desc); i++)
-		data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.status_counters,
-						   mlx5e_pme_status_desc, i);
-
-	for (i = 0; i < ARRAY_SIZE(mlx5e_pme_error_desc); i++)
-		data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters,
-						   mlx5e_pme_error_desc, i);
-
-	/* IPSec counters */
-	idx += mlx5e_ipsec_get_stats(priv, data + idx);
-
-	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
-		return;
-
-	/* per channel counters */
-	for (i = 0; i < channels->num; i++)
-		for (j = 0; j < NUM_RQ_STATS; j++)
-			data[idx++] =
-			       MLX5E_READ_CTR64_CPU(&channels->c[i]->rq.stats,
-						    rq_stats_desc, j);
-
-	for (tc = 0; tc < priv->channels.params.num_tc; tc++)
-		for (i = 0; i < channels->num; i++)
-			for (j = 0; j < NUM_SQ_STATS; j++)
-				data[idx++] = MLX5E_READ_CTR64_CPU(&channels->c[i]->sq[tc].stats,
-								   sq_stats_desc, j);
+	for (i = 0; i < mlx5e_num_stats_grps; i++)
+		idx = mlx5e_stats_grps[i].fill_stats(priv, data, idx);
 }
 
 static void mlx5e_get_ethtool_stats(struct net_device *dev,
@@ -1417,14 +1183,15 @@ static int mlx5e_set_pauseparam(struct net_device *netdev,
 int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
 			      struct ethtool_ts_info *info)
 {
+	struct mlx5_core_dev *mdev = priv->mdev;
 	int ret;
 
 	ret = ethtool_op_get_ts_info(priv->netdev, info);
 	if (ret)
 		return ret;
 
-	info->phc_index = priv->tstamp.ptp ?
-			  ptp_clock_index(priv->tstamp.ptp) : -1;
+	info->phc_index = mdev->clock.ptp ?
+			  ptp_clock_index(mdev->clock.ptp) : -1;
 
 	if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz))
 		return 0;
@@ -1573,6 +1340,16 @@ static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
 	return mlx5_set_port_wol(mdev, mlx5_wol_mode);
 }
 
+static u32 mlx5e_get_msglevel(struct net_device *dev)
+{
+	return ((struct mlx5e_priv *)netdev_priv(dev))->msglevel;
+}
+
+static void mlx5e_set_msglevel(struct net_device *dev, u32 val)
+{
+	((struct mlx5e_priv *)netdev_priv(dev))->msglevel = val;
+}
+
 static int mlx5e_set_phys_id(struct net_device *dev,
 			     enum ethtool_phys_id_state state)
 {
@@ -1677,29 +1454,36 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev,
 
 typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable);
 
-static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable)
+static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable,
+				     bool is_rx_cq)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5e_channels new_channels = {};
-	bool rx_mode_changed;
-	u8 rx_cq_period_mode;
+	bool mode_changed;
+	u8 cq_period_mode, current_cq_period_mode;
 	int err = 0;
 
-	rx_cq_period_mode = enable ?
+	cq_period_mode = enable ?
 		MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
 		MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
-	rx_mode_changed = rx_cq_period_mode != priv->channels.params.rx_cq_period_mode;
+	current_cq_period_mode = is_rx_cq ?
+		priv->channels.params.rx_cq_moderation.cq_period_mode :
+		priv->channels.params.tx_cq_moderation.cq_period_mode;
+	mode_changed = cq_period_mode != current_cq_period_mode;
 
-	if (rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE &&
+	if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE &&
 	    !MLX5_CAP_GEN(mdev, cq_period_start_from_cqe))
 		return -EOPNOTSUPP;
 
-	if (!rx_mode_changed)
+	if (!mode_changed)
 		return 0;
 
 	new_channels.params = priv->channels.params;
-	mlx5e_set_rx_cq_mode_params(&new_channels.params, rx_cq_period_mode);
+	if (is_rx_cq)
+		mlx5e_set_rx_cq_mode_params(&new_channels.params, cq_period_mode);
+	else
+		mlx5e_set_tx_cq_mode_params(&new_channels.params, cq_period_mode);
 
 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
 		priv->channels.params = new_channels.params;
@@ -1714,6 +1498,16 @@ static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable)
 	return 0;
 }
 
+static int set_pflag_tx_cqe_based_moder(struct net_device *netdev, bool enable)
+{
+	return set_pflag_cqe_based_moder(netdev, enable, false);
+}
+
+static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable)
+{
+	return set_pflag_cqe_based_moder(netdev, enable, true);
+}
+
 int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val)
 {
 	bool curr_val = MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS);
@@ -1754,7 +1548,7 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev,
 	if (!MLX5_CAP_GEN(mdev, cqe_compression))
 		return -EOPNOTSUPP;
 
-	if (enable && priv->tstamp.hwtstamp_config.rx_filter != HWTSTAMP_FILTER_NONE) {
+	if (enable && priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) {
 		netdev_err(netdev, "Can't enable cqe compression while timestamping is enabled.\n");
 		return -EINVAL;
 	}
@@ -1802,6 +1596,12 @@ static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags)
 		goto out;
 
 	err = mlx5e_handle_pflag(netdev, pflags,
+				 MLX5E_PFLAG_TX_CQE_BASED_MODER,
+				 set_pflag_tx_cqe_based_moder);
+	if (err)
+		goto out;
+
+	err = mlx5e_handle_pflag(netdev, pflags,
 				 MLX5E_PFLAG_RX_CQE_COMPRESS,
 				 set_pflag_rx_cqe_compress);
 
@@ -1905,4 +1705,7 @@ const struct ethtool_ops mlx5e_ethtool_ops = {
 	.get_priv_flags    = mlx5e_get_priv_flags,
 	.set_priv_flags    = mlx5e_set_priv_flags,
 	.self_test         = mlx5e_self_test,
+	.get_msglevel      = mlx5e_get_msglevel,
+	.set_msglevel      = mlx5e_set_msglevel,
+
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 4837045ffba3..def513484845 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -118,7 +118,7 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv)
 	int i;
 
 	list_size = 0;
-	for_each_set_bit(vlan, priv->fs.vlan.active_vlans, VLAN_N_VID)
+	for_each_set_bit(vlan, priv->fs.vlan.active_cvlans, VLAN_N_VID)
 		list_size++;
 
 	max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list);
@@ -135,7 +135,7 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv)
 		return -ENOMEM;
 
 	i = 0;
-	for_each_set_bit(vlan, priv->fs.vlan.active_vlans, VLAN_N_VID) {
+	for_each_set_bit(vlan, priv->fs.vlan.active_cvlans, VLAN_N_VID) {
 		if (i >= list_size)
 			break;
 		vlans[i++] = vlan;
@@ -154,7 +154,8 @@ enum mlx5e_vlan_rule_type {
 	MLX5E_VLAN_RULE_TYPE_UNTAGGED,
 	MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID,
 	MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID,
-	MLX5E_VLAN_RULE_TYPE_MATCH_VID,
+	MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID,
+	MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID,
 };
 
 static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
@@ -162,7 +163,7 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
 				 u16 vid, struct mlx5_flow_spec *spec)
 {
 	struct mlx5_flow_table *ft = priv->fs.vlan.ft.t;
-	struct mlx5_flow_destination dest;
+	struct mlx5_flow_destination dest = {};
 	struct mlx5_flow_handle **rule_p;
 	MLX5_DECLARE_FLOW_ACT(flow_act);
 	int err = 0;
@@ -174,6 +175,10 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
 
 	switch (rule_type) {
 	case MLX5E_VLAN_RULE_TYPE_UNTAGGED:
+		/* cvlan_tag enabled in match criteria and
+		 * disabled in match value means both S & C tags
+		 * don't exist (untagged of both)
+		 */
 		rule_p = &priv->fs.vlan.untagged_rule;
 		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
 				 outer_headers.cvlan_tag);
@@ -190,8 +195,18 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
 				 outer_headers.svlan_tag);
 		MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1);
 		break;
-	default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */
-		rule_p = &priv->fs.vlan.active_vlans_rule[vid];
+	case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID:
+		rule_p = &priv->fs.vlan.active_svlans_rule[vid];
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+				 outer_headers.svlan_tag);
+		MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1);
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+				 outer_headers.first_vid);
+		MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid,
+			 vid);
+		break;
+	default: /* MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID */
+		rule_p = &priv->fs.vlan.active_cvlans_rule[vid];
 		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
 				 outer_headers.cvlan_tag);
 		MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1);
@@ -223,7 +238,7 @@ static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
 	if (!spec)
 		return -ENOMEM;
 
-	if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_VID)
+	if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID)
 		mlx5e_vport_context_update_vlans(priv);
 
 	err = __mlx5e_add_vlan_rule(priv, rule_type, vid, spec);
@@ -255,11 +270,17 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv,
 			priv->fs.vlan.any_svlan_rule = NULL;
 		}
 		break;
-	case MLX5E_VLAN_RULE_TYPE_MATCH_VID:
+	case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID:
+		if (priv->fs.vlan.active_svlans_rule[vid]) {
+			mlx5_del_flow_rules(priv->fs.vlan.active_svlans_rule[vid]);
+			priv->fs.vlan.active_svlans_rule[vid] = NULL;
+		}
+		break;
+	case MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID:
 		mlx5e_vport_context_update_vlans(priv);
-		if (priv->fs.vlan.active_vlans_rule[vid]) {
-			mlx5_del_flow_rules(priv->fs.vlan.active_vlans_rule[vid]);
-			priv->fs.vlan.active_vlans_rule[vid] = NULL;
+		if (priv->fs.vlan.active_cvlans_rule[vid]) {
+			mlx5_del_flow_rules(priv->fs.vlan.active_cvlans_rule[vid]);
+			priv->fs.vlan.active_cvlans_rule[vid] = NULL;
 		}
 		mlx5e_vport_context_update_vlans(priv);
 		break;
@@ -283,46 +304,83 @@ static int mlx5e_add_any_vid_rules(struct mlx5e_priv *priv)
 	return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0);
 }
 
-void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv)
+void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv)
 {
-	if (!priv->fs.vlan.filter_disabled)
+	if (!priv->fs.vlan.cvlan_filter_disabled)
 		return;
 
-	priv->fs.vlan.filter_disabled = false;
+	priv->fs.vlan.cvlan_filter_disabled = false;
 	if (priv->netdev->flags & IFF_PROMISC)
 		return;
 	mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
 }
 
-void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv)
+void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv)
 {
-	if (priv->fs.vlan.filter_disabled)
+	if (priv->fs.vlan.cvlan_filter_disabled)
 		return;
 
-	priv->fs.vlan.filter_disabled = true;
+	priv->fs.vlan.cvlan_filter_disabled = true;
 	if (priv->netdev->flags & IFF_PROMISC)
 		return;
 	mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
 }
 
-int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto,
-			  u16 vid)
+static int mlx5e_vlan_rx_add_cvid(struct mlx5e_priv *priv, u16 vid)
 {
-	struct mlx5e_priv *priv = netdev_priv(dev);
+	int err;
+
+	set_bit(vid, priv->fs.vlan.active_cvlans);
 
-	set_bit(vid, priv->fs.vlan.active_vlans);
+	err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid);
+	if (err)
+		clear_bit(vid, priv->fs.vlan.active_cvlans);
+
+	return err;
+}
+
+static int mlx5e_vlan_rx_add_svid(struct mlx5e_priv *priv, u16 vid)
+{
+	struct net_device *netdev = priv->netdev;
+	int err;
+
+	set_bit(vid, priv->fs.vlan.active_svlans);
+
+	err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid);
+	if (err) {
+		clear_bit(vid, priv->fs.vlan.active_svlans);
+		return err;
+	}
 
-	return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid);
+	/* Need to fix some features.. */
+	netdev_update_features(netdev);
+	return err;
 }
 
-int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto,
-			   u16 vid)
+int mlx5e_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 
-	clear_bit(vid, priv->fs.vlan.active_vlans);
+	if (be16_to_cpu(proto) == ETH_P_8021Q)
+		return mlx5e_vlan_rx_add_cvid(priv, vid);
+	else if (be16_to_cpu(proto) == ETH_P_8021AD)
+		return mlx5e_vlan_rx_add_svid(priv, vid);
 
-	mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid);
+	return -EOPNOTSUPP;
+}
+
+int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
+	if (be16_to_cpu(proto) == ETH_P_8021Q) {
+		clear_bit(vid, priv->fs.vlan.active_cvlans);
+		mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid);
+	} else if (be16_to_cpu(proto) == ETH_P_8021AD) {
+		clear_bit(vid, priv->fs.vlan.active_svlans);
+		mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid);
+		netdev_update_features(dev);
+	}
 
 	return 0;
 }
@@ -333,11 +391,14 @@ static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv)
 
 	mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
 
-	for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) {
-		mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i);
+	for_each_set_bit(i, priv->fs.vlan.active_cvlans, VLAN_N_VID) {
+		mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i);
 	}
 
-	if (priv->fs.vlan.filter_disabled &&
+	for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID)
+		mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
+
+	if (priv->fs.vlan.cvlan_filter_disabled &&
 	    !(priv->netdev->flags & IFF_PROMISC))
 		mlx5e_add_any_vid_rules(priv);
 }
@@ -348,11 +409,14 @@ static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv)
 
 	mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
 
-	for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) {
-		mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i);
+	for_each_set_bit(i, priv->fs.vlan.active_cvlans, VLAN_N_VID) {
+		mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i);
 	}
 
-	if (priv->fs.vlan.filter_disabled &&
+	for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID)
+		mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
+
+	if (priv->fs.vlan.cvlan_filter_disabled &&
 	    !(priv->netdev->flags & IFF_PROMISC))
 		mlx5e_del_any_vid_rules(priv);
 }
@@ -548,8 +612,11 @@ void mlx5e_set_rx_mode_work(struct work_struct *work)
 	bool disable_broadcast =  ea->broadcast_enabled && !broadcast_enabled;
 
 	if (enable_promisc) {
+		if (!priv->channels.params.vlan_strip_disable)
+			netdev_warn_once(ndev,
+					 "S-tagged traffic will be dropped while C-tag vlan stripping is enabled\n");
 		mlx5e_add_l2_flow_rule(priv, &ea->promisc, MLX5E_PROMISC);
-		if (!priv->fs.vlan.filter_disabled)
+		if (!priv->fs.vlan.cvlan_filter_disabled)
 			mlx5e_add_any_vid_rules(priv);
 	}
 	if (enable_allmulti)
@@ -564,7 +631,7 @@ void mlx5e_set_rx_mode_work(struct work_struct *work)
 	if (disable_allmulti)
 		mlx5e_del_l2_flow_rule(priv, &ea->allmulti);
 	if (disable_promisc) {
-		if (!priv->fs.vlan.filter_disabled)
+		if (!priv->fs.vlan.cvlan_filter_disabled)
 			mlx5e_del_any_vid_rules(priv);
 		mlx5e_del_l2_flow_rule(priv, &ea->promisc);
 	}
@@ -741,7 +808,7 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
 
 static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv)
 {
-	struct mlx5_flow_destination dest;
+	struct mlx5_flow_destination dest = {};
 	struct mlx5e_ttc_table *ttc;
 	struct mlx5_flow_handle **rules;
 	struct mlx5_flow_table *ft;
@@ -912,7 +979,7 @@ mlx5e_generate_inner_ttc_rule(struct mlx5e_priv *priv,
 
 static int mlx5e_generate_inner_ttc_table_rules(struct mlx5e_priv *priv)
 {
-	struct mlx5_flow_destination dest;
+	struct mlx5_flow_destination dest = {};
 	struct mlx5_flow_handle **rules;
 	struct mlx5e_ttc_table *ttc;
 	struct mlx5_flow_table *ft;
@@ -1008,7 +1075,7 @@ err:
 	return err;
 }
 
-static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv)
+int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv)
 {
 	struct mlx5e_ttc_table *ttc = &priv->fs.inner_ttc;
 	struct mlx5_flow_table_attr ft_attr = {};
@@ -1044,7 +1111,7 @@ err:
 	return err;
 }
 
-static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv)
+void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv)
 {
 	struct mlx5e_ttc_table *ttc = &priv->fs.inner_ttc;
 
@@ -1109,7 +1176,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
 				  struct mlx5e_l2_rule *ai, int type)
 {
 	struct mlx5_flow_table *ft = priv->fs.l2.ft.t;
-	struct mlx5_flow_destination dest;
+	struct mlx5_flow_destination dest = {};
 	MLX5_DECLARE_FLOW_ACT(flow_act);
 	struct mlx5_flow_spec *spec;
 	int err = 0;
@@ -1268,13 +1335,15 @@ err_destroy_flow_table:
 	return err;
 }
 
-#define MLX5E_NUM_VLAN_GROUPS	3
+#define MLX5E_NUM_VLAN_GROUPS	4
 #define MLX5E_VLAN_GROUP0_SIZE	BIT(12)
-#define MLX5E_VLAN_GROUP1_SIZE	BIT(1)
-#define MLX5E_VLAN_GROUP2_SIZE	BIT(0)
+#define MLX5E_VLAN_GROUP1_SIZE	BIT(12)
+#define MLX5E_VLAN_GROUP2_SIZE	BIT(1)
+#define MLX5E_VLAN_GROUP3_SIZE	BIT(0)
 #define MLX5E_VLAN_TABLE_SIZE	(MLX5E_VLAN_GROUP0_SIZE +\
 				 MLX5E_VLAN_GROUP1_SIZE +\
-				 MLX5E_VLAN_GROUP2_SIZE)
+				 MLX5E_VLAN_GROUP2_SIZE +\
+				 MLX5E_VLAN_GROUP3_SIZE)
 
 static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in,
 					    int inlen)
@@ -1297,7 +1366,8 @@ static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in
 
 	memset(in, 0, inlen);
 	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
-	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag);
+	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag);
+	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid);
 	MLX5_SET_CFG(in, start_flow_index, ix);
 	ix += MLX5E_VLAN_GROUP1_SIZE;
 	MLX5_SET_CFG(in, end_flow_index, ix - 1);
@@ -1308,7 +1378,7 @@ static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in
 
 	memset(in, 0, inlen);
 	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
-	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag);
+	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag);
 	MLX5_SET_CFG(in, start_flow_index, ix);
 	ix += MLX5E_VLAN_GROUP2_SIZE;
 	MLX5_SET_CFG(in, end_flow_index, ix - 1);
@@ -1317,6 +1387,17 @@ static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in
 		goto err_destroy_groups;
 	ft->num_groups++;
 
+	memset(in, 0, inlen);
+	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+	MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag);
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5E_VLAN_GROUP3_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+	if (IS_ERR(ft->g[ft->num_groups]))
+		goto err_destroy_groups;
+	ft->num_groups++;
+
 	return 0;
 
 err_destroy_groups:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index cc11bbbd0309..d2b057a3e512 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -196,6 +196,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
 		s->rx_bytes	+= rq_stats->bytes;
 		s->rx_lro_packets += rq_stats->lro_packets;
 		s->rx_lro_bytes	+= rq_stats->lro_bytes;
+		s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets;
 		s->rx_csum_none	+= rq_stats->csum_none;
 		s->rx_csum_complete += rq_stats->csum_complete;
 		s->rx_csum_unnecessary += rq_stats->csum_unnecessary;
@@ -224,6 +225,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
 			s->tx_tso_bytes		+= sq_stats->tso_bytes;
 			s->tx_tso_inner_packets	+= sq_stats->tso_inner_packets;
 			s->tx_tso_inner_bytes	+= sq_stats->tso_inner_bytes;
+			s->tx_added_vlan_packets += sq_stats->added_vlan_packets;
 			s->tx_queue_stopped	+= sq_stats->stopped;
 			s->tx_queue_wake	+= sq_stats->wake;
 			s->tx_queue_dropped	+= sq_stats->dropped;
@@ -373,8 +375,6 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
 			      enum mlx5_dev_event event, unsigned long param)
 {
 	struct mlx5e_priv *priv = vpriv;
-	struct ptp_clock_event ptp_event;
-	struct mlx5_eqe *eqe = NULL;
 
 	if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state))
 		return;
@@ -384,14 +384,6 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
 	case MLX5_DEV_EVENT_PORT_DOWN:
 		queue_work(priv->wq, &priv->update_carrier_work);
 		break;
-	case MLX5_DEV_EVENT_PPS:
-		eqe = (struct mlx5_eqe *)param;
-		ptp_event.index = eqe->data.pps.pin;
-		ptp_event.timestamp =
-			timecounter_cyc2time(&priv->tstamp.clock,
-					     be64_to_cpu(eqe->data.pps.time_stamp));
-		mlx5e_pps_event_handler(vpriv, &ptp_event);
-		break;
 	default:
 		break;
 	}
@@ -585,6 +577,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 	rq->pdev    = c->pdev;
 	rq->netdev  = c->netdev;
 	rq->tstamp  = c->tstamp;
+	rq->clock   = &mdev->clock;
 	rq->channel = c;
 	rq->ix      = c->ix;
 	rq->mdev    = mdev;
@@ -690,7 +683,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 	}
 
 	INIT_WORK(&rq->am.work, mlx5e_rx_am_work);
-	rq->am.mode = params->rx_cq_period_mode;
+	rq->am.mode = params->rx_cq_moderation.cq_period_mode;
 	rq->page_cache.head = 0;
 	rq->page_cache.tail = 0;
 
@@ -1123,6 +1116,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
 
 	sq->pdev      = c->pdev;
 	sq->tstamp    = c->tstamp;
+	sq->clock     = &mdev->clock;
 	sq->mkey_be   = c->mkey_be;
 	sq->channel   = c;
 	sq->txq_ix    = txq_ix;
@@ -1982,7 +1976,7 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
 	}
 
 	mlx5e_build_common_cq_param(priv, param);
-	param->cq_period_mode = params->rx_cq_period_mode;
+	param->cq_period_mode = params->rx_cq_moderation.cq_period_mode;
 }
 
 static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
@@ -1994,8 +1988,7 @@ static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
 	MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size);
 
 	mlx5e_build_common_cq_param(priv, param);
-
-	param->cq_period_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
+	param->cq_period_mode = params->tx_cq_moderation.cq_period_mode;
 }
 
 static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
@@ -2678,6 +2671,12 @@ void mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
 		netif_carrier_on(netdev);
 }
 
+void mlx5e_timestamp_set(struct mlx5e_priv *priv)
+{
+	priv->tstamp.tx_type   = HWTSTAMP_TX_OFF;
+	priv->tstamp.rx_filter = HWTSTAMP_FILTER_NONE;
+}
+
 int mlx5e_open_locked(struct net_device *netdev)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -2693,7 +2692,7 @@ int mlx5e_open_locked(struct net_device *netdev)
 	mlx5e_activate_priv_channels(priv);
 	if (priv->profile->update_carrier)
 		priv->profile->update_carrier(priv);
-	mlx5e_timestamp_init(priv);
+	mlx5e_timestamp_set(priv);
 
 	if (priv->profile->update_stats)
 		queue_delayed_work(priv->wq, &priv->update_stats_work, 0);
@@ -2731,7 +2730,6 @@ int mlx5e_close_locked(struct net_device *netdev)
 
 	clear_bit(MLX5E_STATE_OPENED, &priv->state);
 
-	mlx5e_timestamp_cleanup(priv);
 	netif_carrier_off(priv->netdev);
 	mlx5e_deactivate_priv_channels(priv);
 	mlx5e_close_channels(&priv->channels);
@@ -3086,13 +3084,10 @@ out:
 }
 
 #ifdef CONFIG_MLX5_ESWITCH
-static int mlx5e_setup_tc_cls_flower(struct net_device *dev,
+static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
 				     struct tc_cls_flower_offload *cls_flower)
 {
-	struct mlx5e_priv *priv = netdev_priv(dev);
-
-	if (!is_classid_clsact_ingress(cls_flower->common.classid) ||
-	    cls_flower->common.chain_index)
+	if (cls_flower->common.chain_index)
 		return -EOPNOTSUPP;
 
 	switch (cls_flower->command) {
@@ -3106,17 +3101,54 @@ static int mlx5e_setup_tc_cls_flower(struct net_device *dev,
 		return -EOPNOTSUPP;
 	}
 }
+
+int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+			    void *cb_priv)
+{
+	struct mlx5e_priv *priv = cb_priv;
+
+	if (!tc_can_offload(priv->netdev))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return mlx5e_setup_tc_cls_flower(priv, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int mlx5e_setup_tc_block(struct net_device *dev,
+				struct tc_block_offload *f)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
+	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+		return -EOPNOTSUPP;
+
+	switch (f->command) {
+	case TC_BLOCK_BIND:
+		return tcf_block_cb_register(f->block, mlx5e_setup_tc_block_cb,
+					     priv, priv);
+	case TC_BLOCK_UNBIND:
+		tcf_block_cb_unregister(f->block, mlx5e_setup_tc_block_cb,
+					priv);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
 #endif
 
-static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
-			  void *type_data)
+int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
+		   void *type_data)
 {
 	switch (type) {
 #ifdef CONFIG_MLX5_ESWITCH
-	case TC_SETUP_CLSFLOWER:
-		return mlx5e_setup_tc_cls_flower(dev, type_data);
+	case TC_SETUP_BLOCK:
+		return mlx5e_setup_tc_block(dev, type_data);
 #endif
-	case TC_SETUP_MQPRIO:
+	case TC_SETUP_QDISC_MQPRIO:
 		return mlx5e_setup_tc_mqprio(dev, type_data);
 	default:
 		return -EOPNOTSUPP;
@@ -3230,14 +3262,14 @@ out:
 	return err;
 }
 
-static int set_feature_vlan_filter(struct net_device *netdev, bool enable)
+static int set_feature_cvlan_filter(struct net_device *netdev, bool enable)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 
 	if (enable)
-		mlx5e_enable_vlan_filter(priv);
+		mlx5e_enable_cvlan_filter(priv);
 	else
-		mlx5e_disable_vlan_filter(priv);
+		mlx5e_disable_cvlan_filter(priv);
 
 	return 0;
 }
@@ -3348,7 +3380,7 @@ static int mlx5e_set_features(struct net_device *netdev,
 				    set_feature_lro);
 	err |= mlx5e_handle_feature(netdev, features,
 				    NETIF_F_HW_VLAN_CTAG_FILTER,
-				    set_feature_vlan_filter);
+				    set_feature_cvlan_filter);
 	err |= mlx5e_handle_feature(netdev, features, NETIF_F_HW_TC,
 				    set_feature_tc_num_filters);
 	err |= mlx5e_handle_feature(netdev, features, NETIF_F_RXALL,
@@ -3365,6 +3397,25 @@ static int mlx5e_set_features(struct net_device *netdev,
 	return err ? -EINVAL : 0;
 }
 
+static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
+					    netdev_features_t features)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+
+	mutex_lock(&priv->state_lock);
+	if (!bitmap_empty(priv->fs.vlan.active_svlans, VLAN_N_VID)) {
+		/* HW strips the outer C-tag header, this is a problem
+		 * for S-tag traffic.
+		 */
+		features &= ~NETIF_F_HW_VLAN_CTAG_RX;
+		if (!priv->channels.params.vlan_strip_disable)
+			netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n");
+	}
+	mutex_unlock(&priv->state_lock);
+
+	return features;
+}
+
 static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -3403,6 +3454,80 @@ out:
 	return err;
 }
 
+int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
+{
+	struct hwtstamp_config config;
+	int err;
+
+	if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz))
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	/* TX HW timestamp */
+	switch (config.tx_type) {
+	case HWTSTAMP_TX_OFF:
+	case HWTSTAMP_TX_ON:
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	mutex_lock(&priv->state_lock);
+	/* RX HW timestamp */
+	switch (config.rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		/* Reset CQE compression to Admin default */
+		mlx5e_modify_rx_cqe_compression_locked(priv, priv->channels.params.rx_cqe_compress_def);
+		break;
+	case HWTSTAMP_FILTER_ALL:
+	case HWTSTAMP_FILTER_SOME:
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+	case HWTSTAMP_FILTER_NTP_ALL:
+		/* Disable CQE compression */
+		netdev_warn(priv->netdev, "Disabling cqe compression");
+		err = mlx5e_modify_rx_cqe_compression_locked(priv, false);
+		if (err) {
+			netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err);
+			mutex_unlock(&priv->state_lock);
+			return err;
+		}
+		config.rx_filter = HWTSTAMP_FILTER_ALL;
+		break;
+	default:
+		mutex_unlock(&priv->state_lock);
+		return -ERANGE;
+	}
+
+	memcpy(&priv->tstamp, &config, sizeof(config));
+	mutex_unlock(&priv->state_lock);
+
+	return copy_to_user(ifr->ifr_data, &config,
+			    sizeof(config)) ? -EFAULT : 0;
+}
+
+int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr)
+{
+	struct hwtstamp_config *cfg = &priv->tstamp;
+
+	if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz))
+		return -EOPNOTSUPP;
+
+	return copy_to_user(ifr->ifr_data, cfg, sizeof(*cfg)) ? -EFAULT : 0;
+}
+
 static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
@@ -3726,7 +3851,7 @@ static u32 mlx5e_xdp_query(struct net_device *dev)
 	return prog_id;
 }
 
-static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 {
 	switch (xdp->command) {
 	case XDP_SETUP_PROG:
@@ -3768,6 +3893,7 @@ static const struct net_device_ops mlx5e_netdev_ops = {
 	.ndo_vlan_rx_add_vid     = mlx5e_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid    = mlx5e_vlan_rx_kill_vid,
 	.ndo_set_features        = mlx5e_set_features,
+	.ndo_fix_features        = mlx5e_fix_features,
 	.ndo_change_mtu          = mlx5e_change_mtu,
 	.ndo_do_ioctl            = mlx5e_ioctl,
 	.ndo_set_tx_maxrate      = mlx5e_set_tx_maxrate,
@@ -3778,7 +3904,7 @@ static const struct net_device_ops mlx5e_netdev_ops = {
 	.ndo_rx_flow_steer	 = mlx5e_rx_flow_steer,
 #endif
 	.ndo_tx_timeout          = mlx5e_tx_timeout,
-	.ndo_xdp		 = mlx5e_xdp,
+	.ndo_bpf		 = mlx5e_xdp,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller     = mlx5e_netpoll,
 #endif
@@ -3882,14 +4008,32 @@ static bool hw_lro_heuristic(u32 link_speed, u32 pci_bw)
 		 (pci_bw <= 16000) && (pci_bw < link_speed));
 }
 
+void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
+{
+	params->tx_cq_moderation.cq_period_mode = cq_period_mode;
+
+	params->tx_cq_moderation.pkts =
+		MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
+	params->tx_cq_moderation.usec =
+		MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
+
+	if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
+		params->tx_cq_moderation.usec =
+			MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE;
+
+	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER,
+			params->tx_cq_moderation.cq_period_mode ==
+				MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
+}
+
 void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
 {
-	params->rx_cq_period_mode = cq_period_mode;
+	params->rx_cq_moderation.cq_period_mode = cq_period_mode;
 
 	params->rx_cq_moderation.pkts =
 		MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
 	params->rx_cq_moderation.usec =
-			MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
+		MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
 
 	if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
 		params->rx_cq_moderation.usec =
@@ -3897,10 +4041,11 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
 
 	if (params->rx_am_enabled)
 		params->rx_cq_moderation =
-			mlx5e_am_get_def_profile(params->rx_cq_period_mode);
+			mlx5e_am_get_def_profile(cq_period_mode);
 
 	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER,
-			params->rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
+			params->rx_cq_moderation.cq_period_mode ==
+				MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
 }
 
 u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
@@ -3960,16 +4105,11 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
 			MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
 	params->rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
 	mlx5e_set_rx_cq_mode_params(params, cq_period_mode);
-
-	params->tx_cq_moderation.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
-	params->tx_cq_moderation.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
+	mlx5e_set_tx_cq_mode_params(params, cq_period_mode);
 
 	/* TX inline */
 	params->tx_max_inline = mlx5e_get_max_inline_cap(mdev);
-	mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
-	if (params->tx_min_inline_mode == MLX5_INLINE_MODE_NONE &&
-	    !MLX5_CAP_ETH(mdev, wqe_vlan_insert))
-		params->tx_min_inline_mode = MLX5_INLINE_MODE_L2;
+	params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev);
 
 	/* RSS */
 	params->rss_hfunc = ETH_RSS_HASH_XOR;
@@ -3989,6 +4129,7 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev,
 	priv->netdev      = netdev;
 	priv->profile     = profile;
 	priv->ppriv       = ppriv;
+	priv->msglevel    = MLX5E_MSG_LEVEL;
 	priv->hard_mtu = MLX5E_ETH_HARD_MTU;
 
 	mlx5e_build_nic_params(mdev, &priv->channels.params, profile->max_nch(mdev));
@@ -4055,6 +4196,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 	netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_TX;
 	netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_RX;
 	netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_FILTER;
+	netdev->hw_features      |= NETIF_F_HW_VLAN_STAG_TX;
 
 	if (mlx5e_vxlan_allowed(mdev) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) {
 		netdev->hw_features     |= NETIF_F_GSO_PARTIAL;
@@ -4112,6 +4254,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 	}
 
 	netdev->features         |= NETIF_F_HIGHDMA;
+	netdev->features         |= NETIF_F_HW_VLAN_STAG_FILTER;
 
 	netdev->priv_flags       |= IFF_UNICAST_FLT;
 
@@ -4269,7 +4412,9 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
 
 	if (netdev->reg_state != NETREG_REGISTERED)
 		return;
-
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+	mlx5e_dcbnl_init_app(priv);
+#endif
 	/* Device already registered: sync netdev system state */
 	if (mlx5e_vxlan_allowed(mdev)) {
 		rtnl_lock();
@@ -4290,6 +4435,11 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+	if (priv->netdev->reg_state == NETREG_REGISTERED)
+		mlx5e_dcbnl_delete_app(priv);
+#endif
+
 	rtnl_lock();
 	if (netif_running(priv->netdev))
 		mlx5e_close(priv->netdev);
@@ -4510,6 +4660,9 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev)
 		goto err_detach;
 	}
 
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+	mlx5e_dcbnl_init_app(priv);
+#endif
 	return priv;
 
 err_detach:
@@ -4526,6 +4679,9 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv)
 	struct mlx5e_priv *priv = vpriv;
 	void *ppriv = priv->ppriv;
 
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+	mlx5e_dcbnl_delete_app(priv);
+#endif
 	unregister_netdev(priv->netdev);
 	mlx5e_detach(mdev, vpriv);
 	mlx5e_destroy_netdev(priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 45e03c427faf..2c43606c26b5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -34,6 +34,7 @@
 #include <linux/mlx5/fs.h>
 #include <net/switchdev.h>
 #include <net/pkt_cls.h>
+#include <net/act_api.h>
 #include <net/netevent.h>
 #include <net/arp.h>
 
@@ -658,23 +659,12 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
 }
 
 static int
-mlx5e_rep_setup_tc_cls_flower(struct net_device *dev,
+mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
 			      struct tc_cls_flower_offload *cls_flower)
 {
-	struct mlx5e_priv *priv = netdev_priv(dev);
-
-	if (!is_classid_clsact_ingress(cls_flower->common.classid) ||
-	    cls_flower->common.chain_index)
+	if (cls_flower->common.chain_index)
 		return -EOPNOTSUPP;
 
-	if (cls_flower->egress_dev) {
-		struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-
-		dev = mlx5_eswitch_get_uplink_netdev(esw);
-		return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER,
-						     cls_flower);
-	}
-
 	switch (cls_flower->command) {
 	case TC_CLSFLOWER_REPLACE:
 		return mlx5e_configure_flower(priv, cls_flower);
@@ -687,12 +677,48 @@ mlx5e_rep_setup_tc_cls_flower(struct net_device *dev,
 	}
 }
 
+static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
+				 void *cb_priv)
+{
+	struct mlx5e_priv *priv = cb_priv;
+
+	if (!tc_can_offload(priv->netdev))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return mlx5e_rep_setup_tc_cls_flower(priv, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int mlx5e_rep_setup_tc_block(struct net_device *dev,
+				    struct tc_block_offload *f)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
+	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+		return -EOPNOTSUPP;
+
+	switch (f->command) {
+	case TC_BLOCK_BIND:
+		return tcf_block_cb_register(f->block, mlx5e_rep_setup_tc_cb,
+					     priv, priv);
+	case TC_BLOCK_UNBIND:
+		tcf_block_cb_unregister(f->block, mlx5e_rep_setup_tc_cb, priv);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			      void *type_data)
 {
 	switch (type) {
-	case TC_SETUP_CLSFLOWER:
-		return mlx5e_rep_setup_tc_cls_flower(dev, type_data);
+	case TC_SETUP_BLOCK:
+		return mlx5e_rep_setup_tc_block(dev, type_data);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -986,6 +1012,7 @@ mlx5e_vport_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep)
 {
 	struct mlx5e_rep_priv *rpriv;
 	struct net_device *netdev;
+	struct mlx5e_priv *upriv;
 	int err;
 
 	rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
@@ -1017,15 +1044,25 @@ mlx5e_vport_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep)
 		goto err_detach_netdev;
 	}
 
+	upriv = netdev_priv(mlx5_eswitch_get_uplink_netdev(esw));
+	err = tc_setup_cb_egdev_register(netdev, mlx5e_setup_tc_block_cb,
+					 upriv);
+	if (err)
+		goto err_neigh_cleanup;
+
 	err = register_netdev(netdev);
 	if (err) {
 		pr_warn("Failed to register representor netdev for vport %d\n",
 			rep->vport);
-		goto err_neigh_cleanup;
+		goto err_egdev_cleanup;
 	}
 
 	return 0;
 
+err_egdev_cleanup:
+	tc_setup_cb_egdev_unregister(netdev, mlx5e_setup_tc_block_cb,
+				     upriv);
+
 err_neigh_cleanup:
 	mlx5e_rep_neigh_cleanup(rpriv);
 
@@ -1045,9 +1082,12 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep)
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
 	void *ppriv = priv->ppriv;
+	struct mlx5e_priv *upriv;
 
 	unregister_netdev(rep->netdev);
-
+	upriv = netdev_priv(mlx5_eswitch_get_uplink_netdev(esw));
+	tc_setup_cb_egdev_unregister(netdev, mlx5e_setup_tc_block_cb,
+				     upriv);
 	mlx5e_rep_neigh_cleanup(rpriv);
 	mlx5e_detach_netdev(priv);
 	mlx5e_destroy_netdev(priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 91b1b0938931..5b499c7a698f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -42,10 +42,11 @@
 #include "en_rep.h"
 #include "ipoib/ipoib.h"
 #include "en_accel/ipsec_rxtx.h"
+#include "lib/clock.h"
 
-static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp)
+static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
 {
-	return tstamp->hwtstamp_config.rx_filter == HWTSTAMP_FILTER_ALL;
+	return config->rx_filter == HWTSTAMP_FILTER_ALL;
 }
 
 static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc,
@@ -560,7 +561,6 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
 	u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) ||
 		(l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA);
 
-	skb->mac_len = ETH_HLEN;
 	proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth);
 
 	tot_len = cqe_bcnt - network_depth;
@@ -607,10 +607,11 @@ static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe,
 	skb_set_hash(skb, be32_to_cpu(cqe->rss_hash_result), ht);
 }
 
-static inline bool is_first_ethertype_ip(struct sk_buff *skb)
+static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth)
 {
 	__be16 ethertype = ((struct ethhdr *)skb->data)->h_proto;
 
+	ethertype = __vlan_get_protocol(skb, ethertype, network_depth);
 	return (ethertype == htons(ETH_P_IP) || ethertype == htons(ETH_P_IPV6));
 }
 
@@ -620,6 +621,8 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
 				     struct sk_buff *skb,
 				     bool   lro)
 {
+	int network_depth = 0;
+
 	if (unlikely(!(netdev->features & NETIF_F_RXCSUM)))
 		goto csum_none;
 
@@ -629,9 +632,17 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
 		return;
 	}
 
-	if (is_first_ethertype_ip(skb)) {
+	if (is_last_ethertype_ip(skb, &network_depth)) {
 		skb->ip_summed = CHECKSUM_COMPLETE;
 		skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
+		if (network_depth > ETH_HLEN)
+			/* CQE csum is calculated from the IP header and does
+			 * not cover VLAN headers (if present). This will add
+			 * the checksum manually.
+			 */
+			skb->csum = csum_partial(skb->data + ETH_HLEN,
+						 network_depth - ETH_HLEN,
+						 skb->csum);
 		rq->stats.csum_complete++;
 		return;
 	}
@@ -659,9 +670,9 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
 				      struct sk_buff *skb)
 {
 	struct net_device *netdev = rq->netdev;
-	struct mlx5e_tstamp *tstamp = rq->tstamp;
 	int lro_num_seg;
 
+	skb->mac_len = ETH_HLEN;
 	lro_num_seg = be32_to_cpu(cqe->srqn) >> 24;
 	if (lro_num_seg > 1) {
 		mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt);
@@ -674,17 +685,20 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
 		rq->stats.lro_bytes += cqe_bcnt;
 	}
 
-	if (unlikely(mlx5e_rx_hw_stamp(tstamp)))
-		mlx5e_fill_hwstamp(tstamp, get_cqe_ts(cqe), skb_hwtstamps(skb));
+	if (unlikely(mlx5e_rx_hw_stamp(rq->tstamp)))
+		skb_hwtstamps(skb)->hwtstamp =
+				mlx5_timecounter_cyc2time(rq->clock, get_cqe_ts(cqe));
 
 	skb_record_rx_queue(skb, rq->ix);
 
 	if (likely(netdev->features & NETIF_F_RXHASH))
 		mlx5e_skb_set_hash(cqe, skb);
 
-	if (cqe_has_vlan(cqe))
+	if (cqe_has_vlan(cqe)) {
 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
 				       be16_to_cpu(cqe->vlan_info));
+		rq->stats.removed_vlan_packets++;
+	}
 
 	skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK;
 
@@ -795,6 +809,7 @@ static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq,
 		return false;
 
 	xdp.data = va + *rx_headroom;
+	xdp_set_data_meta_invalid(&xdp);
 	xdp.data_end = xdp.data + *len;
 	xdp.data_hard_start = va;
 
@@ -1160,12 +1175,25 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
 					 u32 cqe_bcnt,
 					 struct sk_buff *skb)
 {
-	struct net_device *netdev = rq->netdev;
-	struct mlx5e_tstamp *tstamp = rq->tstamp;
+	struct net_device *netdev;
 	char *pseudo_header;
+	u32 qpn;
 	u8 *dgid;
 	u8 g;
 
+	qpn = be32_to_cpu(cqe->sop_drop_qpn) & 0xffffff;
+	netdev = mlx5i_pkey_get_netdev(rq->netdev, qpn);
+
+	/* No mapping present, cannot process SKB. This might happen if a child
+	 * interface is going down while having unprocessed CQEs on parent RQ
+	 */
+	if (unlikely(!netdev)) {
+		/* TODO: add drop counters support */
+		skb->dev = NULL;
+		pr_warn_once("Unable to map QPN %u to dev - dropping skb\n", qpn);
+		return;
+	}
+
 	g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
 	dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET;
 	if ((!g) || dgid[0] != 0xff)
@@ -1186,8 +1214,9 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
 	skb->ip_summed = CHECKSUM_COMPLETE;
 	skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
 
-	if (unlikely(mlx5e_rx_hw_stamp(tstamp)))
-		mlx5e_fill_hwstamp(tstamp, get_cqe_ts(cqe), skb_hwtstamps(skb));
+	if (unlikely(mlx5e_rx_hw_stamp(rq->tstamp)))
+		skb_hwtstamps(skb)->hwtstamp =
+				mlx5_timecounter_cyc2time(rq->clock, get_cqe_ts(cqe));
 
 	skb_record_rx_queue(skb, rq->ix);
 
@@ -1227,6 +1256,10 @@ void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 		goto wq_free_wqe;
 
 	mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+	if (unlikely(!skb->dev)) {
+		dev_kfree_skb_any(skb);
+		goto wq_free_wqe;
+	}
 	napi_gro_receive(rq->cq.napi, skb);
 
 wq_free_wqe:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c
index acf32fe952cd..e401d9d245f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c
@@ -63,7 +63,11 @@ profile[MLX5_CQ_PERIOD_NUM_MODES][MLX5E_PARAMS_AM_NUM_PROFILES] = {
 
 static inline struct mlx5e_cq_moder mlx5e_am_get_profile(u8 cq_period_mode, int ix)
 {
-	return profile[cq_period_mode][ix];
+	struct mlx5e_cq_moder cq_moder;
+
+	cq_moder = profile[cq_period_mode][ix];
+	cq_moder.cq_period_mode = cq_period_mode;
+	return cq_moder;
 }
 
 struct mlx5e_cq_moder mlx5e_am_get_def_profile(u8 rx_cq_period_mode)
@@ -75,7 +79,7 @@ struct mlx5e_cq_moder mlx5e_am_get_def_profile(u8 rx_cq_period_mode)
 	else /* MLX5_CQ_PERIOD_MODE_START_FROM_EQE */
 		default_profile_ix = MLX5E_RX_AM_DEF_PROFILE_EQE;
 
-	return profile[rx_cq_period_mode][default_profile_ix];
+	return mlx5e_am_get_profile(rx_cq_period_mode, default_profile_ix);
 }
 
 /* Adaptive moderation logic */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
new file mode 100644
index 000000000000..b74ddc7984bc
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -0,0 +1,899 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "en.h"
+#include "en_accel/ipsec.h"
+
+static const struct counter_desc sw_stats_desc[] = {
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_bytes) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_added_vlan_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_removed_vlan_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_none) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_page_reuse) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) },
+};
+
+#define NUM_SW_COUNTERS			ARRAY_SIZE(sw_stats_desc)
+
+static int mlx5e_grp_sw_get_num_stats(struct mlx5e_priv *priv)
+{
+	return NUM_SW_COUNTERS;
+}
+
+static int mlx5e_grp_sw_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx)
+{
+	int i;
+
+	for (i = 0; i < NUM_SW_COUNTERS; i++)
+		strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].format);
+	return idx;
+}
+
+static int mlx5e_grp_sw_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
+{
+	int i;
+
+	for (i = 0; i < NUM_SW_COUNTERS; i++)
+		data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, sw_stats_desc, i);
+	return idx;
+}
+
+static const struct counter_desc q_stats_desc[] = {
+	{ MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_out_of_buffer) },
+};
+
+#define NUM_Q_COUNTERS			ARRAY_SIZE(q_stats_desc)
+
+static int mlx5e_grp_q_get_num_stats(struct mlx5e_priv *priv)
+{
+	return priv->q_counter ? NUM_Q_COUNTERS : 0;
+}
+
+static int mlx5e_grp_q_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx)
+{
+	int i;
+
+	for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++)
+		strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].format);
+	return idx;
+}
+
+static int mlx5e_grp_q_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
+{
+	int i;
+
+	for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++)
+		data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, q_stats_desc, i);
+	return idx;
+}
+
+#define VPORT_COUNTER_OFF(c) MLX5_BYTE_OFF(query_vport_counter_out, c)
+static const struct counter_desc vport_stats_desc[] = {
+	{ "rx_vport_unicast_packets",
+		VPORT_COUNTER_OFF(received_eth_unicast.packets) },
+	{ "rx_vport_unicast_bytes",
+		VPORT_COUNTER_OFF(received_eth_unicast.octets) },
+	{ "tx_vport_unicast_packets",
+		VPORT_COUNTER_OFF(transmitted_eth_unicast.packets) },
+	{ "tx_vport_unicast_bytes",
+		VPORT_COUNTER_OFF(transmitted_eth_unicast.octets) },
+	{ "rx_vport_multicast_packets",
+		VPORT_COUNTER_OFF(received_eth_multicast.packets) },
+	{ "rx_vport_multicast_bytes",
+		VPORT_COUNTER_OFF(received_eth_multicast.octets) },
+	{ "tx_vport_multicast_packets",
+		VPORT_COUNTER_OFF(transmitted_eth_multicast.packets) },
+	{ "tx_vport_multicast_bytes",
+		VPORT_COUNTER_OFF(transmitted_eth_multicast.octets) },
+	{ "rx_vport_broadcast_packets",
+		VPORT_COUNTER_OFF(received_eth_broadcast.packets) },
+	{ "rx_vport_broadcast_bytes",
+		VPORT_COUNTER_OFF(received_eth_broadcast.octets) },
+	{ "tx_vport_broadcast_packets",
+		VPORT_COUNTER_OFF(transmitted_eth_broadcast.packets) },
+	{ "tx_vport_broadcast_bytes",
+		VPORT_COUNTER_OFF(transmitted_eth_broadcast.octets) },
+	{ "rx_vport_rdma_unicast_packets",
+		VPORT_COUNTER_OFF(received_ib_unicast.packets) },
+	{ "rx_vport_rdma_unicast_bytes",
+		VPORT_COUNTER_OFF(received_ib_unicast.octets) },
+	{ "tx_vport_rdma_unicast_packets",
+		VPORT_COUNTER_OFF(transmitted_ib_unicast.packets) },
+	{ "tx_vport_rdma_unicast_bytes",
+		VPORT_COUNTER_OFF(transmitted_ib_unicast.octets) },
+	{ "rx_vport_rdma_multicast_packets",
+		VPORT_COUNTER_OFF(received_ib_multicast.packets) },
+	{ "rx_vport_rdma_multicast_bytes",
+		VPORT_COUNTER_OFF(received_ib_multicast.octets) },
+	{ "tx_vport_rdma_multicast_packets",
+		VPORT_COUNTER_OFF(transmitted_ib_multicast.packets) },
+	{ "tx_vport_rdma_multicast_bytes",
+		VPORT_COUNTER_OFF(transmitted_ib_multicast.octets) },
+};
+
+#define NUM_VPORT_COUNTERS		ARRAY_SIZE(vport_stats_desc)
+
+static int mlx5e_grp_vport_get_num_stats(struct mlx5e_priv *priv)
+{
+	return NUM_VPORT_COUNTERS;
+}
+
+static int mlx5e_grp_vport_fill_strings(struct mlx5e_priv *priv, u8 *data,
+					int idx)
+{
+	int i;
+
+	for (i = 0; i < NUM_VPORT_COUNTERS; i++)
+		strcpy(data + (idx++) * ETH_GSTRING_LEN, vport_stats_desc[i].format);
+	return idx;
+}
+
+static int mlx5e_grp_vport_fill_stats(struct mlx5e_priv *priv, u64 *data,
+				      int idx)
+{
+	int i;
+
+	for (i = 0; i < NUM_VPORT_COUNTERS; i++)
+		data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out,
+						  vport_stats_desc, i);
+	return idx;
+}
+
+#define PPORT_802_3_OFF(c) \
+	MLX5_BYTE_OFF(ppcnt_reg, \
+		      counter_set.eth_802_3_cntrs_grp_data_layout.c##_high)
+static const struct counter_desc pport_802_3_stats_desc[] = {
+	{ "tx_packets_phy", PPORT_802_3_OFF(a_frames_transmitted_ok) },
+	{ "rx_packets_phy", PPORT_802_3_OFF(a_frames_received_ok) },
+	{ "rx_crc_errors_phy", PPORT_802_3_OFF(a_frame_check_sequence_errors) },
+	{ "tx_bytes_phy", PPORT_802_3_OFF(a_octets_transmitted_ok) },
+	{ "rx_bytes_phy", PPORT_802_3_OFF(a_octets_received_ok) },
+	{ "tx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) },
+	{ "tx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) },
+	{ "rx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_received_ok) },
+	{ "rx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_received_ok) },
+	{ "rx_in_range_len_errors_phy", PPORT_802_3_OFF(a_in_range_length_errors) },
+	{ "rx_out_of_range_len_phy", PPORT_802_3_OFF(a_out_of_range_length_field) },
+	{ "rx_oversize_pkts_phy", PPORT_802_3_OFF(a_frame_too_long_errors) },
+	{ "rx_symbol_err_phy", PPORT_802_3_OFF(a_symbol_error_during_carrier) },
+	{ "tx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_transmitted) },
+	{ "rx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_received) },
+	{ "rx_unsupported_op_phy", PPORT_802_3_OFF(a_unsupported_opcodes_received) },
+	{ "rx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) },
+	{ "tx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) },
+};
+
+#define NUM_PPORT_802_3_COUNTERS	ARRAY_SIZE(pport_802_3_stats_desc)
+
+static int mlx5e_grp_802_3_get_num_stats(struct mlx5e_priv *priv)
+{
+	return NUM_PPORT_802_3_COUNTERS;
+}
+
+static int mlx5e_grp_802_3_fill_strings(struct mlx5e_priv *priv, u8 *data,
+					int idx)
+{
+	int i;
+
+	for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++)
+		strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_802_3_stats_desc[i].format);
+	return idx;
+}
+
+static int mlx5e_grp_802_3_fill_stats(struct mlx5e_priv *priv, u64 *data,
+				      int idx)
+{
+	int i;
+
+	for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++)
+		data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.IEEE_802_3_counters,
+						  pport_802_3_stats_desc, i);
+	return idx;
+}
+
+#define PPORT_2863_OFF(c) \
+	MLX5_BYTE_OFF(ppcnt_reg, \
+		      counter_set.eth_2863_cntrs_grp_data_layout.c##_high)
+static const struct counter_desc pport_2863_stats_desc[] = {
+	{ "rx_discards_phy", PPORT_2863_OFF(if_in_discards) },
+	{ "tx_discards_phy", PPORT_2863_OFF(if_out_discards) },
+	{ "tx_errors_phy", PPORT_2863_OFF(if_out_errors) },
+};
+
+#define NUM_PPORT_2863_COUNTERS		ARRAY_SIZE(pport_2863_stats_desc)
+
+static int mlx5e_grp_2863_get_num_stats(struct mlx5e_priv *priv)
+{
+	return NUM_PPORT_2863_COUNTERS;
+}
+
+static int mlx5e_grp_2863_fill_strings(struct mlx5e_priv *priv, u8 *data,
+				       int idx)
+{
+	int i;
+
+	for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++)
+		strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2863_stats_desc[i].format);
+	return idx;
+}
+
+static int mlx5e_grp_2863_fill_stats(struct mlx5e_priv *priv, u64 *data,
+				     int idx)
+{
+	int i;
+
+	for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++)
+		data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2863_counters,
+						  pport_2863_stats_desc, i);
+	return idx;
+}
+
+#define PPORT_2819_OFF(c) \
+	MLX5_BYTE_OFF(ppcnt_reg, \
+		      counter_set.eth_2819_cntrs_grp_data_layout.c##_high)
+static const struct counter_desc pport_2819_stats_desc[] = {
+	{ "rx_undersize_pkts_phy", PPORT_2819_OFF(ether_stats_undersize_pkts) },
+	{ "rx_fragments_phy", PPORT_2819_OFF(ether_stats_fragments) },
+	{ "rx_jabbers_phy", PPORT_2819_OFF(ether_stats_jabbers) },
+	{ "rx_64_bytes_phy", PPORT_2819_OFF(ether_stats_pkts64octets) },
+	{ "rx_65_to_127_bytes_phy", PPORT_2819_OFF(ether_stats_pkts65to127octets) },
+	{ "rx_128_to_255_bytes_phy", PPORT_2819_OFF(ether_stats_pkts128to255octets) },
+	{ "rx_256_to_511_bytes_phy", PPORT_2819_OFF(ether_stats_pkts256to511octets) },
+	{ "rx_512_to_1023_bytes_phy", PPORT_2819_OFF(ether_stats_pkts512to1023octets) },
+	{ "rx_1024_to_1518_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1024to1518octets) },
+	{ "rx_1519_to_2047_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1519to2047octets) },
+	{ "rx_2048_to_4095_bytes_phy", PPORT_2819_OFF(ether_stats_pkts2048to4095octets) },
+	{ "rx_4096_to_8191_bytes_phy", PPORT_2819_OFF(ether_stats_pkts4096to8191octets) },
+	{ "rx_8192_to_10239_bytes_phy", PPORT_2819_OFF(ether_stats_pkts8192to10239octets) },
+};
+
+#define NUM_PPORT_2819_COUNTERS		ARRAY_SIZE(pport_2819_stats_desc)
+
+static int mlx5e_grp_2819_get_num_stats(struct mlx5e_priv *priv)
+{
+	return NUM_PPORT_2819_COUNTERS;
+}
+
+static int mlx5e_grp_2819_fill_strings(struct mlx5e_priv *priv, u8 *data,
+				       int idx)
+{
+	int i;
+
+	for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++)
+		strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2819_stats_desc[i].format);
+	return idx;
+}
+
+static int mlx5e_grp_2819_fill_stats(struct mlx5e_priv *priv, u64 *data,
+				     int idx)
+{
+	int i;
+
+	for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++)
+		data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters,
+						  pport_2819_stats_desc, i);
+	return idx;
+}
+
+#define PPORT_PHY_STATISTICAL_OFF(c) \
+	MLX5_BYTE_OFF(ppcnt_reg, \
+		      counter_set.phys_layer_statistical_cntrs.c##_high)
+static const struct counter_desc pport_phy_statistical_stats_desc[] = {
+	{ "rx_pcs_symbol_err_phy", PPORT_PHY_STATISTICAL_OFF(phy_symbol_errors) },
+	{ "rx_corrected_bits_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits) },
+};
+
+#define NUM_PPORT_PHY_COUNTERS		ARRAY_SIZE(pport_phy_statistical_stats_desc)
+
+static int mlx5e_grp_phy_get_num_stats(struct mlx5e_priv *priv)
+{
+	return MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group) ?
+		NUM_PPORT_PHY_COUNTERS : 0;
+}
+
+static int mlx5e_grp_phy_fill_strings(struct mlx5e_priv *priv, u8 *data,
+				      int idx)
+{
+	int i;
+
+	if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group))
+		for (i = 0; i < NUM_PPORT_PHY_COUNTERS; i++)
+			strcpy(data + (idx++) * ETH_GSTRING_LEN,
+			       pport_phy_statistical_stats_desc[i].format);
+	return idx;
+}
+
+static int mlx5e_grp_phy_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
+{
+	int i;
+
+	if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group))
+		for (i = 0; i < NUM_PPORT_PHY_COUNTERS; i++)
+			data[idx++] =
+				MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters,
+						    pport_phy_statistical_stats_desc, i);
+	return idx;
+}
+
+#define PPORT_ETH_EXT_OFF(c) \
+	MLX5_BYTE_OFF(ppcnt_reg, \
+		      counter_set.eth_extended_cntrs_grp_data_layout.c##_high)
+static const struct counter_desc pport_eth_ext_stats_desc[] = {
+	{ "rx_buffer_passed_thres_phy", PPORT_ETH_EXT_OFF(rx_buffer_almost_full) },
+};
+
+#define NUM_PPORT_ETH_EXT_COUNTERS	ARRAY_SIZE(pport_eth_ext_stats_desc)
+
+static int mlx5e_grp_eth_ext_get_num_stats(struct mlx5e_priv *priv)
+{
+	if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters))
+		return NUM_PPORT_ETH_EXT_COUNTERS;
+
+	return 0;
+}
+
+static int mlx5e_grp_eth_ext_fill_strings(struct mlx5e_priv *priv, u8 *data,
+					  int idx)
+{
+	int i;
+
+	if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters))
+		for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS; i++)
+			strcpy(data + (idx++) * ETH_GSTRING_LEN,
+			       pport_eth_ext_stats_desc[i].format);
+	return idx;
+}
+
+static int mlx5e_grp_eth_ext_fill_stats(struct mlx5e_priv *priv, u64 *data,
+					int idx)
+{
+	int i;
+
+	if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters))
+		for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS; i++)
+			data[idx++] =
+				MLX5E_READ_CTR64_BE(&priv->stats.pport.eth_ext_counters,
+						    pport_eth_ext_stats_desc, i);
+	return idx;
+}
+
+#define PCIE_PERF_OFF(c) \
+	MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c)
+static const struct counter_desc pcie_perf_stats_desc[] = {
+	{ "rx_pci_signal_integrity", PCIE_PERF_OFF(rx_errors) },
+	{ "tx_pci_signal_integrity", PCIE_PERF_OFF(tx_errors) },
+};
+
+#define PCIE_PERF_OFF64(c) \
+	MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c##_high)
+static const struct counter_desc pcie_perf_stats_desc64[] = {
+	{ "outbound_pci_buffer_overflow", PCIE_PERF_OFF64(tx_overflow_buffer_pkt) },
+};
+
+static const struct counter_desc pcie_perf_stall_stats_desc[] = {
+	{ "outbound_pci_stalled_rd", PCIE_PERF_OFF(outbound_stalled_reads) },
+	{ "outbound_pci_stalled_wr", PCIE_PERF_OFF(outbound_stalled_writes) },
+	{ "outbound_pci_stalled_rd_events", PCIE_PERF_OFF(outbound_stalled_reads_events) },
+	{ "outbound_pci_stalled_wr_events", PCIE_PERF_OFF(outbound_stalled_writes_events) },
+};
+
+#define NUM_PCIE_PERF_COUNTERS		ARRAY_SIZE(pcie_perf_stats_desc)
+#define NUM_PCIE_PERF_COUNTERS64	ARRAY_SIZE(pcie_perf_stats_desc64)
+#define NUM_PCIE_PERF_STALL_COUNTERS	ARRAY_SIZE(pcie_perf_stall_stats_desc)
+
+static int mlx5e_grp_pcie_get_num_stats(struct mlx5e_priv *priv)
+{
+	int num_stats = 0;
+
+	if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group))
+		num_stats += NUM_PCIE_PERF_COUNTERS;
+
+	if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt))
+		num_stats += NUM_PCIE_PERF_COUNTERS64;
+
+	if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled))
+		num_stats += NUM_PCIE_PERF_STALL_COUNTERS;
+
+	return num_stats;
+}
+
+static int mlx5e_grp_pcie_fill_strings(struct mlx5e_priv *priv, u8 *data,
+				       int idx)
+{
+	int i;
+
+	if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group))
+		for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++)
+			strcpy(data + (idx++) * ETH_GSTRING_LEN,
+			       pcie_perf_stats_desc[i].format);
+
+	if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt))
+		for (i = 0; i < NUM_PCIE_PERF_COUNTERS64; i++)
+			strcpy(data + (idx++) * ETH_GSTRING_LEN,
+			       pcie_perf_stats_desc64[i].format);
+
+	if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled))
+		for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS; i++)
+			strcpy(data + (idx++) * ETH_GSTRING_LEN,
+			       pcie_perf_stall_stats_desc[i].format);
+	return idx;
+}
+
+static int mlx5e_grp_pcie_fill_stats(struct mlx5e_priv *priv, u64 *data,
+				     int idx)
+{
+	int i;
+
+	if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group))
+		for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++)
+			data[idx++] =
+				MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters,
+						    pcie_perf_stats_desc, i);
+
+	if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt))
+		for (i = 0; i < NUM_PCIE_PERF_COUNTERS64; i++)
+			data[idx++] =
+				MLX5E_READ_CTR64_BE(&priv->stats.pcie.pcie_perf_counters,
+						    pcie_perf_stats_desc64, i);
+
+	if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled))
+		for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS; i++)
+			data[idx++] =
+				MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters,
+						    pcie_perf_stall_stats_desc, i);
+	return idx;
+}
+
+#define PPORT_PER_PRIO_OFF(c) \
+	MLX5_BYTE_OFF(ppcnt_reg, \
+		      counter_set.eth_per_prio_grp_data_layout.c##_high)
+static const struct counter_desc pport_per_prio_traffic_stats_desc[] = {
+	{ "rx_prio%d_bytes", PPORT_PER_PRIO_OFF(rx_octets) },
+	{ "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) },
+	{ "tx_prio%d_bytes", PPORT_PER_PRIO_OFF(tx_octets) },
+	{ "tx_prio%d_packets", PPORT_PER_PRIO_OFF(tx_frames) },
+};
+
+#define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS	ARRAY_SIZE(pport_per_prio_traffic_stats_desc)
+
+static int mlx5e_grp_per_prio_traffic_get_num_stats(struct mlx5e_priv *priv)
+{
+	return NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * NUM_PPORT_PRIO;
+}
+
+static int mlx5e_grp_per_prio_traffic_fill_strings(struct mlx5e_priv *priv,
+						   u8 *data,
+						   int idx)
+{
+	int i, prio;
+
+	for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
+		for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++)
+			sprintf(data + (idx++) * ETH_GSTRING_LEN,
+				pport_per_prio_traffic_stats_desc[i].format, prio);
+	}
+
+	return idx;
+}
+
+static int mlx5e_grp_per_prio_traffic_fill_stats(struct mlx5e_priv *priv,
+						 u64 *data,
+						 int idx)
+{
+	int i, prio;
+
+	for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
+		for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++)
+			data[idx++] =
+				MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio],
+						    pport_per_prio_traffic_stats_desc, i);
+	}
+
+	return idx;
+}
+
+static const struct counter_desc pport_per_prio_pfc_stats_desc[] = {
+	/* %s is "global" or "prio{i}" */
+	{ "rx_%s_pause", PPORT_PER_PRIO_OFF(rx_pause) },
+	{ "rx_%s_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) },
+	{ "tx_%s_pause", PPORT_PER_PRIO_OFF(tx_pause) },
+	{ "tx_%s_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) },
+	{ "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) },
+};
+
+#define NUM_PPORT_PER_PRIO_PFC_COUNTERS		ARRAY_SIZE(pport_per_prio_pfc_stats_desc)
+
+static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	u8 pfc_en_tx;
+	u8 pfc_en_rx;
+	int err;
+
+	if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+		return 0;
+
+	err = mlx5_query_port_pfc(mdev, &pfc_en_tx, &pfc_en_rx);
+
+	return err ? 0 : pfc_en_tx | pfc_en_rx;
+}
+
+static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	u32 rx_pause;
+	u32 tx_pause;
+	int err;
+
+	if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+		return false;
+
+	err = mlx5_query_port_pause(mdev, &rx_pause, &tx_pause);
+
+	return err ? false : rx_pause | tx_pause;
+}
+
+static int mlx5e_grp_per_prio_pfc_get_num_stats(struct mlx5e_priv *priv)
+{
+	return (mlx5e_query_global_pause_combined(priv) +
+		hweight8(mlx5e_query_pfc_combined(priv))) *
+		NUM_PPORT_PER_PRIO_PFC_COUNTERS;
+}
+
+static int mlx5e_grp_per_prio_pfc_fill_strings(struct mlx5e_priv *priv,
+					       u8 *data,
+					       int idx)
+{
+	unsigned long pfc_combined;
+	int i, prio;
+
+	pfc_combined = mlx5e_query_pfc_combined(priv);
+	for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) {
+		for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
+			char pfc_string[ETH_GSTRING_LEN];
+
+			snprintf(pfc_string, sizeof(pfc_string), "prio%d", prio);
+			sprintf(data + (idx++) * ETH_GSTRING_LEN,
+				pport_per_prio_pfc_stats_desc[i].format, pfc_string);
+		}
+	}
+
+	if (mlx5e_query_global_pause_combined(priv)) {
+		for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
+			sprintf(data + (idx++) * ETH_GSTRING_LEN,
+				pport_per_prio_pfc_stats_desc[i].format, "global");
+		}
+	}
+
+	return idx;
+}
+
+static int mlx5e_grp_per_prio_pfc_fill_stats(struct mlx5e_priv *priv,
+					     u64 *data,
+					     int idx)
+{
+	unsigned long pfc_combined;
+	int i, prio;
+
+	pfc_combined = mlx5e_query_pfc_combined(priv);
+	for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) {
+		for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
+			data[idx++] =
+				MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio],
+						    pport_per_prio_pfc_stats_desc, i);
+		}
+	}
+
+	if (mlx5e_query_global_pause_combined(priv)) {
+		for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
+			data[idx++] =
+				MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0],
+						    pport_per_prio_pfc_stats_desc, i);
+		}
+	}
+
+	return idx;
+}
+
+static const struct counter_desc mlx5e_pme_status_desc[] = {
+	{ "module_unplug", 8 },
+};
+
+static const struct counter_desc mlx5e_pme_error_desc[] = {
+	{ "module_bus_stuck", 16 },       /* bus stuck (I2C or data shorted) */
+	{ "module_high_temp", 48 },       /* high temperature */
+	{ "module_bad_shorted", 56 },    /* bad or shorted cable/module */
+};
+
+#define NUM_PME_STATUS_STATS		ARRAY_SIZE(mlx5e_pme_status_desc)
+#define NUM_PME_ERR_STATS		ARRAY_SIZE(mlx5e_pme_error_desc)
+
+static int mlx5e_grp_pme_get_num_stats(struct mlx5e_priv *priv)
+{
+	return NUM_PME_STATUS_STATS + NUM_PME_ERR_STATS;
+}
+
+static int mlx5e_grp_pme_fill_strings(struct mlx5e_priv *priv, u8 *data,
+				      int idx)
+{
+	int i;
+
+	for (i = 0; i < NUM_PME_STATUS_STATS; i++)
+		strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_status_desc[i].format);
+
+	for (i = 0; i < NUM_PME_ERR_STATS; i++)
+		strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_error_desc[i].format);
+
+	return idx;
+}
+
+static int mlx5e_grp_pme_fill_stats(struct mlx5e_priv *priv, u64 *data,
+				    int idx)
+{
+	struct mlx5_priv *mlx5_priv = &priv->mdev->priv;
+	int i;
+
+	for (i = 0; i < NUM_PME_STATUS_STATS; i++)
+		data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.status_counters,
+						   mlx5e_pme_status_desc, i);
+
+	for (i = 0; i < NUM_PME_ERR_STATS; i++)
+		data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters,
+						   mlx5e_pme_error_desc, i);
+
+	return idx;
+}
+
+static int mlx5e_grp_ipsec_get_num_stats(struct mlx5e_priv *priv)
+{
+	return mlx5e_ipsec_get_count(priv);
+}
+
+static int mlx5e_grp_ipsec_fill_strings(struct mlx5e_priv *priv, u8 *data,
+					int idx)
+{
+	return idx + mlx5e_ipsec_get_strings(priv,
+					     data + idx * ETH_GSTRING_LEN);
+}
+
+static int mlx5e_grp_ipsec_fill_stats(struct mlx5e_priv *priv, u64 *data,
+				      int idx)
+{
+	return idx + mlx5e_ipsec_get_stats(priv, data + idx);
+}
+
+static const struct counter_desc rq_stats_desc[] = {
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx_full) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, removed_vlan_packets) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, page_reuse) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) },
+};
+
+static const struct counter_desc sq_stats_desc[] = {
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, packets) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, bytes) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_packets) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_bytes) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) },
+};
+
+#define NUM_RQ_STATS			ARRAY_SIZE(rq_stats_desc)
+#define NUM_SQ_STATS			ARRAY_SIZE(sq_stats_desc)
+
+static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv)
+{
+	return (NUM_RQ_STATS * priv->channels.num) +
+		(NUM_SQ_STATS * priv->channels.num * priv->channels.params.num_tc);
+}
+
+static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data,
+					   int idx)
+{
+	int i, j, tc;
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+		return idx;
+
+	for (i = 0; i < priv->channels.num; i++)
+		for (j = 0; j < NUM_RQ_STATS; j++)
+			sprintf(data + (idx++) * ETH_GSTRING_LEN, rq_stats_desc[j].format, i);
+
+	for (tc = 0; tc < priv->channels.params.num_tc; tc++)
+		for (i = 0; i < priv->channels.num; i++)
+			for (j = 0; j < NUM_SQ_STATS; j++)
+				sprintf(data + (idx++) * ETH_GSTRING_LEN,
+					sq_stats_desc[j].format,
+					priv->channel_tc2txq[i][tc]);
+
+	return idx;
+}
+
+static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data,
+					 int idx)
+{
+	struct mlx5e_channels *channels = &priv->channels;
+	int i, j, tc;
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+		return idx;
+
+	for (i = 0; i < channels->num; i++)
+		for (j = 0; j < NUM_RQ_STATS; j++)
+			data[idx++] =
+				MLX5E_READ_CTR64_CPU(&channels->c[i]->rq.stats,
+						     rq_stats_desc, j);
+
+	for (tc = 0; tc < priv->channels.params.num_tc; tc++)
+		for (i = 0; i < channels->num; i++)
+			for (j = 0; j < NUM_SQ_STATS; j++)
+				data[idx++] =
+					MLX5E_READ_CTR64_CPU(&channels->c[i]->sq[tc].stats,
+							     sq_stats_desc, j);
+
+	return idx;
+}
+
+const struct mlx5e_stats_grp mlx5e_stats_grps[] = {
+	{
+		.get_num_stats = mlx5e_grp_sw_get_num_stats,
+		.fill_strings = mlx5e_grp_sw_fill_strings,
+		.fill_stats = mlx5e_grp_sw_fill_stats,
+	},
+	{
+		.get_num_stats = mlx5e_grp_q_get_num_stats,
+		.fill_strings = mlx5e_grp_q_fill_strings,
+		.fill_stats = mlx5e_grp_q_fill_stats,
+	},
+	{
+		.get_num_stats = mlx5e_grp_vport_get_num_stats,
+		.fill_strings = mlx5e_grp_vport_fill_strings,
+		.fill_stats = mlx5e_grp_vport_fill_stats,
+	},
+	{
+		.get_num_stats = mlx5e_grp_802_3_get_num_stats,
+		.fill_strings = mlx5e_grp_802_3_fill_strings,
+		.fill_stats = mlx5e_grp_802_3_fill_stats,
+	},
+	{
+		.get_num_stats = mlx5e_grp_2863_get_num_stats,
+		.fill_strings = mlx5e_grp_2863_fill_strings,
+		.fill_stats = mlx5e_grp_2863_fill_stats,
+	},
+	{
+		.get_num_stats = mlx5e_grp_2819_get_num_stats,
+		.fill_strings = mlx5e_grp_2819_fill_strings,
+		.fill_stats = mlx5e_grp_2819_fill_stats,
+	},
+	{
+		.get_num_stats = mlx5e_grp_phy_get_num_stats,
+		.fill_strings = mlx5e_grp_phy_fill_strings,
+		.fill_stats = mlx5e_grp_phy_fill_stats,
+	},
+	{
+		.get_num_stats = mlx5e_grp_eth_ext_get_num_stats,
+		.fill_strings = mlx5e_grp_eth_ext_fill_strings,
+		.fill_stats = mlx5e_grp_eth_ext_fill_stats,
+	},
+	{
+		.get_num_stats = mlx5e_grp_pcie_get_num_stats,
+		.fill_strings = mlx5e_grp_pcie_fill_strings,
+		.fill_stats = mlx5e_grp_pcie_fill_stats,
+	},
+	{
+		.get_num_stats = mlx5e_grp_per_prio_traffic_get_num_stats,
+		.fill_strings = mlx5e_grp_per_prio_traffic_fill_strings,
+		.fill_stats = mlx5e_grp_per_prio_traffic_fill_stats,
+	},
+	{
+		.get_num_stats = mlx5e_grp_per_prio_pfc_get_num_stats,
+		.fill_strings = mlx5e_grp_per_prio_pfc_fill_strings,
+		.fill_stats = mlx5e_grp_per_prio_pfc_fill_stats,
+	},
+	{
+		.get_num_stats = mlx5e_grp_pme_get_num_stats,
+		.fill_strings = mlx5e_grp_pme_fill_strings,
+		.fill_stats = mlx5e_grp_pme_fill_stats,
+	},
+	{
+		.get_num_stats = mlx5e_grp_ipsec_get_num_stats,
+		.fill_strings = mlx5e_grp_ipsec_fill_strings,
+		.fill_stats = mlx5e_grp_ipsec_fill_stats,
+	},
+	{
+		.get_num_stats = mlx5e_grp_channels_get_num_stats,
+		.fill_strings = mlx5e_grp_channels_fill_strings,
+		.fill_stats = mlx5e_grp_channels_fill_stats,
+	}
+};
+
+const int mlx5e_num_stats_grps = ARRAY_SIZE(mlx5e_stats_grps);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index f8637213afc0..d679e21f686e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -59,8 +59,10 @@ struct mlx5e_sw_stats {
 	u64 tx_tso_bytes;
 	u64 tx_tso_inner_packets;
 	u64 tx_tso_inner_bytes;
+	u64 tx_added_vlan_packets;
 	u64 rx_lro_packets;
 	u64 rx_lro_bytes;
+	u64 rx_removed_vlan_packets;
 	u64 rx_csum_unnecessary;
 	u64 rx_csum_none;
 	u64 rx_csum_complete;
@@ -91,54 +93,10 @@ struct mlx5e_sw_stats {
 	u64 link_down_events_phy;
 };
 
-static const struct counter_desc sw_stats_desc[] = {
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_none) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_page_reuse) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) },
-};
-
 struct mlx5e_qcounter_stats {
 	u32 rx_out_of_buffer;
 };
 
-static const struct counter_desc q_stats_desc[] = {
-	{ MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_out_of_buffer) },
-};
-
-#define VPORT_COUNTER_OFF(c) MLX5_BYTE_OFF(query_vport_counter_out, c)
 #define VPORT_COUNTER_GET(vstats, c) MLX5_GET64(query_vport_counter_out, \
 						vstats->query_vport_out, c)
 
@@ -146,83 +104,22 @@ struct mlx5e_vport_stats {
 	__be64 query_vport_out[MLX5_ST_SZ_QW(query_vport_counter_out)];
 };
 
-static const struct counter_desc vport_stats_desc[] = {
-	{ "rx_vport_unicast_packets",
-		VPORT_COUNTER_OFF(received_eth_unicast.packets) },
-	{ "rx_vport_unicast_bytes",
-		VPORT_COUNTER_OFF(received_eth_unicast.octets) },
-	{ "tx_vport_unicast_packets",
-		VPORT_COUNTER_OFF(transmitted_eth_unicast.packets) },
-	{ "tx_vport_unicast_bytes",
-		VPORT_COUNTER_OFF(transmitted_eth_unicast.octets) },
-	{ "rx_vport_multicast_packets",
-		VPORT_COUNTER_OFF(received_eth_multicast.packets) },
-	{ "rx_vport_multicast_bytes",
-		VPORT_COUNTER_OFF(received_eth_multicast.octets) },
-	{ "tx_vport_multicast_packets",
-		VPORT_COUNTER_OFF(transmitted_eth_multicast.packets) },
-	{ "tx_vport_multicast_bytes",
-		VPORT_COUNTER_OFF(transmitted_eth_multicast.octets) },
-	{ "rx_vport_broadcast_packets",
-		VPORT_COUNTER_OFF(received_eth_broadcast.packets) },
-	{ "rx_vport_broadcast_bytes",
-		VPORT_COUNTER_OFF(received_eth_broadcast.octets) },
-	{ "tx_vport_broadcast_packets",
-		VPORT_COUNTER_OFF(transmitted_eth_broadcast.packets) },
-	{ "tx_vport_broadcast_bytes",
-		VPORT_COUNTER_OFF(transmitted_eth_broadcast.octets) },
-	{ "rx_vport_rdma_unicast_packets",
-		VPORT_COUNTER_OFF(received_ib_unicast.packets) },
-	{ "rx_vport_rdma_unicast_bytes",
-		VPORT_COUNTER_OFF(received_ib_unicast.octets) },
-	{ "tx_vport_rdma_unicast_packets",
-		VPORT_COUNTER_OFF(transmitted_ib_unicast.packets) },
-	{ "tx_vport_rdma_unicast_bytes",
-		VPORT_COUNTER_OFF(transmitted_ib_unicast.octets) },
-	{ "rx_vport_rdma_multicast_packets",
-		VPORT_COUNTER_OFF(received_ib_multicast.packets) },
-	{ "rx_vport_rdma_multicast_bytes",
-		VPORT_COUNTER_OFF(received_ib_multicast.octets) },
-	{ "tx_vport_rdma_multicast_packets",
-		VPORT_COUNTER_OFF(transmitted_ib_multicast.packets) },
-	{ "tx_vport_rdma_multicast_bytes",
-		VPORT_COUNTER_OFF(transmitted_ib_multicast.octets) },
-};
-
-#define PPORT_802_3_OFF(c) \
-	MLX5_BYTE_OFF(ppcnt_reg, \
-		      counter_set.eth_802_3_cntrs_grp_data_layout.c##_high)
 #define PPORT_802_3_GET(pstats, c) \
 	MLX5_GET64(ppcnt_reg, pstats->IEEE_802_3_counters, \
 		   counter_set.eth_802_3_cntrs_grp_data_layout.c##_high)
-#define PPORT_2863_OFF(c) \
-	MLX5_BYTE_OFF(ppcnt_reg, \
-		      counter_set.eth_2863_cntrs_grp_data_layout.c##_high)
 #define PPORT_2863_GET(pstats, c) \
 	MLX5_GET64(ppcnt_reg, pstats->RFC_2863_counters, \
 		   counter_set.eth_2863_cntrs_grp_data_layout.c##_high)
-#define PPORT_2819_OFF(c) \
-	MLX5_BYTE_OFF(ppcnt_reg, \
-		      counter_set.eth_2819_cntrs_grp_data_layout.c##_high)
 #define PPORT_2819_GET(pstats, c) \
 	MLX5_GET64(ppcnt_reg, pstats->RFC_2819_counters, \
 		   counter_set.eth_2819_cntrs_grp_data_layout.c##_high)
-#define PPORT_PHY_STATISTICAL_OFF(c) \
-	MLX5_BYTE_OFF(ppcnt_reg, \
-		      counter_set.phys_layer_statistical_cntrs.c##_high)
 #define PPORT_PHY_STATISTICAL_GET(pstats, c) \
 	MLX5_GET64(ppcnt_reg, (pstats)->phy_statistical_counters, \
 		   counter_set.phys_layer_statistical_cntrs.c##_high)
-#define PPORT_PER_PRIO_OFF(c) \
-	MLX5_BYTE_OFF(ppcnt_reg, \
-		      counter_set.eth_per_prio_grp_data_layout.c##_high)
 #define PPORT_PER_PRIO_GET(pstats, prio, c) \
 	MLX5_GET64(ppcnt_reg, pstats->per_prio_counters[prio], \
 		   counter_set.eth_per_prio_grp_data_layout.c##_high)
 #define NUM_PPORT_PRIO				8
-#define PPORT_ETH_EXT_OFF(c) \
-	MLX5_BYTE_OFF(ppcnt_reg, \
-		      counter_set.eth_extended_cntrs_grp_data_layout.c##_high)
 #define PPORT_ETH_EXT_GET(pstats, c) \
 	MLX5_GET64(ppcnt_reg, (pstats)->eth_ext_counters, \
 		   counter_set.eth_extended_cntrs_grp_data_layout.c##_high)
@@ -237,82 +134,10 @@ struct mlx5e_pport_stats {
 	__be64 eth_ext_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
 };
 
-static const struct counter_desc pport_802_3_stats_desc[] = {
-	{ "tx_packets_phy", PPORT_802_3_OFF(a_frames_transmitted_ok) },
-	{ "rx_packets_phy", PPORT_802_3_OFF(a_frames_received_ok) },
-	{ "rx_crc_errors_phy", PPORT_802_3_OFF(a_frame_check_sequence_errors) },
-	{ "tx_bytes_phy", PPORT_802_3_OFF(a_octets_transmitted_ok) },
-	{ "rx_bytes_phy", PPORT_802_3_OFF(a_octets_received_ok) },
-	{ "tx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) },
-	{ "tx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) },
-	{ "rx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_received_ok) },
-	{ "rx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_received_ok) },
-	{ "rx_in_range_len_errors_phy", PPORT_802_3_OFF(a_in_range_length_errors) },
-	{ "rx_out_of_range_len_phy", PPORT_802_3_OFF(a_out_of_range_length_field) },
-	{ "rx_oversize_pkts_phy", PPORT_802_3_OFF(a_frame_too_long_errors) },
-	{ "rx_symbol_err_phy", PPORT_802_3_OFF(a_symbol_error_during_carrier) },
-	{ "tx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_transmitted) },
-	{ "rx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_received) },
-	{ "rx_unsupported_op_phy", PPORT_802_3_OFF(a_unsupported_opcodes_received) },
-	{ "rx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) },
-	{ "tx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) },
-};
-
-static const struct counter_desc pport_2863_stats_desc[] = {
-	{ "rx_discards_phy", PPORT_2863_OFF(if_in_discards) },
-	{ "tx_discards_phy", PPORT_2863_OFF(if_out_discards) },
-	{ "tx_errors_phy", PPORT_2863_OFF(if_out_errors) },
-};
-
-static const struct counter_desc pport_2819_stats_desc[] = {
-	{ "rx_undersize_pkts_phy", PPORT_2819_OFF(ether_stats_undersize_pkts) },
-	{ "rx_fragments_phy", PPORT_2819_OFF(ether_stats_fragments) },
-	{ "rx_jabbers_phy", PPORT_2819_OFF(ether_stats_jabbers) },
-	{ "rx_64_bytes_phy", PPORT_2819_OFF(ether_stats_pkts64octets) },
-	{ "rx_65_to_127_bytes_phy", PPORT_2819_OFF(ether_stats_pkts65to127octets) },
-	{ "rx_128_to_255_bytes_phy", PPORT_2819_OFF(ether_stats_pkts128to255octets) },
-	{ "rx_256_to_511_bytes_phy", PPORT_2819_OFF(ether_stats_pkts256to511octets) },
-	{ "rx_512_to_1023_bytes_phy", PPORT_2819_OFF(ether_stats_pkts512to1023octets) },
-	{ "rx_1024_to_1518_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1024to1518octets) },
-	{ "rx_1519_to_2047_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1519to2047octets) },
-	{ "rx_2048_to_4095_bytes_phy", PPORT_2819_OFF(ether_stats_pkts2048to4095octets) },
-	{ "rx_4096_to_8191_bytes_phy", PPORT_2819_OFF(ether_stats_pkts4096to8191octets) },
-	{ "rx_8192_to_10239_bytes_phy", PPORT_2819_OFF(ether_stats_pkts8192to10239octets) },
-};
-
-static const struct counter_desc pport_phy_statistical_stats_desc[] = {
-	{ "rx_pcs_symbol_err_phy", PPORT_PHY_STATISTICAL_OFF(phy_symbol_errors) },
-	{ "rx_corrected_bits_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits) },
-};
-
-static const struct counter_desc pport_per_prio_traffic_stats_desc[] = {
-	{ "rx_prio%d_bytes", PPORT_PER_PRIO_OFF(rx_octets) },
-	{ "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) },
-	{ "tx_prio%d_bytes", PPORT_PER_PRIO_OFF(tx_octets) },
-	{ "tx_prio%d_packets", PPORT_PER_PRIO_OFF(tx_frames) },
-};
-
-static const struct counter_desc pport_per_prio_pfc_stats_desc[] = {
-	/* %s is "global" or "prio{i}" */
-	{ "rx_%s_pause", PPORT_PER_PRIO_OFF(rx_pause) },
-	{ "rx_%s_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) },
-	{ "tx_%s_pause", PPORT_PER_PRIO_OFF(tx_pause) },
-	{ "tx_%s_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) },
-	{ "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) },
-};
-
-static const struct counter_desc pport_eth_ext_stats_desc[] = {
-	{ "rx_buffer_passed_thres_phy", PPORT_ETH_EXT_OFF(rx_buffer_almost_full) },
-};
-
-#define PCIE_PERF_OFF(c) \
-	MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c)
 #define PCIE_PERF_GET(pcie_stats, c) \
 	MLX5_GET(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \
 		 counter_set.pcie_perf_cntrs_grp_data_layout.c)
 
-#define PCIE_PERF_OFF64(c) \
-	MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c##_high)
 #define PCIE_PERF_GET64(pcie_stats, c) \
 	MLX5_GET64(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \
 		   counter_set.pcie_perf_cntrs_grp_data_layout.c##_high)
@@ -321,22 +146,6 @@ struct mlx5e_pcie_stats {
 	__be64 pcie_perf_counters[MLX5_ST_SZ_QW(mpcnt_reg)];
 };
 
-static const struct counter_desc pcie_perf_stats_desc[] = {
-	{ "rx_pci_signal_integrity", PCIE_PERF_OFF(rx_errors) },
-	{ "tx_pci_signal_integrity", PCIE_PERF_OFF(tx_errors) },
-};
-
-static const struct counter_desc pcie_perf_stats_desc64[] = {
-	{ "outbound_pci_buffer_overflow", PCIE_PERF_OFF64(tx_overflow_buffer_pkt) },
-};
-
-static const struct counter_desc pcie_perf_stall_stats_desc[] = {
-	{ "outbound_pci_stalled_rd", PCIE_PERF_OFF(outbound_stalled_reads) },
-	{ "outbound_pci_stalled_wr", PCIE_PERF_OFF(outbound_stalled_writes) },
-	{ "outbound_pci_stalled_rd_events", PCIE_PERF_OFF(outbound_stalled_reads_events) },
-	{ "outbound_pci_stalled_wr_events", PCIE_PERF_OFF(outbound_stalled_writes_events) },
-};
-
 struct mlx5e_rq_stats {
 	u64 packets;
 	u64 bytes;
@@ -346,6 +155,7 @@ struct mlx5e_rq_stats {
 	u64 csum_none;
 	u64 lro_packets;
 	u64 lro_bytes;
+	u64 removed_vlan_packets;
 	u64 xdp_drop;
 	u64 xdp_tx;
 	u64 xdp_tx_full;
@@ -362,31 +172,6 @@ struct mlx5e_rq_stats {
 	u64 cache_waive;
 };
 
-static const struct counter_desc rq_stats_desc[] = {
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx_full) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, page_reuse) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) },
-};
-
 struct mlx5e_sq_stats {
 	/* commonly accessed in data path */
 	u64 packets;
@@ -398,6 +183,7 @@ struct mlx5e_sq_stats {
 	u64 tso_inner_bytes;
 	u64 csum_partial;
 	u64 csum_partial_inner;
+	u64 added_vlan_packets;
 	u64 nop;
 	/* less likely accessed in data path */
 	u64 csum_none;
@@ -406,61 +192,6 @@ struct mlx5e_sq_stats {
 	u64 dropped;
 };
 
-static const struct counter_desc sq_stats_desc[] = {
-	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, packets) },
-	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, bytes) },
-	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_packets) },
-	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_bytes) },
-	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) },
-	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) },
-	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial) },
-	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
-	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) },
-	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) },
-	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) },
-	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) },
-	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) },
-	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) },
-};
-
-#define NUM_SW_COUNTERS			ARRAY_SIZE(sw_stats_desc)
-#define NUM_Q_COUNTERS			ARRAY_SIZE(q_stats_desc)
-#define NUM_VPORT_COUNTERS		ARRAY_SIZE(vport_stats_desc)
-#define NUM_PPORT_802_3_COUNTERS	ARRAY_SIZE(pport_802_3_stats_desc)
-#define NUM_PPORT_2863_COUNTERS		ARRAY_SIZE(pport_2863_stats_desc)
-#define NUM_PPORT_2819_COUNTERS		ARRAY_SIZE(pport_2819_stats_desc)
-#define NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv) \
-	(ARRAY_SIZE(pport_phy_statistical_stats_desc) * \
-	 MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group))
-#define NUM_PCIE_PERF_COUNTERS(priv) \
-	(ARRAY_SIZE(pcie_perf_stats_desc) * \
-	 MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group))
-#define NUM_PCIE_PERF_COUNTERS64(priv) \
-	(ARRAY_SIZE(pcie_perf_stats_desc64) * \
-	 MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt))
-#define NUM_PCIE_PERF_STALL_COUNTERS(priv) \
-	(ARRAY_SIZE(pcie_perf_stall_stats_desc) * \
-	 MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled))
-#define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS \
-	ARRAY_SIZE(pport_per_prio_traffic_stats_desc)
-#define NUM_PPORT_PER_PRIO_PFC_COUNTERS \
-	ARRAY_SIZE(pport_per_prio_pfc_stats_desc)
-#define NUM_PPORT_ETH_EXT_COUNTERS(priv) \
-	(ARRAY_SIZE(pport_eth_ext_stats_desc) * \
-	 MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters))
-#define NUM_PPORT_COUNTERS(priv)	(NUM_PPORT_802_3_COUNTERS + \
-					 NUM_PPORT_2863_COUNTERS  + \
-					 NUM_PPORT_2819_COUNTERS  + \
-					 NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv) + \
-					 NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * \
-					 NUM_PPORT_PRIO + \
-					 NUM_PPORT_ETH_EXT_COUNTERS(priv))
-#define NUM_PCIE_COUNTERS(priv)		(NUM_PCIE_PERF_COUNTERS(priv) + \
-					 NUM_PCIE_PERF_COUNTERS64(priv) +\
-					 NUM_PCIE_PERF_STALL_COUNTERS(priv))
-#define NUM_RQ_STATS			ARRAY_SIZE(rq_stats_desc)
-#define NUM_SQ_STATS			ARRAY_SIZE(sq_stats_desc)
-
 struct mlx5e_stats {
 	struct mlx5e_sw_stats sw;
 	struct mlx5e_qcounter_stats qcnt;
@@ -470,14 +201,14 @@ struct mlx5e_stats {
 	struct mlx5e_pcie_stats pcie;
 };
 
-static const struct counter_desc mlx5e_pme_status_desc[] = {
-	{ "module_unplug", 8 },
+struct mlx5e_priv;
+struct mlx5e_stats_grp {
+	int (*get_num_stats)(struct mlx5e_priv *priv);
+	int (*fill_strings)(struct mlx5e_priv *priv, u8 *data, int idx);
+	int (*fill_stats)(struct mlx5e_priv *priv, u64 *data, int idx);
 };
 
-static const struct counter_desc mlx5e_pme_error_desc[] = {
-	{ "module_bus_stuck", 16 },       /* bus stuck (I2C or data shorted) */
-	{ "module_high_temp", 48 },       /* high temperature */
-	{ "module_bad_shorted", 56 },    /* bad or shorted cable/module */
-};
+extern const struct mlx5e_stats_grp mlx5e_stats_grps[];
+extern const int mlx5e_num_stats_grps;
 
 #endif /* __MLX5_EN_STATS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 9ba1f72060aa..55979ec2e88a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -90,8 +90,8 @@ enum {
 	MLX5_HEADER_TYPE_NVGRE = 0x1,
 };
 
-#define MLX5E_TC_TABLE_NUM_ENTRIES 1024
 #define MLX5E_TC_TABLE_NUM_GROUPS 4
+#define MLX5E_TC_TABLE_MAX_GROUP_SIZE (1 << 16)
 
 struct mod_hdr_key {
 	int num_actions;
@@ -263,10 +263,21 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
 	}
 
 	if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
+		int tc_grp_size, tc_tbl_size;
+		u32 max_flow_counter;
+
+		max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
+				    MLX5_CAP_GEN(dev, max_flow_counter_15_0);
+
+		tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);
+
+		tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
+				    BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
+
 		priv->fs.tc.t =
 			mlx5_create_auto_grouped_flow_table(priv->fs.ns,
 							    MLX5E_TC_PRIO,
-							    MLX5E_TC_TABLE_NUM_ENTRIES,
+							    tc_tbl_size,
 							    MLX5E_TC_TABLE_NUM_GROUPS,
 							    0, 0);
 		if (IS_ERR(priv->fs.tc.t)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 1d6925d4369a..569b42a01026 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -32,9 +32,11 @@
 
 #include <linux/tcp.h>
 #include <linux/if_vlan.h>
+#include <net/dsfield.h>
 #include "en.h"
 #include "ipoib/ipoib.h"
 #include "en_accel/ipsec_rxtx.h"
+#include "lib/clock.h"
 
 #define MLX5E_SQ_NOPS_ROOM  MLX5_SEND_WQE_MAX_WQEBBS
 #define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
@@ -85,6 +87,20 @@ static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma)
 	}
 }
 
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+static inline int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb)
+{
+	int dscp_cp = 0;
+
+	if (skb->protocol == htons(ETH_P_IP))
+		dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+
+	return priv->dcbx_dp.dscp2prio[dscp_cp];
+}
+#endif
+
 u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
 		       void *accel_priv, select_queue_fallback_t fallback)
 {
@@ -96,8 +112,13 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
 	if (!netdev_get_num_tc(dev))
 		return channel_ix;
 
-	if (skb_vlan_tag_present(skb))
-		up = skb->vlan_tci >> VLAN_PRIO_SHIFT;
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+	if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP)
+		up = mlx5e_get_dscp_up(priv, skb);
+	else
+#endif
+		if (skb_vlan_tag_present(skb))
+			up = skb->vlan_tci >> VLAN_PRIO_SHIFT;
 
 	/* channel_ix can be larger than num_channels since
 	 * dev->num_real_tx_queues = num_channels * num_tc
@@ -340,6 +361,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 		if (skb_vlan_tag_present(skb)) {
 			mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs, &skb_data, &skb_len);
 			ihs += VLAN_HLEN;
+			sq->stats.added_vlan_packets++;
 		} else {
 			memcpy(eseg->inline_hdr.start, skb_data, ihs);
 			mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs);
@@ -348,7 +370,10 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 		ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr.start), MLX5_SEND_WQE_DS);
 	} else if (skb_vlan_tag_present(skb)) {
 		eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN);
+		if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD))
+			eseg->insert.type |= cpu_to_be16(MLX5_ETH_WQE_SVLAN);
 		eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb));
+		sq->stats.added_vlan_packets++;
 	}
 
 	headlen = skb_len - skb->data_len;
@@ -452,8 +477,9 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 				     SKBTX_HW_TSTAMP)) {
 				struct skb_shared_hwtstamps hwts = {};
 
-				mlx5e_fill_hwstamp(sq->tstamp,
-						   get_cqe_ts(cqe), &hwts);
+				hwts.hwtstamp =
+					mlx5_timecounter_cyc2time(sq->clock,
+								  get_cqe_ts(cqe));
 				skb_tstamp_tx(skb, &hwts);
 			}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index fc606bfd1d6e..60771865c99c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -491,8 +491,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
 			break;
 
 		case MLX5_EVENT_TYPE_PPS_EVENT:
-			if (dev->event)
-				dev->event(dev, MLX5_DEV_EVENT_PPS, (unsigned long)eqe);
+			mlx5_pps_event(dev, eqe);
 			break;
 
 		case MLX5_EVENT_TYPE_FPGA_ERROR:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index c77f4c0c7769..bbb140f517c4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -157,7 +157,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
 			    MLX5_MATCH_OUTER_HEADERS);
 	struct mlx5_flow_handle *flow_rule = NULL;
 	struct mlx5_flow_act flow_act = {0};
-	struct mlx5_flow_destination dest;
+	struct mlx5_flow_destination dest = {};
 	struct mlx5_flow_spec *spec;
 	void *mv_misc = NULL;
 	void *mc_misc = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index d9fd8570b07c..1143d80119bd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -306,7 +306,7 @@ static struct mlx5_flow_handle *
 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn)
 {
 	struct mlx5_flow_act flow_act = {0};
-	struct mlx5_flow_destination dest;
+	struct mlx5_flow_destination dest = {};
 	struct mlx5_flow_handle *flow_rule;
 	struct mlx5_flow_spec *spec;
 	void *misc;
@@ -395,7 +395,7 @@ out_err:
 static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
 {
 	struct mlx5_flow_act flow_act = {0};
-	struct mlx5_flow_destination dest;
+	struct mlx5_flow_destination dest = {};
 	struct mlx5_flow_handle *flow_rule = NULL;
 	struct mlx5_flow_spec *spec;
 	int err = 0;
@@ -670,7 +670,7 @@ struct mlx5_flow_handle *
 mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn)
 {
 	struct mlx5_flow_act flow_act = {0};
-	struct mlx5_flow_destination dest;
+	struct mlx5_flow_destination dest = {};
 	struct mlx5_flow_handle *flow_rule;
 	struct mlx5_flow_spec *spec;
 	void *misc;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 36ecc2b2e187..881e2e55840c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -40,7 +40,8 @@
 #include "eswitch.h"
 
 int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
-			    struct mlx5_flow_table *ft, u32 underlay_qpn)
+			    struct mlx5_flow_table *ft, u32 underlay_qpn,
+			    bool disconnect)
 {
 	u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)]   = {0};
 	u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0};
@@ -52,7 +53,15 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
 	MLX5_SET(set_flow_table_root_in, in, opcode,
 		 MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
 	MLX5_SET(set_flow_table_root_in, in, table_type, ft->type);
-	MLX5_SET(set_flow_table_root_in, in, table_id, ft->id);
+
+	if (disconnect) {
+		MLX5_SET(set_flow_table_root_in, in, op_mod, 1);
+		MLX5_SET(set_flow_table_root_in, in, table_id, 0);
+	} else {
+		MLX5_SET(set_flow_table_root_in, in, op_mod, 0);
+		MLX5_SET(set_flow_table_root_in, in, table_id, ft->id);
+	}
+
 	MLX5_SET(set_flow_table_root_in, in, underlay_qpn, underlay_qpn);
 	if (ft->vport) {
 		MLX5_SET(set_flow_table_root_in, in, vport_number, ft->vport);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index c6d7bdf255b6..71e2d0f37ad9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -71,8 +71,8 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
 			unsigned int index);
 
 int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
-			    struct mlx5_flow_table *ft,
-			    u32 underlay_qpn);
+			    struct mlx5_flow_table *ft, u32 underlay_qpn,
+			    bool disconnect);
 
 int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id);
 int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 5a7bea688ec8..c70fd663a633 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -145,10 +145,10 @@ static struct init_tree_node {
 	}
 };
 
-enum fs_i_mutex_lock_class {
-	FS_MUTEX_GRANDPARENT,
-	FS_MUTEX_PARENT,
-	FS_MUTEX_CHILD
+enum fs_i_lock_class {
+	FS_LOCK_GRANDPARENT,
+	FS_LOCK_PARENT,
+	FS_LOCK_CHILD
 };
 
 static const struct rhashtable_params rhash_fte = {
@@ -168,10 +168,16 @@ static const struct rhashtable_params rhash_fg = {
 
 };
 
-static void del_rule(struct fs_node *node);
-static void del_flow_table(struct fs_node *node);
-static void del_flow_group(struct fs_node *node);
-static void del_fte(struct fs_node *node);
+static void del_hw_flow_table(struct fs_node *node);
+static void del_hw_flow_group(struct fs_node *node);
+static void del_hw_fte(struct fs_node *node);
+static void del_sw_flow_table(struct fs_node *node);
+static void del_sw_flow_group(struct fs_node *node);
+static void del_sw_fte(struct fs_node *node);
+/* Delete rule (destination) is special case that 
+ * requires to lock the FTE for all the deletion process.
+ */
+static void del_sw_hw_rule(struct fs_node *node);
 static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
 				struct mlx5_flow_destination *d2);
 static struct mlx5_flow_rule *
@@ -179,20 +185,22 @@ find_flow_rule(struct fs_fte *fte,
 	       struct mlx5_flow_destination *dest);
 
 static void tree_init_node(struct fs_node *node,
-			   unsigned int refcount,
-			   void (*remove_func)(struct fs_node *))
+			   void (*del_hw_func)(struct fs_node *),
+			   void (*del_sw_func)(struct fs_node *))
 {
-	atomic_set(&node->refcount, refcount);
+	refcount_set(&node->refcount, 1);
 	INIT_LIST_HEAD(&node->list);
 	INIT_LIST_HEAD(&node->children);
-	mutex_init(&node->lock);
-	node->remove_func = remove_func;
+	init_rwsem(&node->lock);
+	node->del_hw_func = del_hw_func;
+	node->del_sw_func = del_sw_func;
+	node->active = false;
 }
 
 static void tree_add_node(struct fs_node *node, struct fs_node *parent)
 {
 	if (parent)
-		atomic_inc(&parent->refcount);
+		refcount_inc(&parent->refcount);
 	node->parent = parent;
 
 	/* Parent is the root */
@@ -202,58 +210,78 @@ static void tree_add_node(struct fs_node *node, struct fs_node *parent)
 		node->root = parent->root;
 }
 
-static void tree_get_node(struct fs_node *node)
+static int tree_get_node(struct fs_node *node)
 {
-	atomic_inc(&node->refcount);
+	return refcount_inc_not_zero(&node->refcount);
 }
 
-static void nested_lock_ref_node(struct fs_node *node,
-				 enum fs_i_mutex_lock_class class)
+static void nested_down_read_ref_node(struct fs_node *node,
+				      enum fs_i_lock_class class)
 {
 	if (node) {
-		mutex_lock_nested(&node->lock, class);
-		atomic_inc(&node->refcount);
+		down_read_nested(&node->lock, class);
+		refcount_inc(&node->refcount);
 	}
 }
 
-static void lock_ref_node(struct fs_node *node)
+static void nested_down_write_ref_node(struct fs_node *node,
+				       enum fs_i_lock_class class)
 {
 	if (node) {
-		mutex_lock(&node->lock);
-		atomic_inc(&node->refcount);
+		down_write_nested(&node->lock, class);
+		refcount_inc(&node->refcount);
 	}
 }
 
-static void unlock_ref_node(struct fs_node *node)
+static void down_write_ref_node(struct fs_node *node)
 {
 	if (node) {
-		atomic_dec(&node->refcount);
-		mutex_unlock(&node->lock);
+		down_write(&node->lock);
+		refcount_inc(&node->refcount);
 	}
 }
 
+static void up_read_ref_node(struct fs_node *node)
+{
+	refcount_dec(&node->refcount);
+	up_read(&node->lock);
+}
+
+static void up_write_ref_node(struct fs_node *node)
+{
+	refcount_dec(&node->refcount);
+	up_write(&node->lock);
+}
+
 static void tree_put_node(struct fs_node *node)
 {
 	struct fs_node *parent_node = node->parent;
 
-	lock_ref_node(parent_node);
-	if (atomic_dec_and_test(&node->refcount)) {
-		if (parent_node)
+	if (refcount_dec_and_test(&node->refcount)) {
+		if (node->del_hw_func)
+			node->del_hw_func(node);
+		if (parent_node) {
+			/* Only root namespace doesn't have parent and we just
+			 * need to free its node.
+			 */
+			down_write_ref_node(parent_node);
 			list_del_init(&node->list);
-		if (node->remove_func)
-			node->remove_func(node);
-		kfree(node);
+			if (node->del_sw_func)
+				node->del_sw_func(node);
+			up_write_ref_node(parent_node);
+		} else {
+			kfree(node);
+		}
 		node = NULL;
 	}
-	unlock_ref_node(parent_node);
 	if (!node && parent_node)
 		tree_put_node(parent_node);
 }
 
 static int tree_remove_node(struct fs_node *node)
 {
-	if (atomic_read(&node->refcount) > 1) {
-		atomic_dec(&node->refcount);
+	if (refcount_read(&node->refcount) > 1) {
+		refcount_dec(&node->refcount);
 		return -EEXIST;
 	}
 	tree_put_node(node);
@@ -362,6 +390,15 @@ static struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
 	return container_of(ns, struct mlx5_flow_root_namespace, ns);
 }
 
+static inline struct mlx5_flow_steering *get_steering(struct fs_node *node)
+{
+	struct mlx5_flow_root_namespace *root = find_root(node);
+
+	if (root)
+		return root->dev->priv.steering;
+	return NULL;
+}
+
 static inline struct mlx5_core_dev *get_dev(struct fs_node *node)
 {
 	struct mlx5_flow_root_namespace *root = find_root(node);
@@ -371,26 +408,36 @@ static inline struct mlx5_core_dev *get_dev(struct fs_node *node)
 	return NULL;
 }
 
-static void del_flow_table(struct fs_node *node)
+static void del_hw_flow_table(struct fs_node *node)
 {
 	struct mlx5_flow_table *ft;
 	struct mlx5_core_dev *dev;
-	struct fs_prio *prio;
 	int err;
 
 	fs_get_obj(ft, node);
 	dev = get_dev(&ft->node);
 
-	err = mlx5_cmd_destroy_flow_table(dev, ft);
-	if (err)
-		mlx5_core_warn(dev, "flow steering can't destroy ft\n");
-	ida_destroy(&ft->fte_allocator);
+	if (node->active) {
+		err = mlx5_cmd_destroy_flow_table(dev, ft);
+		if (err)
+			mlx5_core_warn(dev, "flow steering can't destroy ft\n");
+	}
+}
+
+static void del_sw_flow_table(struct fs_node *node)
+{
+	struct mlx5_flow_table *ft;
+	struct fs_prio *prio;
+
+	fs_get_obj(ft, node);
+
 	rhltable_destroy(&ft->fgs_hash);
 	fs_get_obj(prio, ft->node.parent);
 	prio->num_ft--;
+	kfree(ft);
 }
 
-static void del_rule(struct fs_node *node)
+static void del_sw_hw_rule(struct fs_node *node)
 {
 	struct mlx5_flow_rule *rule;
 	struct mlx5_flow_table *ft;
@@ -406,7 +453,6 @@ static void del_rule(struct fs_node *node)
 	fs_get_obj(fg, fte->node.parent);
 	fs_get_obj(ft, fg->node.parent);
 	trace_mlx5_fs_del_rule(rule);
-	list_del(&rule->node.list);
 	if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
 		mutex_lock(&rule->dest_attr.ft->lock);
 		list_del(&rule->next_ft);
@@ -434,117 +480,203 @@ out:
 				       "%s can't del rule fg id=%d fte_index=%d\n",
 				       __func__, fg->id, fte->index);
 	}
+	kfree(rule);
 }
 
-static void destroy_fte(struct fs_fte *fte, struct mlx5_flow_group *fg)
+static void del_hw_fte(struct fs_node *node)
 {
 	struct mlx5_flow_table *ft;
-	int ret;
+	struct mlx5_flow_group *fg;
+	struct mlx5_core_dev *dev;
+	struct fs_fte *fte;
+	int err;
 
-	ret = rhashtable_remove_fast(&fg->ftes_hash, &fte->hash, rhash_fte);
-	WARN_ON(ret);
-	fte->status = 0;
+	fs_get_obj(fte, node);
+	fs_get_obj(fg, fte->node.parent);
 	fs_get_obj(ft, fg->node.parent);
-	ida_simple_remove(&ft->fte_allocator, fte->index);
+
+	trace_mlx5_fs_del_fte(fte);
+	dev = get_dev(&ft->node);
+	if (node->active) {
+		err = mlx5_cmd_delete_fte(dev, ft,
+					  fte->index);
+		if (err)
+			mlx5_core_warn(dev,
+				       "flow steering can't delete fte in index %d of flow group id %d\n",
+				       fte->index, fg->id);
+	}
 }
 
-static void del_fte(struct fs_node *node)
+static void del_sw_fte(struct fs_node *node)
 {
-	struct mlx5_flow_table *ft;
+	struct mlx5_flow_steering *steering = get_steering(node);
 	struct mlx5_flow_group *fg;
-	struct mlx5_core_dev *dev;
 	struct fs_fte *fte;
 	int err;
 
 	fs_get_obj(fte, node);
 	fs_get_obj(fg, fte->node.parent);
-	fs_get_obj(ft, fg->node.parent);
-	trace_mlx5_fs_del_fte(fte);
 
-	dev = get_dev(&ft->node);
-	err = mlx5_cmd_delete_fte(dev, ft,
-				  fte->index);
-	if (err)
-		mlx5_core_warn(dev,
-			       "flow steering can't delete fte in index %d of flow group id %d\n",
-			       fte->index, fg->id);
-
-	destroy_fte(fte, fg);
+	err = rhashtable_remove_fast(&fg->ftes_hash,
+				     &fte->hash,
+				     rhash_fte);
+	WARN_ON(err);
+	ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index);
+	kmem_cache_free(steering->ftes_cache, fte);
 }
 
-static void del_flow_group(struct fs_node *node)
+static void del_hw_flow_group(struct fs_node *node)
 {
 	struct mlx5_flow_group *fg;
 	struct mlx5_flow_table *ft;
 	struct mlx5_core_dev *dev;
-	int err;
 
 	fs_get_obj(fg, node);
 	fs_get_obj(ft, fg->node.parent);
 	dev = get_dev(&ft->node);
 	trace_mlx5_fs_del_fg(fg);
 
-	if (ft->autogroup.active)
-		ft->autogroup.num_groups--;
+	if (fg->node.active && mlx5_cmd_destroy_flow_group(dev, ft, fg->id))
+		mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n",
+			       fg->id, ft->id);
+}
+
+static void del_sw_flow_group(struct fs_node *node)
+{
+	struct mlx5_flow_steering *steering = get_steering(node);
+	struct mlx5_flow_group *fg;
+	struct mlx5_flow_table *ft;
+	int err;
+
+	fs_get_obj(fg, node);
+	fs_get_obj(ft, fg->node.parent);
 
 	rhashtable_destroy(&fg->ftes_hash);
+	ida_destroy(&fg->fte_allocator);
+	if (ft->autogroup.active)
+		ft->autogroup.num_groups--;
 	err = rhltable_remove(&ft->fgs_hash,
 			      &fg->hash,
 			      rhash_fg);
 	WARN_ON(err);
-	if (mlx5_cmd_destroy_flow_group(dev, ft, fg->id))
-		mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n",
-			       fg->id, ft->id);
+	kmem_cache_free(steering->fgs_cache, fg);
+}
+
+static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte)
+{
+	int index;
+	int ret;
+
+	index = ida_simple_get(&fg->fte_allocator, 0, fg->max_ftes, GFP_KERNEL);
+	if (index < 0)
+		return index;
+
+	fte->index = index + fg->start_index;
+	ret = rhashtable_insert_fast(&fg->ftes_hash,
+				     &fte->hash,
+				     rhash_fte);
+	if (ret)
+		goto err_ida_remove;
+
+	tree_add_node(&fte->node, &fg->node);
+	list_add_tail(&fte->node.list, &fg->node.children);
+	return 0;
+
+err_ida_remove:
+	ida_simple_remove(&fg->fte_allocator, index);
+	return ret;
 }
 
-static struct fs_fte *alloc_fte(struct mlx5_flow_act *flow_act,
+static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
 				u32 *match_value,
-				unsigned int index)
+				struct mlx5_flow_act *flow_act)
 {
+	struct mlx5_flow_steering *steering = get_steering(&ft->node);
 	struct fs_fte *fte;
 
-	fte = kzalloc(sizeof(*fte), GFP_KERNEL);
+	fte = kmem_cache_zalloc(steering->ftes_cache, GFP_KERNEL);
 	if (!fte)
 		return ERR_PTR(-ENOMEM);
 
 	memcpy(fte->val, match_value, sizeof(fte->val));
 	fte->node.type =  FS_TYPE_FLOW_ENTRY;
 	fte->flow_tag = flow_act->flow_tag;
-	fte->index = index;
 	fte->action = flow_act->action;
 	fte->encap_id = flow_act->encap_id;
 	fte->modify_id = flow_act->modify_id;
 
+	tree_init_node(&fte->node, del_hw_fte, del_sw_fte);
+
 	return fte;
 }
 
-static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in)
+static void dealloc_flow_group(struct mlx5_flow_steering *steering,
+			       struct mlx5_flow_group *fg)
+{
+	rhashtable_destroy(&fg->ftes_hash);
+	kmem_cache_free(steering->fgs_cache, fg);
+}
+
+static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steering,
+						u8 match_criteria_enable,
+						void *match_criteria,
+						int start_index,
+						int end_index)
 {
 	struct mlx5_flow_group *fg;
-	void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
-					    create_fg_in, match_criteria);
-	u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
-					    create_fg_in,
-					    match_criteria_enable);
 	int ret;
 
-	fg = kzalloc(sizeof(*fg), GFP_KERNEL);
+	fg = kmem_cache_zalloc(steering->fgs_cache, GFP_KERNEL);
 	if (!fg)
 		return ERR_PTR(-ENOMEM);
 
 	ret = rhashtable_init(&fg->ftes_hash, &rhash_fte);
 	if (ret) {
-		kfree(fg);
+		kmem_cache_free(steering->fgs_cache, fg);
 		return ERR_PTR(ret);
-	}
+}
+	ida_init(&fg->fte_allocator);
 	fg->mask.match_criteria_enable = match_criteria_enable;
 	memcpy(&fg->mask.match_criteria, match_criteria,
 	       sizeof(fg->mask.match_criteria));
 	fg->node.type =  FS_TYPE_FLOW_GROUP;
-	fg->start_index = MLX5_GET(create_flow_group_in, create_fg_in,
-				   start_flow_index);
-	fg->max_ftes = MLX5_GET(create_flow_group_in, create_fg_in,
-				end_flow_index) - fg->start_index + 1;
+	fg->start_index = start_index;
+	fg->max_ftes = end_index - start_index + 1;
+
+	return fg;
+}
+
+static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *ft,
+						       u8 match_criteria_enable,
+						       void *match_criteria,
+						       int start_index,
+						       int end_index,
+						       struct list_head *prev)
+{
+	struct mlx5_flow_steering *steering = get_steering(&ft->node);
+	struct mlx5_flow_group *fg;
+	int ret;
+
+	fg = alloc_flow_group(steering, match_criteria_enable, match_criteria,
+			      start_index, end_index);
+	if (IS_ERR(fg))
+		return fg;
+
+	/* initialize refcnt, add to parent list */
+	ret = rhltable_insert(&ft->fgs_hash,
+			      &fg->hash,
+			      rhash_fg);
+	if (ret) {
+		dealloc_flow_group(steering, fg);
+		return ERR_PTR(ret);
+	}
+
+	tree_init_node(&fg->node, del_hw_flow_group, del_sw_flow_group);
+	tree_add_node(&fg->node, &ft->node);
+	/* Add node to group list */
+	list_add(&fg->node.list, prev);
+	atomic_inc(&ft->node.version);
+
 	return fg;
 }
 
@@ -575,7 +707,6 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft
 	ft->flags = flags;
 	INIT_LIST_HEAD(&ft->fwd_rules);
 	mutex_init(&ft->lock);
-	ida_init(&ft->fte_allocator);
 
 	return ft;
 }
@@ -693,8 +824,10 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
 				 *prio)
 {
 	struct mlx5_flow_root_namespace *root = find_root(&prio->node);
+	struct mlx5_ft_underlay_qp *uqp;
 	int min_level = INT_MAX;
 	int err;
+	u32 qpn;
 
 	if (root->root_ft)
 		min_level = root->root_ft->level;
@@ -702,10 +835,24 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
 	if (ft->level >= min_level)
 		return 0;
 
-	err = mlx5_cmd_update_root_ft(root->dev, ft, root->underlay_qpn);
+	if (list_empty(&root->underlay_qpns)) {
+		/* Don't set any QPN (zero) in case QPN list is empty */
+		qpn = 0;
+		err = mlx5_cmd_update_root_ft(root->dev, ft, qpn, false);
+	} else {
+		list_for_each_entry(uqp, &root->underlay_qpns, list) {
+			qpn = uqp->qpn;
+			err = mlx5_cmd_update_root_ft(root->dev, ft, qpn,
+						      false);
+			if (err)
+				break;
+		}
+	}
+
 	if (err)
-		mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
-			       ft->id);
+		mlx5_core_warn(root->dev,
+			       "Update root flow table of id(%u) qpn(%d) failed\n",
+			       ft->id, qpn);
 	else
 		root->root_ft = ft;
 
@@ -724,7 +871,7 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
 	fs_get_obj(fte, rule->node.parent);
 	if (!(fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
 		return -EINVAL;
-	lock_ref_node(&fte->node);
+	down_write_ref_node(&fte->node);
 	fs_get_obj(fg, fte->node.parent);
 	fs_get_obj(ft, fg->node.parent);
 
@@ -733,7 +880,7 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
 				  ft, fg->id,
 				  modify_mask,
 				  fte);
-	unlock_ref_node(&fte->node);
+	up_write_ref_node(&fte->node);
 
 	return err;
 }
@@ -765,7 +912,7 @@ static int connect_fwd_rules(struct mlx5_core_dev *dev,
 			     struct mlx5_flow_table *new_next_ft,
 			     struct mlx5_flow_table *old_next_ft)
 {
-	struct mlx5_flow_destination dest;
+	struct mlx5_flow_destination dest = {};
 	struct mlx5_flow_rule *iter;
 	int err = 0;
 
@@ -870,7 +1017,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
 		goto unlock_root;
 	}
 
-	tree_init_node(&ft->node, 1, del_flow_table);
+	tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table);
 	log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0;
 	next_ft = find_next_chained_ft(fs_prio);
 	err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->op_mod, ft->type,
@@ -882,17 +1029,17 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
 	err = connect_flow_table(root->dev, ft, fs_prio);
 	if (err)
 		goto destroy_ft;
-	lock_ref_node(&fs_prio->node);
+	ft->node.active = true;
+	down_write_ref_node(&fs_prio->node);
 	tree_add_node(&ft->node, &fs_prio->node);
 	list_add_flow_table(ft, fs_prio);
 	fs_prio->num_ft++;
-	unlock_ref_node(&fs_prio->node);
+	up_write_ref_node(&fs_prio->node);
 	mutex_unlock(&root->chain_lock);
 	return ft;
 destroy_ft:
 	mlx5_cmd_destroy_flow_table(root->dev, ft);
 free_ft:
-	ida_destroy(&ft->fte_allocator);
 	kfree(ft);
 unlock_root:
 	mutex_unlock(&root->chain_lock);
@@ -960,54 +1107,6 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
 }
 EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table);
 
-/* Flow table should be locked */
-static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table *ft,
-							u32 *fg_in,
-							struct list_head
-							*prev_fg,
-							bool is_auto_fg)
-{
-	struct mlx5_flow_group *fg;
-	struct mlx5_core_dev *dev = get_dev(&ft->node);
-	int err;
-
-	if (!dev)
-		return ERR_PTR(-ENODEV);
-
-	fg = alloc_flow_group(fg_in);
-	if (IS_ERR(fg))
-		return fg;
-
-	err = rhltable_insert(&ft->fgs_hash, &fg->hash, rhash_fg);
-	if (err)
-		goto err_free_fg;
-
-	err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id);
-	if (err)
-		goto err_remove_fg;
-
-	if (ft->autogroup.active)
-		ft->autogroup.num_groups++;
-	/* Add node to tree */
-	tree_init_node(&fg->node, !is_auto_fg, del_flow_group);
-	tree_add_node(&fg->node, &ft->node);
-	/* Add node to group list */
-	list_add(&fg->node.list, prev_fg);
-
-	trace_mlx5_fs_add_fg(fg);
-	return fg;
-
-err_remove_fg:
-	WARN_ON(rhltable_remove(&ft->fgs_hash,
-				&fg->hash,
-				rhash_fg));
-err_free_fg:
-	rhashtable_destroy(&fg->ftes_hash);
-	kfree(fg);
-
-	return ERR_PTR(err);
-}
-
 struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
 					       u32 *fg_in)
 {
@@ -1016,7 +1115,13 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
 	u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
 					    fg_in,
 					    match_criteria_enable);
+	int start_index = MLX5_GET(create_flow_group_in, fg_in,
+				   start_flow_index);
+	int end_index = MLX5_GET(create_flow_group_in, fg_in,
+				 end_flow_index);
+	struct mlx5_core_dev *dev = get_dev(&ft->node);
 	struct mlx5_flow_group *fg;
+	int err;
 
 	if (!check_valid_mask(match_criteria_enable, match_criteria))
 		return ERR_PTR(-EINVAL);
@@ -1024,9 +1129,21 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
 	if (ft->autogroup.active)
 		return ERR_PTR(-EPERM);
 
-	lock_ref_node(&ft->node);
-	fg = create_flow_group_common(ft, fg_in, ft->node.children.prev, false);
-	unlock_ref_node(&ft->node);
+	down_write_ref_node(&ft->node);
+	fg = alloc_insert_flow_group(ft, match_criteria_enable, match_criteria,
+				     start_index, end_index,
+				     ft->node.children.prev);
+	up_write_ref_node(&ft->node);
+	if (IS_ERR(fg))
+		return fg;
+
+	err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id);
+	if (err) {
+		tree_put_node(&fg->node);
+		return ERR_PTR(err);
+	}
+	trace_mlx5_fs_add_fg(fg);
+	fg->node.active = true;
 
 	return fg;
 }
@@ -1067,7 +1184,7 @@ static void destroy_flow_handle(struct fs_fte *fte,
 				int i)
 {
 	for (; --i >= 0;) {
-		if (atomic_dec_and_test(&handle->rule[i]->node.refcount)) {
+		if (refcount_dec_and_test(&handle->rule[i]->node.refcount)) {
 			fte->dests_size--;
 			list_del(&handle->rule[i]->node.list);
 			kfree(handle->rule[i]);
@@ -1098,7 +1215,7 @@ create_flow_handle(struct fs_fte *fte,
 		if (dest) {
 			rule = find_flow_rule(fte, dest + i);
 			if (rule) {
-				atomic_inc(&rule->node.refcount);
+				refcount_inc(&rule->node.refcount);
 				goto rule_found;
 			}
 		}
@@ -1111,7 +1228,7 @@ create_flow_handle(struct fs_fte *fte,
 		/* Add dest to dests list- we need flow tables to be in the
 		 * end of the list for forward to next prio rules.
 		 */
-		tree_init_node(&rule->node, 1, del_rule);
+		tree_init_node(&rule->node, NULL, del_sw_hw_rule);
 		if (dest &&
 		    dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
 			list_add(&rule->node.list, &fte->node.children);
@@ -1167,7 +1284,9 @@ add_rule_fte(struct fs_fte *fte,
 	if (err)
 		goto free_handle;
 
+	fte->node.active = true;
 	fte->status |= FS_FTE_STATUS_EXISTING;
+	atomic_inc(&fte->node.version);
 
 out:
 	return handle;
@@ -1177,59 +1296,17 @@ free_handle:
 	return ERR_PTR(err);
 }
 
-static struct fs_fte *create_fte(struct mlx5_flow_group *fg,
-				 u32 *match_value,
-				 struct mlx5_flow_act *flow_act)
-{
-	struct mlx5_flow_table *ft;
-	struct fs_fte *fte;
-	int index;
-	int ret;
-
-	fs_get_obj(ft, fg->node.parent);
-	index = ida_simple_get(&ft->fte_allocator, fg->start_index,
-			       fg->start_index + fg->max_ftes,
-			       GFP_KERNEL);
-	if (index < 0)
-		return ERR_PTR(index);
-
-	fte = alloc_fte(flow_act, match_value, index);
-	if (IS_ERR(fte)) {
-		ret = PTR_ERR(fte);
-		goto err_alloc;
-	}
-	ret = rhashtable_insert_fast(&fg->ftes_hash, &fte->hash, rhash_fte);
-	if (ret)
-		goto err_hash;
-
-	return fte;
-
-err_hash:
-	kfree(fte);
-err_alloc:
-	ida_simple_remove(&ft->fte_allocator, index);
-	return ERR_PTR(ret);
-}
-
-static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft,
-						u8 match_criteria_enable,
-						u32 *match_criteria)
+static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table  *ft,
+						     struct mlx5_flow_spec *spec)
 {
-	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
 	struct list_head *prev = &ft->node.children;
-	unsigned int candidate_index = 0;
 	struct mlx5_flow_group *fg;
-	void *match_criteria_addr;
+	unsigned int candidate_index = 0;
 	unsigned int group_size = 0;
-	u32 *in;
 
 	if (!ft->autogroup.active)
 		return ERR_PTR(-ENOENT);
 
-	in = kvzalloc(inlen, GFP_KERNEL);
-	if (!in)
-		return ERR_PTR(-ENOMEM);
-
 	if (ft->autogroup.num_groups < ft->autogroup.required_groups)
 		/* We save place for flow groups in addition to max types */
 		group_size = ft->max_fte / (ft->autogroup.required_groups + 1);
@@ -1247,25 +1324,55 @@ static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft,
 		prev = &fg->node.list;
 	}
 
-	if (candidate_index + group_size > ft->max_fte) {
-		fg = ERR_PTR(-ENOSPC);
+	if (candidate_index + group_size > ft->max_fte)
+		return ERR_PTR(-ENOSPC);
+
+	fg = alloc_insert_flow_group(ft,
+				     spec->match_criteria_enable,
+				     spec->match_criteria,
+				     candidate_index,
+				     candidate_index + group_size - 1,
+				     prev);
+	if (IS_ERR(fg))
 		goto out;
-	}
+
+	ft->autogroup.num_groups++;
+
+out:
+	return fg;
+}
+
+static int create_auto_flow_group(struct mlx5_flow_table *ft,
+				  struct mlx5_flow_group *fg)
+{
+	struct mlx5_core_dev *dev = get_dev(&ft->node);
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	void *match_criteria_addr;
+	int err;
+	u32 *in;
+
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
 
 	MLX5_SET(create_flow_group_in, in, match_criteria_enable,
-		 match_criteria_enable);
-	MLX5_SET(create_flow_group_in, in, start_flow_index, candidate_index);
-	MLX5_SET(create_flow_group_in, in, end_flow_index,   candidate_index +
-		 group_size - 1);
+		 fg->mask.match_criteria_enable);
+	MLX5_SET(create_flow_group_in, in, start_flow_index, fg->start_index);
+	MLX5_SET(create_flow_group_in, in, end_flow_index,   fg->start_index +
+		 fg->max_ftes - 1);
 	match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in,
 					   in, match_criteria);
-	memcpy(match_criteria_addr, match_criteria,
-	       MLX5_ST_SZ_BYTES(fte_match_param));
+	memcpy(match_criteria_addr, fg->mask.match_criteria,
+	       sizeof(fg->mask.match_criteria));
+
+	err = mlx5_cmd_create_flow_group(dev, ft, in, &fg->id);
+	if (!err) {
+		fg->node.active = true;
+		trace_mlx5_fs_add_fg(fg);
+	}
 
-	fg = create_flow_group_common(ft, in, prev, true);
-out:
 	kvfree(in);
-	return fg;
+	return err;
 }
 
 static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
@@ -1340,60 +1447,30 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
 					    struct fs_fte *fte)
 {
 	struct mlx5_flow_handle *handle;
-	struct mlx5_flow_table *ft;
+	int old_action;
 	int i;
+	int ret;
 
-	if (fte) {
-		int old_action;
-		int ret;
-
-		nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
-		ret = check_conflicting_ftes(fte, flow_act);
-		if (ret) {
-			handle = ERR_PTR(ret);
-			goto unlock_fte;
-		}
-
-		old_action = fte->action;
-		fte->action |= flow_act->action;
-		handle = add_rule_fte(fte, fg, dest, dest_num,
-				      old_action != flow_act->action);
-		if (IS_ERR(handle)) {
-			fte->action = old_action;
-			goto unlock_fte;
-		} else {
-			trace_mlx5_fs_set_fte(fte, false);
-			goto add_rules;
-		}
-	}
-	fs_get_obj(ft, fg->node.parent);
+	ret = check_conflicting_ftes(fte, flow_act);
+	if (ret)
+		return ERR_PTR(ret);
 
-	fte = create_fte(fg, match_value, flow_act);
-	if (IS_ERR(fte))
-		return (void *)fte;
-	tree_init_node(&fte->node, 0, del_fte);
-	nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
-	handle = add_rule_fte(fte, fg, dest, dest_num, false);
+	old_action = fte->action;
+	fte->action |= flow_act->action;
+	handle = add_rule_fte(fte, fg, dest, dest_num,
+			      old_action != flow_act->action);
 	if (IS_ERR(handle)) {
-		unlock_ref_node(&fte->node);
-		destroy_fte(fte, fg);
-		kfree(fte);
+		fte->action = old_action;
 		return handle;
 	}
+	trace_mlx5_fs_set_fte(fte, false);
 
-	tree_add_node(&fte->node, &fg->node);
-	/* fte list isn't sorted */
-	list_add_tail(&fte->node.list, &fg->node.children);
-	trace_mlx5_fs_set_fte(fte, true);
-add_rules:
 	for (i = 0; i < handle->num_rules; i++) {
-		if (atomic_read(&handle->rule[i]->node.refcount) == 1) {
+		if (refcount_read(&handle->rule[i]->node.refcount) == 1) {
 			tree_add_node(&handle->rule[i]->node, &fte->node);
 			trace_mlx5_fs_add_rule(handle->rule[i]);
 		}
 	}
-unlock_fte:
-	unlock_ref_node(&fte->node);
 	return handle;
 }
 
@@ -1441,93 +1518,197 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest,
 	return true;
 }
 
-static struct mlx5_flow_handle *
-try_add_to_existing_fg(struct mlx5_flow_table *ft,
-		       struct mlx5_flow_spec *spec,
-		       struct mlx5_flow_act *flow_act,
-		       struct mlx5_flow_destination *dest,
-		       int dest_num)
-{
+struct match_list {
+	struct list_head	list;
 	struct mlx5_flow_group *g;
-	struct mlx5_flow_handle *rule = ERR_PTR(-ENOENT);
+};
+
+struct match_list_head {
+	struct list_head  list;
+	struct match_list first;
+};
+
+static void free_match_list(struct match_list_head *head)
+{
+	if (!list_empty(&head->list)) {
+		struct match_list *iter, *match_tmp;
+
+		list_del(&head->first.list);
+		tree_put_node(&head->first.g->node);
+		list_for_each_entry_safe(iter, match_tmp, &head->list,
+					 list) {
+			tree_put_node(&iter->g->node);
+			list_del(&iter->list);
+			kfree(iter);
+		}
+	}
+}
+
+static int build_match_list(struct match_list_head *match_head,
+			    struct mlx5_flow_table *ft,
+			    struct mlx5_flow_spec *spec)
+{
 	struct rhlist_head *tmp, *list;
-	struct match_list {
-		struct list_head	list;
-		struct mlx5_flow_group *g;
-	} match_list, *iter;
-	LIST_HEAD(match_head);
+	struct mlx5_flow_group *g;
+	int err = 0;
 
 	rcu_read_lock();
+	INIT_LIST_HEAD(&match_head->list);
 	/* Collect all fgs which has a matching match_criteria */
 	list = rhltable_lookup(&ft->fgs_hash, spec, rhash_fg);
+	/* RCU is atomic, we can't execute FW commands here */
 	rhl_for_each_entry_rcu(g, tmp, list, hash) {
 		struct match_list *curr_match;
 
-		if (likely(list_empty(&match_head))) {
-			match_list.g = g;
-			list_add_tail(&match_list.list, &match_head);
+		if (likely(list_empty(&match_head->list))) {
+			if (!tree_get_node(&g->node))
+				continue;
+			match_head->first.g = g;
+			list_add_tail(&match_head->first.list,
+				      &match_head->list);
 			continue;
 		}
-		curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC);
 
+		curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC);
 		if (!curr_match) {
-			rcu_read_unlock();
-			rule = ERR_PTR(-ENOMEM);
-			goto free_list;
+			free_match_list(match_head);
+			err = -ENOMEM;
+			goto out;
+		}
+		if (!tree_get_node(&g->node)) {
+			kfree(curr_match);
+			continue;
 		}
 		curr_match->g = g;
-		list_add_tail(&curr_match->list, &match_head);
+		list_add_tail(&curr_match->list, &match_head->list);
 	}
+out:
 	rcu_read_unlock();
+	return err;
+}
+
+static u64 matched_fgs_get_version(struct list_head *match_head)
+{
+	struct match_list *iter;
+	u64 version = 0;
+
+	list_for_each_entry(iter, match_head, list)
+		version += (u64)atomic_read(&iter->g->node.version);
+	return version;
+}
 
+static struct mlx5_flow_handle *
+try_add_to_existing_fg(struct mlx5_flow_table *ft,
+		       struct list_head *match_head,
+		       struct mlx5_flow_spec *spec,
+		       struct mlx5_flow_act *flow_act,
+		       struct mlx5_flow_destination *dest,
+		       int dest_num,
+		       int ft_version)
+{
+	struct mlx5_flow_steering *steering = get_steering(&ft->node);
+	struct mlx5_flow_group *g;
+	struct mlx5_flow_handle *rule;
+	struct match_list *iter;
+	bool take_write = false;
+	struct fs_fte *fte;
+	u64  version;
+	int err;
+
+	fte = alloc_fte(ft, spec->match_value, flow_act);
+	if (IS_ERR(fte))
+		return  ERR_PTR(-ENOMEM);
+
+	list_for_each_entry(iter, match_head, list) {
+		nested_down_read_ref_node(&iter->g->node, FS_LOCK_PARENT);
+		ida_pre_get(&iter->g->fte_allocator, GFP_KERNEL);
+	}
+
+search_again_locked:
+	version = matched_fgs_get_version(match_head);
 	/* Try to find a fg that already contains a matching fte */
-	list_for_each_entry(iter, &match_head, list) {
-		struct fs_fte *fte;
+	list_for_each_entry(iter, match_head, list) {
+		struct fs_fte *fte_tmp;
 
 		g = iter->g;
-		nested_lock_ref_node(&g->node, FS_MUTEX_PARENT);
-		fte = rhashtable_lookup_fast(&g->ftes_hash, spec->match_value,
-					     rhash_fte);
-		if (fte) {
-			rule = add_rule_fg(g, spec->match_value,
-					   flow_act, dest, dest_num, fte);
-			unlock_ref_node(&g->node);
-			goto free_list;
+		fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, spec->match_value,
+						 rhash_fte);
+		if (!fte_tmp || !tree_get_node(&fte_tmp->node))
+			continue;
+
+		nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD);
+		if (!take_write) {
+			list_for_each_entry(iter, match_head, list)
+				up_read_ref_node(&iter->g->node);
+		} else {
+			list_for_each_entry(iter, match_head, list)
+				up_write_ref_node(&iter->g->node);
 		}
-		unlock_ref_node(&g->node);
+
+		rule = add_rule_fg(g, spec->match_value,
+				   flow_act, dest, dest_num, fte_tmp);
+		up_write_ref_node(&fte_tmp->node);
+		tree_put_node(&fte_tmp->node);
+		kmem_cache_free(steering->ftes_cache, fte);
+		return rule;
 	}
 
 	/* No group with matching fte found. Try to add a new fte to any
 	 * matching fg.
 	 */
-	list_for_each_entry(iter, &match_head, list) {
-		g = iter->g;
 
-		nested_lock_ref_node(&g->node, FS_MUTEX_PARENT);
-		rule = add_rule_fg(g, spec->match_value,
-				   flow_act, dest, dest_num, NULL);
-		if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) {
-			unlock_ref_node(&g->node);
-			goto free_list;
-		}
-		unlock_ref_node(&g->node);
+	if (!take_write) {
+		list_for_each_entry(iter, match_head, list)
+			up_read_ref_node(&iter->g->node);
+		list_for_each_entry(iter, match_head, list)
+			nested_down_write_ref_node(&iter->g->node,
+						   FS_LOCK_PARENT);
+		take_write = true;
 	}
 
-free_list:
-	if (!list_empty(&match_head)) {
-		struct match_list *match_tmp;
+	/* Check the ft version, for case that new flow group
+	 * was added while the fgs weren't locked
+	 */
+	if (atomic_read(&ft->node.version) != ft_version) {
+		rule = ERR_PTR(-EAGAIN);
+		goto out;
+	}
 
-		/* The most common case is having one FG. Since we want to
-		 * optimize this case, we save the first on the stack.
-		 * Therefore, no need to free it.
-		 */
-		list_del(&list_first_entry(&match_head, typeof(*iter), list)->list);
-		list_for_each_entry_safe(iter, match_tmp, &match_head, list) {
-			list_del(&iter->list);
-			kfree(iter);
+	/* Check the fgs version, for case the new FTE with the
+	 * same values was added while the fgs weren't locked
+	 */
+	if (version != matched_fgs_get_version(match_head))
+		goto search_again_locked;
+
+	list_for_each_entry(iter, match_head, list) {
+		g = iter->g;
+
+		if (!g->node.active)
+			continue;
+		err = insert_fte(g, fte);
+		if (err) {
+			if (err == -ENOSPC)
+				continue;
+			list_for_each_entry(iter, match_head, list)
+				up_write_ref_node(&iter->g->node);
+			kmem_cache_free(steering->ftes_cache, fte);
+			return ERR_PTR(err);
 		}
-	}
 
+		nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
+		list_for_each_entry(iter, match_head, list)
+			up_write_ref_node(&iter->g->node);
+		rule = add_rule_fg(g, spec->match_value,
+				   flow_act, dest, dest_num, fte);
+		up_write_ref_node(&fte->node);
+		tree_put_node(&fte->node);
+		return rule;
+	}
+	rule = ERR_PTR(-ENOENT);
+out:
+	list_for_each_entry(iter, match_head, list)
+		up_write_ref_node(&iter->g->node);
+	kmem_cache_free(steering->ftes_cache, fte);
 	return rule;
 }
 
@@ -1539,8 +1720,14 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft,
 		     int dest_num)
 
 {
+	struct mlx5_flow_steering *steering = get_steering(&ft->node);
 	struct mlx5_flow_group *g;
 	struct mlx5_flow_handle *rule;
+	struct match_list_head match_head;
+	bool take_write = false;
+	struct fs_fte *fte;
+	int version;
+	int err;
 	int i;
 
 	if (!check_valid_spec(spec))
@@ -1550,33 +1737,73 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft,
 		if (!dest_is_valid(&dest[i], flow_act->action, ft))
 			return ERR_PTR(-EINVAL);
 	}
+	nested_down_read_ref_node(&ft->node, FS_LOCK_GRANDPARENT);
+search_again_locked:
+	version = atomic_read(&ft->node.version);
+
+	/* Collect all fgs which has a matching match_criteria */
+	err = build_match_list(&match_head, ft, spec);
+	if (err)
+		return ERR_PTR(err);
 
-	nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT);
-	rule = try_add_to_existing_fg(ft, spec, flow_act, dest, dest_num);
-	if (!IS_ERR(rule))
-		goto unlock;
+	if (!take_write)
+		up_read_ref_node(&ft->node);
 
-	g = create_autogroup(ft, spec->match_criteria_enable,
-			     spec->match_criteria);
+	rule = try_add_to_existing_fg(ft, &match_head.list, spec, flow_act, dest,
+				      dest_num, version);
+	free_match_list(&match_head);
+	if (!IS_ERR(rule) ||
+	    (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN))
+		return rule;
+
+	if (!take_write) {
+		nested_down_write_ref_node(&ft->node, FS_LOCK_GRANDPARENT);
+		take_write = true;
+	}
+
+	if (PTR_ERR(rule) == -EAGAIN ||
+	    version != atomic_read(&ft->node.version))
+		goto search_again_locked;
+
+	g = alloc_auto_flow_group(ft, spec);
 	if (IS_ERR(g)) {
 		rule = (void *)g;
-		goto unlock;
+		up_write_ref_node(&ft->node);
+		return rule;
 	}
 
-	rule = add_rule_fg(g, spec->match_value, flow_act, dest,
-			   dest_num, NULL);
-	if (IS_ERR(rule)) {
-		/* Remove assumes refcount > 0 and autogroup creates a group
-		 * with a refcount = 0.
-		 */
-		unlock_ref_node(&ft->node);
-		tree_get_node(&g->node);
-		tree_remove_node(&g->node);
-		return rule;
+	nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
+	up_write_ref_node(&ft->node);
+
+	err = create_auto_flow_group(ft, g);
+	if (err)
+		goto err_release_fg;
+
+	fte = alloc_fte(ft, spec->match_value, flow_act);
+	if (IS_ERR(fte)) {
+		err = PTR_ERR(fte);
+		goto err_release_fg;
 	}
-unlock:
-	unlock_ref_node(&ft->node);
+
+	err = insert_fte(g, fte);
+	if (err) {
+		kmem_cache_free(steering->ftes_cache, fte);
+		goto err_release_fg;
+	}
+
+	nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
+	up_write_ref_node(&g->node);
+	rule = add_rule_fg(g, spec->match_value, flow_act, dest,
+			   dest_num, fte);
+	up_write_ref_node(&fte->node);
+	tree_put_node(&fte->node);
+	tree_put_node(&g->node);
 	return rule;
+
+err_release_fg:
+	up_write_ref_node(&g->node);
+	tree_put_node(&g->node);
+	return ERR_PTR(err);
 }
 
 static bool fwd_next_prio_supported(struct mlx5_flow_table *ft)
@@ -1593,7 +1820,7 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft,
 		    int dest_num)
 {
 	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
-	struct mlx5_flow_destination gen_dest;
+	struct mlx5_flow_destination gen_dest = {};
 	struct mlx5_flow_table *next_ft = NULL;
 	struct mlx5_flow_handle *handle = NULL;
 	u32 sw_action = flow_act->action;
@@ -1661,23 +1888,43 @@ static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft)
 static int update_root_ft_destroy(struct mlx5_flow_table *ft)
 {
 	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
+	struct mlx5_ft_underlay_qp *uqp;
 	struct mlx5_flow_table *new_root_ft = NULL;
+	int err = 0;
+	u32 qpn;
 
 	if (root->root_ft != ft)
 		return 0;
 
 	new_root_ft = find_next_ft(ft);
-	if (new_root_ft) {
-		int err = mlx5_cmd_update_root_ft(root->dev, new_root_ft,
-						  root->underlay_qpn);
 
-		if (err) {
-			mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
-				       ft->id);
-			return err;
+	if (!new_root_ft) {
+		root->root_ft = NULL;
+		return 0;
+	}
+
+	if (list_empty(&root->underlay_qpns)) {
+		/* Don't set any QPN (zero) in case QPN list is empty */
+		qpn = 0;
+		err = mlx5_cmd_update_root_ft(root->dev, new_root_ft, qpn,
+					      false);
+	} else {
+		list_for_each_entry(uqp, &root->underlay_qpns, list) {
+			qpn = uqp->qpn;
+			err = mlx5_cmd_update_root_ft(root->dev, new_root_ft,
+						      qpn, false);
+			if (err)
+				break;
 		}
 	}
-	root->root_ft = new_root_ft;
+
+	if (err)
+		mlx5_core_warn(root->dev,
+			       "Update root flow table of id(%u) qpn(%d) failed\n",
+			       ft->id, qpn);
+	else
+		root->root_ft = new_root_ft;
+
 	return 0;
 }
 
@@ -1817,7 +2064,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
 		return ERR_PTR(-ENOMEM);
 
 	fs_prio->node.type = FS_TYPE_PRIO;
-	tree_init_node(&fs_prio->node, 1, NULL);
+	tree_init_node(&fs_prio->node, NULL, NULL);
 	tree_add_node(&fs_prio->node, &ns->node);
 	fs_prio->num_levels = num_levels;
 	fs_prio->prio = prio;
@@ -1843,7 +2090,7 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio)
 		return ERR_PTR(-ENOMEM);
 
 	fs_init_namespace(ns);
-	tree_init_node(&ns->node, 1, NULL);
+	tree_init_node(&ns->node, NULL, NULL);
 	tree_add_node(&ns->node, &prio->node);
 	list_add_tail(&ns->node.list, &prio->node.children);
 
@@ -1965,10 +2212,12 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering
 	root_ns->dev = steering->dev;
 	root_ns->table_type = table_type;
 
+	INIT_LIST_HEAD(&root_ns->underlay_qpns);
+
 	ns = &root_ns->ns;
 	fs_init_namespace(ns);
 	mutex_init(&root_ns->chain_lock);
-	tree_init_node(&ns->node, 1, NULL);
+	tree_init_node(&ns->node, NULL, NULL);
 	tree_add_node(&ns->node, NULL);
 
 	return root_ns;
@@ -2066,8 +2315,10 @@ static void clean_tree(struct fs_node *node)
 		struct fs_node *iter;
 		struct fs_node *temp;
 
+		tree_get_node(node);
 		list_for_each_entry_safe(iter, temp, &node->children, list)
 			clean_tree(iter);
+		tree_put_node(node);
 		tree_remove_node(node);
 	}
 }
@@ -2091,6 +2342,8 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
 	cleanup_root_ns(steering->sniffer_rx_root_ns);
 	cleanup_root_ns(steering->sniffer_tx_root_ns);
 	mlx5_cleanup_fc_stats(dev);
+	kmem_cache_destroy(steering->ftes_cache);
+	kmem_cache_destroy(steering->fgs_cache);
 	kfree(steering);
 }
 
@@ -2196,6 +2449,16 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
 	steering->dev = dev;
 	dev->priv.steering = steering;
 
+	steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
+						sizeof(struct mlx5_flow_group), 0,
+						0, NULL);
+	steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
+						 0, NULL);
+	if (!steering->ftes_cache || !steering->fgs_cache) {
+		err = -ENOMEM;
+		goto err;
+	}
+
 	if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
 	      (MLX5_CAP_GEN(dev, nic_flow_table))) ||
 	     ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
@@ -2245,17 +2508,76 @@ err:
 int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
 {
 	struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns;
+	struct mlx5_ft_underlay_qp *new_uqp;
+	int err = 0;
+
+	new_uqp = kzalloc(sizeof(*new_uqp), GFP_KERNEL);
+	if (!new_uqp)
+		return -ENOMEM;
+
+	mutex_lock(&root->chain_lock);
+
+	if (!root->root_ft) {
+		err = -EINVAL;
+		goto update_ft_fail;
+	}
+
+	err = mlx5_cmd_update_root_ft(dev, root->root_ft, underlay_qpn, false);
+	if (err) {
+		mlx5_core_warn(dev, "Failed adding underlay QPN (%u) to root FT err(%d)\n",
+			       underlay_qpn, err);
+		goto update_ft_fail;
+	}
+
+	new_uqp->qpn = underlay_qpn;
+	list_add_tail(&new_uqp->list, &root->underlay_qpns);
+
+	mutex_unlock(&root->chain_lock);
 
-	root->underlay_qpn = underlay_qpn;
 	return 0;
+
+update_ft_fail:
+	mutex_unlock(&root->chain_lock);
+	kfree(new_uqp);
+	return err;
 }
 EXPORT_SYMBOL(mlx5_fs_add_rx_underlay_qpn);
 
 int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
 {
 	struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns;
+	struct mlx5_ft_underlay_qp *uqp;
+	bool found = false;
+	int err = 0;
+
+	mutex_lock(&root->chain_lock);
+	list_for_each_entry(uqp, &root->underlay_qpns, list) {
+		if (uqp->qpn == underlay_qpn) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		mlx5_core_warn(dev, "Failed finding underlay qp (%u) in qpn list\n",
+			       underlay_qpn);
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = mlx5_cmd_update_root_ft(dev, root->root_ft, underlay_qpn, true);
+	if (err)
+		mlx5_core_warn(dev, "Failed removing underlay QPN (%u) from root FT err(%d)\n",
+			       underlay_qpn, err);
+
+	list_del(&uqp->list);
+	mutex_unlock(&root->chain_lock);
+	kfree(uqp);
 
-	root->underlay_qpn = 0;
 	return 0;
+
+out:
+	mutex_unlock(&root->chain_lock);
+	return err;
 }
 EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 48dd78975062..397d24a621a4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -33,6 +33,7 @@
 #ifndef _MLX5_FS_CORE_
 #define _MLX5_FS_CORE_
 
+#include <linux/refcount.h>
 #include <linux/mlx5/fs.h>
 #include <linux/rhashtable.h>
 
@@ -66,6 +67,8 @@ enum fs_fte_status {
 
 struct mlx5_flow_steering {
 	struct mlx5_core_dev *dev;
+	struct kmem_cache               *fgs_cache;
+	struct kmem_cache               *ftes_cache;
 	struct mlx5_flow_root_namespace *root_ns;
 	struct mlx5_flow_root_namespace *fdb_root_ns;
 	struct mlx5_flow_root_namespace *esw_egress_root_ns;
@@ -81,9 +84,12 @@ struct fs_node {
 	struct fs_node		*parent;
 	struct fs_node		*root;
 	/* lock the node for writing and traversing */
-	struct mutex		lock;
-	atomic_t		refcount;
-	void			(*remove_func)(struct fs_node *);
+	struct rw_semaphore	lock;
+	refcount_t		refcount;
+	bool			active;
+	void			(*del_hw_func)(struct fs_node *);
+	void			(*del_sw_func)(struct fs_node *);
+	atomic_t		version;
 };
 
 struct mlx5_flow_rule {
@@ -120,7 +126,6 @@ struct mlx5_flow_table {
 	/* FWD rules that point on this flow table */
 	struct list_head		fwd_rules;
 	u32				flags;
-	struct ida			fte_allocator;
 	struct rhltable			fgs_hash;
 };
 
@@ -147,6 +152,11 @@ struct mlx5_fc {
 	struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
 };
 
+struct mlx5_ft_underlay_qp {
+	struct list_head list;
+	u32 qpn;
+};
+
 #define MLX5_FTE_MATCH_PARAM_RESERVED	reserved_at_600
 /* Calculate the fte_match_param length and without the reserved length.
  * Make sure the reserved field is the last.
@@ -200,6 +210,7 @@ struct mlx5_flow_group {
 	struct mlx5_flow_group_mask	mask;
 	u32				start_index;
 	u32				max_ftes;
+	struct ida			fte_allocator;
 	u32				id;
 	struct rhashtable		ftes_hash;
 	struct rhlist_head		hash;
@@ -212,7 +223,7 @@ struct mlx5_flow_root_namespace {
 	struct mlx5_flow_table		*root_ft;
 	/* Should be held when chaining flow tables */
 	struct mutex			chain_lock;
-	u32				underlay_qpn;
+	struct list_head		underlay_qpns;
 };
 
 int mlx5_init_fc_stats(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 2c71557d1cee..5ef1b56b6a96 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -106,6 +106,13 @@ static int mlx5_get_mcam_reg(struct mlx5_core_dev *dev)
 				   MLX5_MCAM_REGS_FIRST_128);
 }
 
+static int mlx5_get_qcam_reg(struct mlx5_core_dev *dev)
+{
+	return mlx5_query_qcam_reg(dev, dev->caps.qcam,
+				   MLX5_QCAM_FEATURE_ENHANCED_FEATURES,
+				   MLX5_QCAM_REGS_FIRST_128);
+}
+
 int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
 {
 	int err;
@@ -182,6 +189,9 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
 	if (MLX5_CAP_GEN(dev, mcam_reg))
 		mlx5_get_mcam_reg(dev);
 
+	if (MLX5_CAP_GEN(dev, qcam_reg))
+		mlx5_get_qcam_reg(dev);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index db86e1506c8b..1a0e797ad001 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -285,9 +285,9 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev)
 	spin_unlock_irqrestore(&health->wq_lock, flags);
 }
 
-static void poll_health(unsigned long data)
+static void poll_health(struct timer_list *t)
 {
-	struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data;
+	struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer);
 	struct mlx5_core_health *health = &dev->priv.health;
 	u32 count;
 
@@ -320,15 +320,13 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev)
 {
 	struct mlx5_core_health *health = &dev->priv.health;
 
-	init_timer(&health->timer);
+	timer_setup(&health->timer, poll_health, 0);
 	health->sick = 0;
 	clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
 	clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
 	health->health = &dev->iseg->health;
 	health->health_counter = &dev->iseg->health_counter;
 
-	health->timer.data = (unsigned long)dev;
-	health->timer.function = poll_health;
 	health->timer.expires = round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL);
 	add_timer(&health->timer);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
index 43c126c63955..6f338a9219c8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
@@ -250,3 +250,8 @@ const struct ethtool_ops mlx5i_ethtool_ops = {
 	.get_link_ksettings = mlx5i_get_link_ksettings,
 	.get_link           = ethtool_op_get_link,
 };
+
+const struct ethtool_ops mlx5i_pkey_ethtool_ops = {
+	.get_drvinfo        = mlx5i_get_drvinfo,
+	.get_link           = ethtool_op_get_link,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 145e392ab849..d2a66dc4adc6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -40,8 +40,6 @@
 
 static int mlx5i_open(struct net_device *netdev);
 static int mlx5i_close(struct net_device *netdev);
-static int  mlx5i_dev_init(struct net_device *dev);
-static void mlx5i_dev_cleanup(struct net_device *dev);
 static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu);
 static int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
 
@@ -70,10 +68,10 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
 }
 
 /* Called directly after IPoIB netdevice was created to initialize SW structs */
-static void mlx5i_init(struct mlx5_core_dev *mdev,
-		       struct net_device *netdev,
-		       const struct mlx5e_profile *profile,
-		       void *ppriv)
+void mlx5i_init(struct mlx5_core_dev *mdev,
+		struct net_device *netdev,
+		const struct mlx5e_profile *profile,
+		void *ppriv)
 {
 	struct mlx5e_priv *priv  = mlx5i_epriv(netdev);
 
@@ -108,11 +106,69 @@ static void mlx5i_cleanup(struct mlx5e_priv *priv)
 	/* Do nothing .. */
 }
 
+int mlx5i_init_underlay_qp(struct mlx5e_priv *priv)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct mlx5i_priv *ipriv = priv->ppriv;
+	struct mlx5_core_qp *qp = &ipriv->qp;
+	struct mlx5_qp_context *context;
+	int ret;
+
+	/* QP states */
+	context = kzalloc(sizeof(*context), GFP_KERNEL);
+	if (!context)
+		return -ENOMEM;
+
+	context->flags = cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
+	context->pri_path.port = 1;
+	context->pri_path.pkey_index = cpu_to_be16(ipriv->pkey_index);
+	context->qkey = cpu_to_be32(IB_DEFAULT_Q_KEY);
+
+	ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, context, qp);
+	if (ret) {
+		mlx5_core_err(mdev, "Failed to modify qp RST2INIT, err: %d\n", ret);
+		goto err_qp_modify_to_err;
+	}
+	memset(context, 0, sizeof(*context));
+
+	ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, context, qp);
+	if (ret) {
+		mlx5_core_err(mdev, "Failed to modify qp INIT2RTR, err: %d\n", ret);
+		goto err_qp_modify_to_err;
+	}
+
+	ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, context, qp);
+	if (ret) {
+		mlx5_core_err(mdev, "Failed to modify qp RTR2RTS, err: %d\n", ret);
+		goto err_qp_modify_to_err;
+	}
+
+	kfree(context);
+	return 0;
+
+err_qp_modify_to_err:
+	mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2ERR_QP, 0, &context, qp);
+	kfree(context);
+	return ret;
+}
+
+void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv)
+{
+	struct mlx5i_priv *ipriv = priv->ppriv;
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct mlx5_qp_context context;
+	int err;
+
+	err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, &context,
+				  &ipriv->qp);
+	if (err)
+		mlx5_core_err(mdev, "Failed to modify qp 2RST, err: %d\n", err);
+}
+
 #define MLX5_QP_ENHANCED_ULP_STATELESS_MODE 2
 
-static int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
+int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
 {
-	struct mlx5_qp_context *context = NULL;
 	u32 *in = NULL;
 	void *addr_path;
 	int ret = 0;
@@ -140,43 +196,12 @@ static int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core
 		goto out;
 	}
 
-	/* QP states */
-	context = kzalloc(sizeof(*context), GFP_KERNEL);
-	if (!context) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	context->flags = cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
-	context->pri_path.port = 1;
-	context->qkey = cpu_to_be32(IB_DEFAULT_Q_KEY);
-
-	ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, context, qp);
-	if (ret) {
-		mlx5_core_err(mdev, "Failed to modify qp RST2INIT, err: %d\n", ret);
-		goto out;
-	}
-	memset(context, 0, sizeof(*context));
-
-	ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, context, qp);
-	if (ret) {
-		mlx5_core_err(mdev, "Failed to modify qp INIT2RTR, err: %d\n", ret);
-		goto out;
-	}
-
-	ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, context, qp);
-	if (ret) {
-		mlx5_core_err(mdev, "Failed to modify qp RTR2RTS, err: %d\n", ret);
-		goto out;
-	}
-
 out:
-	kfree(context);
 	kvfree(in);
 	return ret;
 }
 
-static void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
+void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
 {
 	mlx5_core_destroy_qp(mdev, qp);
 }
@@ -195,10 +220,14 @@ static int mlx5i_init_tx(struct mlx5e_priv *priv)
 	err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]);
 	if (err) {
 		mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err);
-		return err;
+		goto err_destroy_underlay_qp;
 	}
 
 	return 0;
+
+err_destroy_underlay_qp:
+	mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp);
+	return err;
 }
 
 static void mlx5i_cleanup_tx(struct mlx5e_priv *priv)
@@ -226,15 +255,24 @@ static int mlx5i_create_flow_steering(struct mlx5e_priv *priv)
 		priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
 	}
 
+	err = mlx5e_create_inner_ttc_table(priv);
+	if (err) {
+		netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n",
+			   err);
+		goto err_destroy_arfs_tables;
+	}
+
 	err = mlx5e_create_ttc_table(priv);
 	if (err) {
 		netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
 			   err);
-		goto err_destroy_arfs_tables;
+		goto err_destroy_inner_ttc_table;
 	}
 
 	return 0;
 
+err_destroy_inner_ttc_table:
+	mlx5e_destroy_inner_ttc_table(priv);
 err_destroy_arfs_tables:
 	mlx5e_arfs_destroy_tables(priv);
 
@@ -244,12 +282,12 @@ err_destroy_arfs_tables:
 static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv)
 {
 	mlx5e_destroy_ttc_table(priv);
+	mlx5e_destroy_inner_ttc_table(priv);
 	mlx5e_arfs_destroy_tables(priv);
 }
 
 static int mlx5i_init_rx(struct mlx5e_priv *priv)
 {
-	struct mlx5i_priv *ipriv  = priv->ppriv;
 	int err;
 
 	err = mlx5e_create_indirect_rqt(priv);
@@ -268,18 +306,12 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv)
 	if (err)
 		goto err_destroy_indirect_tirs;
 
-	err = mlx5_fs_add_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn);
-	if (err)
-		goto err_destroy_direct_tirs;
-
 	err = mlx5i_create_flow_steering(priv);
 	if (err)
-		goto err_remove_rx_underlay_qpn;
+		goto err_destroy_direct_tirs;
 
 	return 0;
 
-err_remove_rx_underlay_qpn:
-	mlx5_fs_remove_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn);
 err_destroy_direct_tirs:
 	mlx5e_destroy_direct_tirs(priv);
 err_destroy_indirect_tirs:
@@ -293,9 +325,6 @@ err_destroy_indirect_rqts:
 
 static void mlx5i_cleanup_rx(struct mlx5e_priv *priv)
 {
-	struct mlx5i_priv *ipriv  = priv->ppriv;
-
-	mlx5_fs_remove_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn);
 	mlx5i_destroy_flow_steering(priv);
 	mlx5e_destroy_direct_tirs(priv);
 	mlx5e_destroy_indirect_tirs(priv);
@@ -351,7 +380,7 @@ out:
 	return err;
 }
 
-static int mlx5i_dev_init(struct net_device *dev)
+int mlx5i_dev_init(struct net_device *dev)
 {
 	struct mlx5e_priv    *priv   = mlx5i_epriv(dev);
 	struct mlx5i_priv    *ipriv  = priv->ppriv;
@@ -361,6 +390,9 @@ static int mlx5i_dev_init(struct net_device *dev)
 	dev->dev_addr[2] = (ipriv->qp.qpn >>  8) & 0xff;
 	dev->dev_addr[3] = (ipriv->qp.qpn) & 0xff;
 
+	/* Add QPN to net-device mapping to HT */
+	mlx5i_pkey_add_qpn(dev ,ipriv->qp.qpn);
+
 	return 0;
 }
 
@@ -378,63 +410,84 @@ static int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	}
 }
 
-static void mlx5i_dev_cleanup(struct net_device *dev)
+void mlx5i_dev_cleanup(struct net_device *dev)
 {
 	struct mlx5e_priv    *priv   = mlx5i_epriv(dev);
-	struct mlx5_core_dev *mdev   = priv->mdev;
-	struct mlx5i_priv    *ipriv  = priv->ppriv;
-	struct mlx5_qp_context context;
+	struct mlx5i_priv    *ipriv = priv->ppriv;
+
+	mlx5i_uninit_underlay_qp(priv);
 
-	/* detach qp from flow-steering by reset it */
-	mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, &context, &ipriv->qp);
+	/* Delete QPN to net-device mapping from HT */
+	mlx5i_pkey_del_qpn(dev, ipriv->qp.qpn);
 }
 
 static int mlx5i_open(struct net_device *netdev)
 {
-	struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+	struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+	struct mlx5i_priv *ipriv = epriv->ppriv;
+	struct mlx5_core_dev *mdev = epriv->mdev;
 	int err;
 
-	mutex_lock(&priv->state_lock);
+	mutex_lock(&epriv->state_lock);
 
-	set_bit(MLX5E_STATE_OPENED, &priv->state);
+	set_bit(MLX5E_STATE_OPENED, &epriv->state);
 
-	err = mlx5e_open_channels(priv, &priv->channels);
-	if (err)
+	err = mlx5i_init_underlay_qp(epriv);
+	if (err) {
+		mlx5_core_warn(mdev, "prepare underlay qp state failed, %d\n", err);
 		goto err_clear_state_opened_flag;
+	}
+
+	err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+	if (err) {
+		mlx5_core_warn(mdev, "attach underlay qp to ft failed, %d\n", err);
+		goto err_reset_qp;
+	}
 
-	mlx5e_refresh_tirs(priv, false);
-	mlx5e_activate_priv_channels(priv);
-	mlx5e_timestamp_init(priv);
+	err = mlx5e_open_channels(epriv, &epriv->channels);
+	if (err)
+		goto err_remove_fs_underlay_qp;
 
-	mutex_unlock(&priv->state_lock);
+	mlx5e_refresh_tirs(epriv, false);
+	mlx5e_activate_priv_channels(epriv);
+	mlx5e_timestamp_set(epriv);
+
+	mutex_unlock(&epriv->state_lock);
 	return 0;
 
+err_remove_fs_underlay_qp:
+	mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+err_reset_qp:
+	mlx5i_uninit_underlay_qp(epriv);
 err_clear_state_opened_flag:
-	clear_bit(MLX5E_STATE_OPENED, &priv->state);
-	mutex_unlock(&priv->state_lock);
+	clear_bit(MLX5E_STATE_OPENED, &epriv->state);
+	mutex_unlock(&epriv->state_lock);
 	return err;
 }
 
 static int mlx5i_close(struct net_device *netdev)
 {
-	struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+	struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+	struct mlx5i_priv *ipriv = epriv->ppriv;
+	struct mlx5_core_dev *mdev = epriv->mdev;
 
 	/* May already be CLOSED in case a previous configuration operation
 	 * (e.g RX/TX queue size change) that involves close&open failed.
 	 */
-	mutex_lock(&priv->state_lock);
+	mutex_lock(&epriv->state_lock);
 
-	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+	if (!test_bit(MLX5E_STATE_OPENED, &epriv->state))
 		goto unlock;
 
-	clear_bit(MLX5E_STATE_OPENED, &priv->state);
+	clear_bit(MLX5E_STATE_OPENED, &epriv->state);
 
-	mlx5e_timestamp_cleanup(priv);
-	netif_carrier_off(priv->netdev);
-	mlx5e_deactivate_priv_channels(priv);
-	mlx5e_close_channels(&priv->channels);
+	netif_carrier_off(epriv->netdev);
+	mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+	mlx5i_uninit_underlay_qp(epriv);
+	mlx5e_deactivate_priv_channels(epriv);
+	mlx5e_close_channels(&epriv->channels);;
 unlock:
-	mutex_unlock(&priv->state_lock);
+	mutex_unlock(&epriv->state_lock);
 	return 0;
 }
 
@@ -492,6 +545,13 @@ static int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb,
 	return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey);
 }
 
+static void mlx5i_set_pkey_index(struct net_device *netdev, int id)
+{
+	struct mlx5i_priv *ipriv = netdev_priv(netdev);
+
+	ipriv->pkey_index = (u16)id;
+}
+
 static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev)
 {
 	if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_IB)
@@ -510,12 +570,13 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
 					  const char *name,
 					  void (*setup)(struct net_device *))
 {
-	const struct mlx5e_profile *profile = &mlx5i_nic_profile;
-	int nch = profile->max_nch(mdev);
+	const struct mlx5e_profile *profile;
 	struct net_device *netdev;
 	struct mlx5i_priv *ipriv;
 	struct mlx5e_priv *epriv;
 	struct rdma_netdev *rn;
+	bool sub_interface;
+	int nch;
 	int err;
 
 	if (mlx5i_check_required_hca_cap(mdev)) {
@@ -523,10 +584,15 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
 		return ERR_PTR(-EOPNOTSUPP);
 	}
 
-	/* This function should only be called once per mdev */
-	err = mlx5e_create_mdev_resources(mdev);
-	if (err)
-		return NULL;
+	/* TODO: Need to find a better way to check if child device*/
+	sub_interface = (mdev->mlx5e_res.pdn != 0);
+
+	if (sub_interface)
+		profile = mlx5i_pkey_get_profile();
+	else
+		profile = &mlx5i_nic_profile;
+
+	nch = profile->max_nch(mdev);
 
 	netdev = alloc_netdev_mqs(sizeof(struct mlx5i_priv) + sizeof(struct mlx5e_priv),
 				  name, NET_NAME_UNKNOWN,
@@ -535,7 +601,7 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
 				  nch);
 	if (!netdev) {
 		mlx5_core_warn(mdev, "alloc_netdev_mqs failed\n");
-		goto free_mdev_resources;
+		return NULL;
 	}
 
 	ipriv = netdev_priv(netdev);
@@ -545,6 +611,20 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
 	if (!epriv->wq)
 		goto err_free_netdev;
 
+	ipriv->sub_interface = sub_interface;
+	if (!ipriv->sub_interface) {
+		err = mlx5i_pkey_qpn_ht_init(netdev);
+		if (err) {
+			mlx5_core_warn(mdev, "allocate qpn_to_netdev ht failed\n");
+			goto destroy_wq;
+		}
+
+		/* This should only be called once per mdev */
+		err = mlx5e_create_mdev_resources(mdev);
+		if (err)
+			goto destroy_ht;
+	}
+
 	profile->init(mdev, netdev, profile, ipriv);
 
 	mlx5e_attach_netdev(epriv);
@@ -556,13 +636,16 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
 	rn->send = mlx5i_xmit;
 	rn->attach_mcast = mlx5i_attach_mcast;
 	rn->detach_mcast = mlx5i_detach_mcast;
+	rn->set_id = mlx5i_set_pkey_index;
 
 	return netdev;
 
+destroy_ht:
+	mlx5i_pkey_qpn_ht_cleanup(netdev);
+destroy_wq:
+	destroy_workqueue(epriv->wq);
 err_free_netdev:
 	free_netdev(netdev);
-free_mdev_resources:
-	mlx5e_destroy_mdev_resources(mdev);
 
 	return NULL;
 }
@@ -570,15 +653,18 @@ EXPORT_SYMBOL(mlx5_rdma_netdev_alloc);
 
 void mlx5_rdma_netdev_free(struct net_device *netdev)
 {
-	struct mlx5e_priv          *priv    = mlx5i_epriv(netdev);
+	struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+	struct mlx5i_priv *ipriv = priv->ppriv;
 	const struct mlx5e_profile *profile = priv->profile;
-	struct mlx5_core_dev       *mdev    = priv->mdev;
 
 	mlx5e_detach_netdev(priv);
 	profile->cleanup(priv);
 	destroy_workqueue(priv->wq);
-	free_netdev(netdev);
 
-	mlx5e_destroy_mdev_resources(mdev);
+	if (!ipriv->sub_interface) {
+		mlx5i_pkey_qpn_ht_cleanup(netdev);
+		mlx5e_destroy_mdev_resources(priv->mdev);
+	}
+	free_netdev(netdev);
 }
 EXPORT_SYMBOL(mlx5_rdma_netdev_free);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
index a0f405f520f7..49008022c306 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
@@ -39,6 +39,7 @@
 #define MLX5I_MAX_NUM_TC 1
 
 extern const struct ethtool_ops mlx5i_ethtool_ops;
+extern const struct ethtool_ops mlx5i_pkey_ethtool_ops;
 
 #define MLX5_IB_GRH_BYTES       40
 #define MLX5_IPOIB_ENCAP_LEN    4
@@ -49,10 +50,45 @@ extern const struct ethtool_ops mlx5i_ethtool_ops;
 struct mlx5i_priv {
 	struct rdma_netdev rn; /* keep this first */
 	struct mlx5_core_qp qp;
+	bool   sub_interface;
 	u32    qkey;
+	u16    pkey_index;
+	struct mlx5i_pkey_qpn_ht *qpn_htbl;
 	char  *mlx5e_priv[0];
 };
 
+/* Underlay QP create/destroy functions */
+int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp);
+void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp);
+
+/* Underlay QP state modification init/uninit functions */
+int mlx5i_init_underlay_qp(struct mlx5e_priv *priv);
+void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv);
+
+/* Allocate/Free underlay QPN to net-device hash table */
+int mlx5i_pkey_qpn_ht_init(struct net_device *netdev);
+void mlx5i_pkey_qpn_ht_cleanup(struct net_device *netdev);
+
+/* Add/Remove an underlay QPN to net-device mapping to/from the hash table */
+int mlx5i_pkey_add_qpn(struct net_device *netdev, u32 qpn);
+int mlx5i_pkey_del_qpn(struct net_device *netdev, u32 qpn);
+
+/* Get the net-device corresponding to the given underlay QPN */
+struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn);
+
+/* Shared ndo functionts */
+int mlx5i_dev_init(struct net_device *dev);
+void mlx5i_dev_cleanup(struct net_device *dev);
+
+/* Parent profile functions */
+void mlx5i_init(struct mlx5_core_dev *mdev,
+		struct net_device *netdev,
+		const struct mlx5e_profile *profile,
+		void *ppriv);
+
+/* Get child interface nic profile */
+const struct mlx5e_profile *mlx5i_pkey_get_profile(void);
+
 /* Extract mlx5e_priv from IPoIB netdev */
 #define mlx5i_epriv(netdev) ((void *)(((struct mlx5i_priv *)netdev_priv(netdev))->mlx5e_priv))
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
new file mode 100644
index 000000000000..531b02cc979b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
@@ -0,0 +1,350 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/hash.h>
+#include "ipoib.h"
+
+#define MLX5I_MAX_LOG_PKEY_SUP 7
+
+struct qpn_to_netdev {
+	struct net_device *netdev;
+	struct hlist_node hlist;
+	u32 underlay_qpn;
+};
+
+struct mlx5i_pkey_qpn_ht {
+	struct hlist_head buckets[1 << MLX5I_MAX_LOG_PKEY_SUP];
+	spinlock_t ht_lock; /* Synchronise with NAPI */
+};
+
+int mlx5i_pkey_qpn_ht_init(struct net_device *netdev)
+{
+	struct mlx5i_priv *ipriv = netdev_priv(netdev);
+	struct mlx5i_pkey_qpn_ht *qpn_htbl;
+
+	qpn_htbl = kzalloc(sizeof(*qpn_htbl), GFP_KERNEL);
+	if (!qpn_htbl)
+		return -ENOMEM;
+
+	ipriv->qpn_htbl = qpn_htbl;
+	spin_lock_init(&qpn_htbl->ht_lock);
+
+	return 0;
+}
+
+void mlx5i_pkey_qpn_ht_cleanup(struct net_device *netdev)
+{
+	struct mlx5i_priv *ipriv = netdev_priv(netdev);
+
+	kfree(ipriv->qpn_htbl);
+}
+
+static struct qpn_to_netdev *mlx5i_find_qpn_to_netdev_node(struct hlist_head *buckets,
+							   u32 qpn)
+{
+	struct hlist_head *h = &buckets[hash_32(qpn, MLX5I_MAX_LOG_PKEY_SUP)];
+	struct qpn_to_netdev *node;
+
+	hlist_for_each_entry(node, h, hlist) {
+		if (node->underlay_qpn == qpn)
+			return node;
+	}
+
+	return NULL;
+}
+
+int mlx5i_pkey_add_qpn(struct net_device *netdev, u32 qpn)
+{
+	struct mlx5i_priv *ipriv = netdev_priv(netdev);
+	struct mlx5i_pkey_qpn_ht *ht = ipriv->qpn_htbl;
+	u8 key = hash_32(qpn, MLX5I_MAX_LOG_PKEY_SUP);
+	struct qpn_to_netdev *new_node;
+
+	new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
+	if (!new_node)
+		return -ENOMEM;
+
+	new_node->netdev = netdev;
+	new_node->underlay_qpn = qpn;
+	spin_lock_bh(&ht->ht_lock);
+	hlist_add_head(&new_node->hlist, &ht->buckets[key]);
+	spin_unlock_bh(&ht->ht_lock);
+
+	return 0;
+}
+
+int mlx5i_pkey_del_qpn(struct net_device *netdev, u32 qpn)
+{
+	struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+	struct mlx5i_priv *ipriv = epriv->ppriv;
+	struct mlx5i_pkey_qpn_ht *ht = ipriv->qpn_htbl;
+	struct qpn_to_netdev *node;
+
+	node = mlx5i_find_qpn_to_netdev_node(ht->buckets, qpn);
+	if (!node) {
+		mlx5_core_warn(epriv->mdev, "QPN to netdev delete from HT failed\n");
+		return -EINVAL;
+	}
+
+	spin_lock_bh(&ht->ht_lock);
+	hlist_del_init(&node->hlist);
+	spin_unlock_bh(&ht->ht_lock);
+	kfree(node);
+
+	return 0;
+}
+
+struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn)
+{
+	struct mlx5i_priv *ipriv = netdev_priv(netdev);
+	struct qpn_to_netdev *node;
+
+	node = mlx5i_find_qpn_to_netdev_node(ipriv->qpn_htbl->buckets, qpn);
+	if (!node)
+		return NULL;
+
+	return node->netdev;
+}
+
+static int mlx5i_pkey_open(struct net_device *netdev);
+static int mlx5i_pkey_close(struct net_device *netdev);
+static int mlx5i_pkey_dev_init(struct net_device *dev);
+static void mlx5i_pkey_dev_cleanup(struct net_device *netdev);
+static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu);
+
+static const struct net_device_ops mlx5i_pkey_netdev_ops = {
+	.ndo_open                = mlx5i_pkey_open,
+	.ndo_stop                = mlx5i_pkey_close,
+	.ndo_init                = mlx5i_pkey_dev_init,
+	.ndo_uninit              = mlx5i_pkey_dev_cleanup,
+	.ndo_change_mtu          = mlx5i_pkey_change_mtu,
+};
+
+/* Child NDOs */
+static int mlx5i_pkey_dev_init(struct net_device *dev)
+{
+	struct mlx5e_priv *priv = mlx5i_epriv(dev);
+	struct mlx5i_priv *ipriv, *parent_ipriv;
+	struct net_device *parent_dev;
+	int parent_ifindex;
+
+	ipriv = priv->ppriv;
+
+	/* Get QPN to netdevice hash table from parent */
+	parent_ifindex = dev->netdev_ops->ndo_get_iflink(dev);
+	parent_dev = dev_get_by_index(dev_net(dev), parent_ifindex);
+	if (!parent_dev) {
+		mlx5_core_warn(priv->mdev, "failed to get parent device\n");
+		return -EINVAL;
+	}
+
+	parent_ipriv = netdev_priv(parent_dev);
+	ipriv->qpn_htbl = parent_ipriv->qpn_htbl;
+	dev_put(parent_dev);
+
+	return mlx5i_dev_init(dev);
+}
+
+static void mlx5i_pkey_dev_cleanup(struct net_device *netdev)
+{
+	return mlx5i_dev_cleanup(netdev);
+}
+
+static int mlx5i_pkey_open(struct net_device *netdev)
+{
+	struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+	struct mlx5i_priv *ipriv = epriv->ppriv;
+	struct mlx5_core_dev *mdev = epriv->mdev;
+	int err;
+
+	mutex_lock(&epriv->state_lock);
+
+	set_bit(MLX5E_STATE_OPENED, &epriv->state);
+
+	err = mlx5i_init_underlay_qp(epriv);
+	if (err) {
+		mlx5_core_warn(mdev, "prepare child underlay qp state failed, %d\n", err);
+		goto err_release_lock;
+	}
+
+	err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+	if (err) {
+		mlx5_core_warn(mdev, "attach child underlay qp to ft failed, %d\n", err);
+		goto err_unint_underlay_qp;
+	}
+
+	err = mlx5e_create_tis(mdev, 0 /* tc */, ipriv->qp.qpn, &epriv->tisn[0]);
+	if (err) {
+		mlx5_core_warn(mdev, "create child tis failed, %d\n", err);
+		goto err_remove_rx_uderlay_qp;
+	}
+
+	err = mlx5e_open_channels(epriv, &epriv->channels);
+	if (err) {
+		mlx5_core_warn(mdev, "opening child channels failed, %d\n", err);
+		goto err_clear_state_opened_flag;
+	}
+	mlx5e_refresh_tirs(epriv, false);
+	mlx5e_activate_priv_channels(epriv);
+	mutex_unlock(&epriv->state_lock);
+
+	return 0;
+
+err_clear_state_opened_flag:
+	mlx5e_destroy_tis(mdev, epriv->tisn[0]);
+err_remove_rx_uderlay_qp:
+	mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+err_unint_underlay_qp:
+	mlx5i_uninit_underlay_qp(epriv);
+err_release_lock:
+	clear_bit(MLX5E_STATE_OPENED, &epriv->state);
+	mutex_unlock(&epriv->state_lock);
+	return err;
+}
+
+static int mlx5i_pkey_close(struct net_device *netdev)
+{
+	struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+	struct mlx5i_priv *ipriv = priv->ppriv;
+	struct mlx5_core_dev *mdev = priv->mdev;
+
+	mutex_lock(&priv->state_lock);
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+		goto unlock;
+
+	clear_bit(MLX5E_STATE_OPENED, &priv->state);
+
+	netif_carrier_off(priv->netdev);
+	mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+	mlx5i_uninit_underlay_qp(priv);
+	mlx5e_deactivate_priv_channels(priv);
+	mlx5e_close_channels(&priv->channels);
+	mlx5e_destroy_tis(mdev, priv->tisn[0]);
+unlock:
+	mutex_unlock(&priv->state_lock);
+	return 0;
+}
+
+static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+
+	mutex_lock(&priv->state_lock);
+	netdev->mtu = new_mtu;
+	mutex_unlock(&priv->state_lock);
+
+	return 0;
+}
+
+/* Called directly after IPoIB netdevice was created to initialize SW structs */
+static void mlx5i_pkey_init(struct mlx5_core_dev *mdev,
+			     struct net_device *netdev,
+			     const struct mlx5e_profile *profile,
+			     void *ppriv)
+{
+	struct mlx5e_priv *priv  = mlx5i_epriv(netdev);
+
+	mlx5i_init(mdev, netdev, profile, ppriv);
+
+	/* Override parent ndo */
+	netdev->netdev_ops = &mlx5i_pkey_netdev_ops;
+
+	/* Set child limited ethtool support */
+	netdev->ethtool_ops = &mlx5i_pkey_ethtool_ops;
+
+	/* Use dummy rqs */
+	priv->channels.params.log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE;
+}
+
+/* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */
+static void mlx5i_pkey_cleanup(struct mlx5e_priv *priv)
+{
+	/* Do nothing .. */
+}
+
+static int mlx5i_pkey_init_tx(struct mlx5e_priv *priv)
+{
+	struct mlx5i_priv *ipriv = priv->ppriv;
+	int err;
+
+	err = mlx5i_create_underlay_qp(priv->mdev, &ipriv->qp);
+	if (err) {
+		mlx5_core_warn(priv->mdev, "create child underlay QP failed, %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+static void mlx5i_pkey_cleanup_tx(struct mlx5e_priv *priv)
+{
+	struct mlx5i_priv *ipriv = priv->ppriv;
+
+	mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp);
+}
+
+static int mlx5i_pkey_init_rx(struct mlx5e_priv *priv)
+{
+	/* Since the rx resources are shared between child and parent, the
+	 * parent interface is taking care of rx resource allocation and init
+	 */
+	return 0;
+}
+
+static void mlx5i_pkey_cleanup_rx(struct mlx5e_priv *priv)
+{
+	/* Since the rx resources are shared between child and parent, the
+	 * parent interface is taking care of rx resource free and de-init
+	 */
+}
+
+static const struct mlx5e_profile mlx5i_pkey_nic_profile = {
+	.init		   = mlx5i_pkey_init,
+	.cleanup	   = mlx5i_pkey_cleanup,
+	.init_tx	   = mlx5i_pkey_init_tx,
+	.cleanup_tx	   = mlx5i_pkey_cleanup_tx,
+	.init_rx	   = mlx5i_pkey_init_rx,
+	.cleanup_rx	   = mlx5i_pkey_cleanup_rx,
+	.enable		   = NULL,
+	.disable	   = NULL,
+	.update_stats	   = NULL,
+	.max_nch	   = mlx5e_get_max_num_channels,
+	.rx_handlers.handle_rx_cqe       = mlx5i_handle_rx_cqe,
+	.rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */
+	.max_tc		   = MLX5I_MAX_NUM_TC,
+};
+
+const struct mlx5e_profile *mlx5i_pkey_get_profile(void)
+{
+	return &mlx5i_pkey_nic_profile;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
new file mode 100644
index 000000000000..fa8aed62b231
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -0,0 +1,525 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/clocksource.h>
+#include "en.h"
+
+enum {
+	MLX5_CYCLES_SHIFT	= 23
+};
+
+enum {
+	MLX5_PIN_MODE_IN		= 0x0,
+	MLX5_PIN_MODE_OUT		= 0x1,
+};
+
+enum {
+	MLX5_OUT_PATTERN_PULSE		= 0x0,
+	MLX5_OUT_PATTERN_PERIODIC	= 0x1,
+};
+
+enum {
+	MLX5_EVENT_MODE_DISABLE	= 0x0,
+	MLX5_EVENT_MODE_REPETETIVE	= 0x1,
+	MLX5_EVENT_MODE_ONCE_TILL_ARM	= 0x2,
+};
+
+enum {
+	MLX5_MTPPS_FS_ENABLE			= BIT(0x0),
+	MLX5_MTPPS_FS_PATTERN			= BIT(0x2),
+	MLX5_MTPPS_FS_PIN_MODE			= BIT(0x3),
+	MLX5_MTPPS_FS_TIME_STAMP		= BIT(0x4),
+	MLX5_MTPPS_FS_OUT_PULSE_DURATION	= BIT(0x5),
+	MLX5_MTPPS_FS_ENH_OUT_PER_ADJ		= BIT(0x7),
+};
+
+static u64 read_internal_timer(const struct cyclecounter *cc)
+{
+	struct mlx5_clock *clock = container_of(cc, struct mlx5_clock, cycles);
+	struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
+						  clock);
+
+	return mlx5_read_internal_timer(mdev) & cc->mask;
+}
+
+static void mlx5_pps_out(struct work_struct *work)
+{
+	struct mlx5_pps *pps_info = container_of(work, struct mlx5_pps,
+						 out_work);
+	struct mlx5_clock *clock = container_of(pps_info, struct mlx5_clock,
+						pps_info);
+	struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
+						  clock);
+	u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+	unsigned long flags;
+	int i;
+
+	for (i = 0; i < clock->ptp_info.n_pins; i++) {
+		u64 tstart;
+
+		write_lock_irqsave(&clock->lock, flags);
+		tstart = clock->pps_info.start[i];
+		clock->pps_info.start[i] = 0;
+		write_unlock_irqrestore(&clock->lock, flags);
+		if (!tstart)
+			continue;
+
+		MLX5_SET(mtpps_reg, in, pin, i);
+		MLX5_SET64(mtpps_reg, in, time_stamp, tstart);
+		MLX5_SET(mtpps_reg, in, field_select, MLX5_MTPPS_FS_TIME_STAMP);
+		mlx5_set_mtpps(mdev, in, sizeof(in));
+	}
+}
+
+static void mlx5_timestamp_overflow(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct mlx5_clock *clock = container_of(dwork, struct mlx5_clock,
+						overflow_work);
+	unsigned long flags;
+
+	write_lock_irqsave(&clock->lock, flags);
+	timecounter_read(&clock->tc);
+	write_unlock_irqrestore(&clock->lock, flags);
+	schedule_delayed_work(&clock->overflow_work, clock->overflow_period);
+}
+
+static int mlx5_ptp_settime(struct ptp_clock_info *ptp,
+			    const struct timespec64 *ts)
+{
+	struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
+						 ptp_info);
+	u64 ns = timespec64_to_ns(ts);
+	unsigned long flags;
+
+	write_lock_irqsave(&clock->lock, flags);
+	timecounter_init(&clock->tc, &clock->cycles, ns);
+	write_unlock_irqrestore(&clock->lock, flags);
+
+	return 0;
+}
+
+static int mlx5_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+{
+	struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
+						ptp_info);
+	u64 ns;
+	unsigned long flags;
+
+	write_lock_irqsave(&clock->lock, flags);
+	ns = timecounter_read(&clock->tc);
+	write_unlock_irqrestore(&clock->lock, flags);
+
+	*ts = ns_to_timespec64(ns);
+
+	return 0;
+}
+
+static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
+						ptp_info);
+	unsigned long flags;
+
+	write_lock_irqsave(&clock->lock, flags);
+	timecounter_adjtime(&clock->tc, delta);
+	write_unlock_irqrestore(&clock->lock, flags);
+
+	return 0;
+}
+
+static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
+{
+	u64 adj;
+	u32 diff;
+	unsigned long flags;
+	int neg_adj = 0;
+	struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
+						ptp_info);
+
+	if (delta < 0) {
+		neg_adj = 1;
+		delta = -delta;
+	}
+
+	adj = clock->nominal_c_mult;
+	adj *= delta;
+	diff = div_u64(adj, 1000000000ULL);
+
+	write_lock_irqsave(&clock->lock, flags);
+	timecounter_read(&clock->tc);
+	clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff :
+				       clock->nominal_c_mult + diff;
+	write_unlock_irqrestore(&clock->lock, flags);
+
+	return 0;
+}
+
+static int mlx5_extts_configure(struct ptp_clock_info *ptp,
+				struct ptp_clock_request *rq,
+				int on)
+{
+	struct mlx5_clock *clock =
+			container_of(ptp, struct mlx5_clock, ptp_info);
+	struct mlx5_core_dev *mdev =
+			container_of(clock, struct mlx5_core_dev, clock);
+	u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+	u32 field_select = 0;
+	u8 pin_mode = 0;
+	u8 pattern = 0;
+	int pin = -1;
+	int err = 0;
+
+	if (!MLX5_PPS_CAP(mdev))
+		return -EOPNOTSUPP;
+
+	if (rq->extts.index >= clock->ptp_info.n_pins)
+		return -EINVAL;
+
+	if (on) {
+		pin = ptp_find_pin(clock->ptp, PTP_PF_EXTTS, rq->extts.index);
+		if (pin < 0)
+			return -EBUSY;
+		pin_mode = MLX5_PIN_MODE_IN;
+		pattern = !!(rq->extts.flags & PTP_FALLING_EDGE);
+		field_select = MLX5_MTPPS_FS_PIN_MODE |
+			       MLX5_MTPPS_FS_PATTERN |
+			       MLX5_MTPPS_FS_ENABLE;
+	} else {
+		pin = rq->extts.index;
+		field_select = MLX5_MTPPS_FS_ENABLE;
+	}
+
+	MLX5_SET(mtpps_reg, in, pin, pin);
+	MLX5_SET(mtpps_reg, in, pin_mode, pin_mode);
+	MLX5_SET(mtpps_reg, in, pattern, pattern);
+	MLX5_SET(mtpps_reg, in, enable, on);
+	MLX5_SET(mtpps_reg, in, field_select, field_select);
+
+	err = mlx5_set_mtpps(mdev, in, sizeof(in));
+	if (err)
+		return err;
+
+	return mlx5_set_mtppse(mdev, pin, 0,
+			       MLX5_EVENT_MODE_REPETETIVE & on);
+}
+
+static int mlx5_perout_configure(struct ptp_clock_info *ptp,
+				 struct ptp_clock_request *rq,
+				 int on)
+{
+	struct mlx5_clock *clock =
+			container_of(ptp, struct mlx5_clock, ptp_info);
+	struct mlx5_core_dev *mdev =
+			container_of(clock, struct mlx5_core_dev, clock);
+	u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+	u64 nsec_now, nsec_delta, time_stamp = 0;
+	u64 cycles_now, cycles_delta;
+	struct timespec64 ts;
+	unsigned long flags;
+	u32 field_select = 0;
+	u8 pin_mode = 0;
+	u8 pattern = 0;
+	int pin = -1;
+	int err = 0;
+	s64 ns;
+
+	if (!MLX5_PPS_CAP(mdev))
+		return -EOPNOTSUPP;
+
+	if (rq->perout.index >= clock->ptp_info.n_pins)
+		return -EINVAL;
+
+	if (on) {
+		pin = ptp_find_pin(clock->ptp, PTP_PF_PEROUT,
+				   rq->perout.index);
+		if (pin < 0)
+			return -EBUSY;
+
+		pin_mode = MLX5_PIN_MODE_OUT;
+		pattern = MLX5_OUT_PATTERN_PERIODIC;
+		ts.tv_sec = rq->perout.period.sec;
+		ts.tv_nsec = rq->perout.period.nsec;
+		ns = timespec64_to_ns(&ts);
+
+		if ((ns >> 1) != 500000000LL)
+			return -EINVAL;
+
+		ts.tv_sec = rq->perout.start.sec;
+		ts.tv_nsec = rq->perout.start.nsec;
+		ns = timespec64_to_ns(&ts);
+		cycles_now = mlx5_read_internal_timer(mdev);
+		write_lock_irqsave(&clock->lock, flags);
+		nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
+		nsec_delta = ns - nsec_now;
+		cycles_delta = div64_u64(nsec_delta << clock->cycles.shift,
+					 clock->cycles.mult);
+		write_unlock_irqrestore(&clock->lock, flags);
+		time_stamp = cycles_now + cycles_delta;
+		field_select = MLX5_MTPPS_FS_PIN_MODE |
+			       MLX5_MTPPS_FS_PATTERN |
+			       MLX5_MTPPS_FS_ENABLE |
+			       MLX5_MTPPS_FS_TIME_STAMP;
+	} else {
+		pin = rq->perout.index;
+		field_select = MLX5_MTPPS_FS_ENABLE;
+	}
+
+	MLX5_SET(mtpps_reg, in, pin, pin);
+	MLX5_SET(mtpps_reg, in, pin_mode, pin_mode);
+	MLX5_SET(mtpps_reg, in, pattern, pattern);
+	MLX5_SET(mtpps_reg, in, enable, on);
+	MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp);
+	MLX5_SET(mtpps_reg, in, field_select, field_select);
+
+	err = mlx5_set_mtpps(mdev, in, sizeof(in));
+	if (err)
+		return err;
+
+	return mlx5_set_mtppse(mdev, pin, 0,
+			       MLX5_EVENT_MODE_REPETETIVE & on);
+}
+
+static int mlx5_pps_configure(struct ptp_clock_info *ptp,
+			      struct ptp_clock_request *rq,
+			      int on)
+{
+	struct mlx5_clock *clock =
+			container_of(ptp, struct mlx5_clock, ptp_info);
+
+	clock->pps_info.enabled = !!on;
+	return 0;
+}
+
+static int mlx5_ptp_enable(struct ptp_clock_info *ptp,
+			   struct ptp_clock_request *rq,
+			   int on)
+{
+	switch (rq->type) {
+	case PTP_CLK_REQ_EXTTS:
+		return mlx5_extts_configure(ptp, rq, on);
+	case PTP_CLK_REQ_PEROUT:
+		return mlx5_perout_configure(ptp, rq, on);
+	case PTP_CLK_REQ_PPS:
+		return mlx5_pps_configure(ptp, rq, on);
+	default:
+		return -EOPNOTSUPP;
+	}
+	return 0;
+}
+
+static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
+			   enum ptp_pin_function func, unsigned int chan)
+{
+	return (func == PTP_PF_PHYSYNC) ? -EOPNOTSUPP : 0;
+}
+
+static const struct ptp_clock_info mlx5_ptp_clock_info = {
+	.owner		= THIS_MODULE,
+	.name		= "mlx5_p2p",
+	.max_adj	= 100000000,
+	.n_alarm	= 0,
+	.n_ext_ts	= 0,
+	.n_per_out	= 0,
+	.n_pins		= 0,
+	.pps		= 0,
+	.adjfreq	= mlx5_ptp_adjfreq,
+	.adjtime	= mlx5_ptp_adjtime,
+	.gettime64	= mlx5_ptp_gettime,
+	.settime64	= mlx5_ptp_settime,
+	.enable		= NULL,
+	.verify		= NULL,
+};
+
+static int mlx5_init_pin_config(struct mlx5_clock *clock)
+{
+	int i;
+
+	clock->ptp_info.pin_config =
+			kzalloc(sizeof(*clock->ptp_info.pin_config) *
+				clock->ptp_info.n_pins, GFP_KERNEL);
+	if (!clock->ptp_info.pin_config)
+		return -ENOMEM;
+	clock->ptp_info.enable = mlx5_ptp_enable;
+	clock->ptp_info.verify = mlx5_ptp_verify;
+	clock->ptp_info.pps = 1;
+
+	for (i = 0; i < clock->ptp_info.n_pins; i++) {
+		snprintf(clock->ptp_info.pin_config[i].name,
+			 sizeof(clock->ptp_info.pin_config[i].name),
+			 "mlx5_pps%d", i);
+		clock->ptp_info.pin_config[i].index = i;
+		clock->ptp_info.pin_config[i].func = PTP_PF_NONE;
+		clock->ptp_info.pin_config[i].chan = i;
+	}
+
+	return 0;
+}
+
+static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_clock *clock = &mdev->clock;
+	u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+
+	mlx5_query_mtpps(mdev, out, sizeof(out));
+
+	clock->ptp_info.n_pins = MLX5_GET(mtpps_reg, out,
+					  cap_number_of_pps_pins);
+	clock->ptp_info.n_ext_ts = MLX5_GET(mtpps_reg, out,
+					    cap_max_num_of_pps_in_pins);
+	clock->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out,
+					     cap_max_num_of_pps_out_pins);
+
+	clock->pps_info.pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode);
+	clock->pps_info.pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode);
+	clock->pps_info.pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode);
+	clock->pps_info.pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode);
+	clock->pps_info.pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode);
+	clock->pps_info.pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode);
+	clock->pps_info.pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode);
+	clock->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode);
+}
+
+void mlx5_pps_event(struct mlx5_core_dev *mdev,
+		    struct mlx5_eqe *eqe)
+{
+	struct mlx5_clock *clock = &mdev->clock;
+	struct ptp_clock_event ptp_event;
+	struct timespec64 ts;
+	u64 nsec_now, nsec_delta;
+	u64 cycles_now, cycles_delta;
+	int pin = eqe->data.pps.pin;
+	s64 ns;
+	unsigned long flags;
+
+	switch (clock->ptp_info.pin_config[pin].func) {
+	case PTP_PF_EXTTS:
+		if (clock->pps_info.enabled) {
+			ptp_event.type = PTP_CLOCK_PPSUSR;
+			ptp_event.pps_times.ts_real = ns_to_timespec64(eqe->data.pps.time_stamp);
+		} else {
+			ptp_event.type = PTP_CLOCK_EXTTS;
+		}
+		ptp_clock_event(clock->ptp, &ptp_event);
+		break;
+	case PTP_PF_PEROUT:
+		mlx5_ptp_gettime(&clock->ptp_info, &ts);
+		cycles_now = mlx5_read_internal_timer(mdev);
+		ts.tv_sec += 1;
+		ts.tv_nsec = 0;
+		ns = timespec64_to_ns(&ts);
+		write_lock_irqsave(&clock->lock, flags);
+		nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
+		nsec_delta = ns - nsec_now;
+		cycles_delta = div64_u64(nsec_delta << clock->cycles.shift,
+					 clock->cycles.mult);
+		clock->pps_info.start[pin] = cycles_now + cycles_delta;
+		schedule_work(&clock->pps_info.out_work);
+		write_unlock_irqrestore(&clock->lock, flags);
+		break;
+	default:
+		mlx5_core_err(mdev, " Unhandled event\n");
+	}
+}
+
+void mlx5_init_clock(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_clock *clock = &mdev->clock;
+	u64 ns;
+	u64 frac = 0;
+	u32 dev_freq;
+
+	dev_freq = MLX5_CAP_GEN(mdev, device_frequency_khz);
+	if (!dev_freq) {
+		mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n");
+		return;
+	}
+	rwlock_init(&clock->lock);
+	clock->cycles.read = read_internal_timer;
+	clock->cycles.shift = MLX5_CYCLES_SHIFT;
+	clock->cycles.mult = clocksource_khz2mult(dev_freq,
+						  clock->cycles.shift);
+	clock->nominal_c_mult = clock->cycles.mult;
+	clock->cycles.mask = CLOCKSOURCE_MASK(41);
+
+	timecounter_init(&clock->tc, &clock->cycles,
+			 ktime_to_ns(ktime_get_real()));
+
+	/* Calculate period in seconds to call the overflow watchdog - to make
+	 * sure counter is checked at least once every wrap around.
+	 */
+	ns = cyclecounter_cyc2ns(&clock->cycles, clock->cycles.mask,
+				 frac, &frac);
+	do_div(ns, NSEC_PER_SEC / 2 / HZ);
+	clock->overflow_period = ns;
+
+	INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out);
+	INIT_DELAYED_WORK(&clock->overflow_work, mlx5_timestamp_overflow);
+	if (clock->overflow_period)
+		schedule_delayed_work(&clock->overflow_work, 0);
+	else
+		mlx5_core_warn(mdev, "invalid overflow period, overflow_work is not scheduled\n");
+
+	/* Configure the PHC */
+	clock->ptp_info = mlx5_ptp_clock_info;
+
+	/* Initialize 1PPS data structures */
+	if (MLX5_PPS_CAP(mdev))
+		mlx5_get_pps_caps(mdev);
+	if (clock->ptp_info.n_pins)
+		mlx5_init_pin_config(clock);
+
+	clock->ptp = ptp_clock_register(&clock->ptp_info,
+					&mdev->pdev->dev);
+	if (IS_ERR(clock->ptp)) {
+		mlx5_core_warn(mdev, "ptp_clock_register failed %ld\n",
+			       PTR_ERR(clock->ptp));
+		clock->ptp = NULL;
+	}
+}
+
+void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_clock *clock = &mdev->clock;
+
+	if (!MLX5_CAP_GEN(mdev, device_frequency_khz))
+		return;
+
+	if (clock->ptp) {
+		ptp_clock_unregister(clock->ptp);
+		clock->ptp = NULL;
+	}
+
+	cancel_work_sync(&clock->pps_info.out_work);
+	cancel_delayed_work_sync(&clock->overflow_work);
+	kfree(clock->ptp_info.pin_config);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
new file mode 100644
index 000000000000..a8eecedd46c2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __LIB_CLOCK_H__
+#define __LIB_CLOCK_H__
+
+void mlx5_init_clock(struct mlx5_core_dev *mdev);
+void mlx5_cleanup_clock(struct mlx5_core_dev *mdev);
+
+static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
+						u64 timestamp)
+{
+	u64 nsec;
+
+	read_lock(&clock->lock);
+	nsec = timecounter_cyc2time(&clock->tc, timestamp);
+	read_unlock(&clock->lock);
+
+	return ns_to_ktime(nsec);
+}
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 06562c9a6b9c..5f323442cc5a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -59,6 +59,7 @@
 #include "lib/mlx5.h"
 #include "fpga/core.h"
 #include "accel/ipsec.h"
+#include "lib/clock.h"
 
 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
 MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver");
@@ -889,6 +890,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 
 	mlx5_init_reserved_gids(dev);
 
+	mlx5_init_clock(dev);
+
 	err = mlx5_init_rl_table(dev);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to init rate limiting\n");
@@ -949,6 +952,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
 	mlx5_eswitch_cleanup(dev->priv.eswitch);
 	mlx5_mpfs_cleanup(dev);
 	mlx5_cleanup_rl_table(dev);
+	mlx5_cleanup_clock(dev);
 	mlx5_cleanup_reserved_gids(dev);
 	mlx5_cleanup_mkey_table(dev);
 	mlx5_cleanup_srq_table(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index b7c2900b75f9..ff4a0b889a6f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -93,6 +93,7 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
 		     unsigned long param);
 void mlx5_core_page_fault(struct mlx5_core_dev *dev,
 			  struct mlx5_pagefault *pfault);
+void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
 void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
 void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force);
 void mlx5_disable_device(struct mlx5_core_dev *dev);
@@ -121,6 +122,8 @@ int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group,
 			u8 access_reg_group);
 int mlx5_query_mcam_reg(struct mlx5_core_dev *dev, u32 *mcap, u8 feature_group,
 			u8 access_reg_group);
+int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam,
+			u8 feature_group, u8 access_reg_group);
 
 void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev);
 void mlx5_lag_remove(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index e07061f565d6..c37d00cd472a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -98,6 +98,18 @@ int mlx5_query_mcam_reg(struct mlx5_core_dev *dev, u32 *mcam, u8 feature_group,
 	return mlx5_core_access_reg(dev, in, sz, mcam, sz, MLX5_REG_MCAM, 0, 0);
 }
 
+int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam,
+			u8 feature_group, u8 access_reg_group)
+{
+	u32 in[MLX5_ST_SZ_DW(qcam_reg)] = {};
+	int sz = MLX5_ST_SZ_BYTES(qcam_reg);
+
+	MLX5_SET(qcam_reg, in, feature_group, feature_group);
+	MLX5_SET(qcam_reg, in, access_reg_group, access_reg_group);
+
+	return mlx5_core_access_reg(mdev, in, sz, qcam, sz, MLX5_REG_QCAM, 0, 0);
+}
+
 struct mlx5_reg_pcap {
 	u8			rsvd0;
 	u8			port_num;
@@ -959,3 +971,102 @@ int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode)
 	return mlx5_core_access_reg(mdev, in, sizeof(in), out,
 				    sizeof(out), MLX5_REG_MTPPSE, 0, 1);
 }
+
+int mlx5_set_trust_state(struct mlx5_core_dev *mdev, u8 trust_state)
+{
+	u32 out[MLX5_ST_SZ_DW(qpts_reg)] = {};
+	u32 in[MLX5_ST_SZ_DW(qpts_reg)] = {};
+	int err;
+
+	MLX5_SET(qpts_reg, in, local_port, 1);
+	MLX5_SET(qpts_reg, in, trust_state, trust_state);
+
+	err = mlx5_core_access_reg(mdev, in, sizeof(in), out,
+				   sizeof(out), MLX5_REG_QPTS, 0, 1);
+	return err;
+}
+
+int mlx5_query_trust_state(struct mlx5_core_dev *mdev, u8 *trust_state)
+{
+	u32 out[MLX5_ST_SZ_DW(qpts_reg)] = {};
+	u32 in[MLX5_ST_SZ_DW(qpts_reg)] = {};
+	int err;
+
+	MLX5_SET(qpts_reg, in, local_port, 1);
+
+	err = mlx5_core_access_reg(mdev, in, sizeof(in), out,
+				   sizeof(out), MLX5_REG_QPTS, 0, 0);
+	if (!err)
+		*trust_state = MLX5_GET(qpts_reg, out, trust_state);
+
+	return err;
+}
+
+int mlx5_set_dscp2prio(struct mlx5_core_dev *mdev, u8 dscp, u8 prio)
+{
+	int sz = MLX5_ST_SZ_BYTES(qpdpm_reg);
+	void *qpdpm_dscp;
+	void *out;
+	void *in;
+	int err;
+
+	in = kzalloc(sz, GFP_KERNEL);
+	out = kzalloc(sz, GFP_KERNEL);
+	if (!in || !out) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	MLX5_SET(qpdpm_reg, in, local_port, 1);
+	err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 0);
+	if (err)
+		goto out;
+
+	memcpy(in, out, sz);
+	MLX5_SET(qpdpm_reg, in, local_port, 1);
+
+	/* Update the corresponding dscp entry */
+	qpdpm_dscp = MLX5_ADDR_OF(qpdpm_reg, in, dscp[dscp]);
+	MLX5_SET16(qpdpm_dscp_reg, qpdpm_dscp, prio, prio);
+	MLX5_SET16(qpdpm_dscp_reg, qpdpm_dscp, e, 1);
+	err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 1);
+
+out:
+	kfree(in);
+	kfree(out);
+	return err;
+}
+
+/* dscp2prio[i]: priority that dscp i mapped to */
+#define MLX5E_SUPPORTED_DSCP 64
+int mlx5_query_dscp2prio(struct mlx5_core_dev *mdev, u8 *dscp2prio)
+{
+	int sz = MLX5_ST_SZ_BYTES(qpdpm_reg);
+	void *qpdpm_dscp;
+	void *out;
+	void *in;
+	int err;
+	int i;
+
+	in = kzalloc(sz, GFP_KERNEL);
+	out = kzalloc(sz, GFP_KERNEL);
+	if (!in || !out) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	MLX5_SET(qpdpm_reg, in, local_port, 1);
+	err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 0);
+	if (err)
+		goto out;
+
+	for (i = 0; i < (MLX5E_SUPPORTED_DSCP); i++) {
+		qpdpm_dscp = MLX5_ADDR_OF(qpdpm_reg, out, dscp[i]);
+		dscp2prio[i] = MLX5_GET16(qpdpm_dscp_reg, qpdpm_dscp, prio);
+	}
+
+out:
+	kfree(in);
+	kfree(out);
+	return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index 9a5a1cc877b6..9463c3fa254f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -18,7 +18,9 @@ mlxsw_spectrum-objs		:= spectrum.o spectrum_buffers.o \
 				   spectrum_kvdl.o spectrum_acl_tcam.o \
 				   spectrum_acl.o spectrum_flower.o \
 				   spectrum_cnt.o spectrum_fid.o \
-				   spectrum_ipip.o
+				   spectrum_ipip.o spectrum_acl_flex_actions.o \
+				   spectrum_mr.o spectrum_mr_tcam.o \
+				   spectrum_qdisc.o
 mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB)	+= spectrum_dcb.o
 mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o
 obj-$(CONFIG_MLXSW_MINIMAL)	+= mlxsw_minimal.o
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
index 5ae110172c22..6a979a09ab72 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
@@ -399,23 +399,25 @@ u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block)
 }
 EXPORT_SYMBOL(mlxsw_afa_block_first_set_kvdl_index);
 
-void mlxsw_afa_block_continue(struct mlxsw_afa_block *block)
+int mlxsw_afa_block_continue(struct mlxsw_afa_block *block)
 {
-	if (WARN_ON(block->finished))
-		return;
+	if (block->finished)
+		return -EINVAL;
 	mlxsw_afa_set_goto_set(block->cur_set,
 			       MLXSW_AFA_SET_GOTO_BINDING_CMD_NONE, 0);
 	block->finished = true;
+	return 0;
 }
 EXPORT_SYMBOL(mlxsw_afa_block_continue);
 
-void mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id)
+int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id)
 {
-	if (WARN_ON(block->finished))
-		return;
+	if (block->finished)
+		return -EINVAL;
 	mlxsw_afa_set_goto_set(block->cur_set,
 			       MLXSW_AFA_SET_GOTO_BINDING_CMD_JUMP, group_id);
 	block->finished = true;
+	return 0;
 }
 EXPORT_SYMBOL(mlxsw_afa_block_jump);
 
@@ -674,6 +676,7 @@ enum mlxsw_afa_trapdisc_trap_action {
 MLXSW_ITEM32(afa, trapdisc, trap_action, 0x00, 24, 4);
 
 enum mlxsw_afa_trapdisc_forward_action {
+	MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD = 1,
 	MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD = 3,
 };
 
@@ -712,7 +715,7 @@ int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block)
 }
 EXPORT_SYMBOL(mlxsw_afa_block_append_drop);
 
-int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block)
+int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id)
 {
 	char *act = mlxsw_afa_block_append_action(block,
 						  MLXSW_AFA_TRAPDISC_CODE,
@@ -722,11 +725,27 @@ int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block)
 		return -ENOBUFS;
 	mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP,
 				MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD,
-				MLXSW_TRAP_ID_ACL0);
+				trap_id);
 	return 0;
 }
 EXPORT_SYMBOL(mlxsw_afa_block_append_trap);
 
+int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block,
+					    u16 trap_id)
+{
+	char *act = mlxsw_afa_block_append_action(block,
+						  MLXSW_AFA_TRAPDISC_CODE,
+						  MLXSW_AFA_TRAPDISC_SIZE);
+
+	if (!act)
+		return -ENOBUFS;
+	mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP,
+				MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD,
+				trap_id);
+	return 0;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_append_trap_and_forward);
+
 /* Forwarding Action
  * -----------------
  * Forwarding Action can be used to implement Policy Based Switching (PBS)
@@ -891,3 +910,74 @@ int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid)
 	return 0;
 }
 EXPORT_SYMBOL(mlxsw_afa_block_append_fid_set);
+
+/* MC Routing Action
+ * -----------------
+ * The Multicast router action. Can be used by RMFT_V2 - Router Multicast
+ * Forwarding Table Version 2 Register.
+ */
+
+#define MLXSW_AFA_MCROUTER_CODE 0x10
+#define MLXSW_AFA_MCROUTER_SIZE 2
+
+enum mlxsw_afa_mcrouter_rpf_action {
+	MLXSW_AFA_MCROUTER_RPF_ACTION_NOP,
+	MLXSW_AFA_MCROUTER_RPF_ACTION_TRAP,
+	MLXSW_AFA_MCROUTER_RPF_ACTION_DISCARD_ERROR,
+};
+
+/* afa_mcrouter_rpf_action */
+MLXSW_ITEM32(afa, mcrouter, rpf_action, 0x00, 28, 3);
+
+/* afa_mcrouter_expected_irif */
+MLXSW_ITEM32(afa, mcrouter, expected_irif, 0x00, 0, 16);
+
+/* afa_mcrouter_min_mtu */
+MLXSW_ITEM32(afa, mcrouter, min_mtu, 0x08, 0, 16);
+
+enum mlxsw_afa_mrouter_vrmid {
+	MLXSW_AFA_MCROUTER_VRMID_INVALID,
+	MLXSW_AFA_MCROUTER_VRMID_VALID
+};
+
+/* afa_mcrouter_vrmid
+ * Valid RMID: rigr_rmid_index is used as RMID
+ */
+MLXSW_ITEM32(afa, mcrouter, vrmid, 0x0C, 31, 1);
+
+/* afa_mcrouter_rigr_rmid_index
+ * When the vrmid field is set to invalid, the field is used as pointer to
+ * Router Interface Group (RIGR) Table in the KVD linear.
+ * When the vrmid is set to valid, the field is used as RMID index, ranged
+ * from 0 to max_mid - 1. The index is to the Port Group Table.
+ */
+MLXSW_ITEM32(afa, mcrouter, rigr_rmid_index, 0x0C, 0, 24);
+
+static inline void
+mlxsw_afa_mcrouter_pack(char *payload,
+			enum mlxsw_afa_mcrouter_rpf_action rpf_action,
+			u16 expected_irif, u16 min_mtu,
+			enum mlxsw_afa_mrouter_vrmid vrmid, u32 rigr_rmid_index)
+
+{
+	mlxsw_afa_mcrouter_rpf_action_set(payload, rpf_action);
+	mlxsw_afa_mcrouter_expected_irif_set(payload, expected_irif);
+	mlxsw_afa_mcrouter_min_mtu_set(payload, min_mtu);
+	mlxsw_afa_mcrouter_vrmid_set(payload, vrmid);
+	mlxsw_afa_mcrouter_rigr_rmid_index_set(payload, rigr_rmid_index);
+}
+
+int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block,
+				    u16 expected_irif, u16 min_mtu,
+				    bool rmid_valid, u32 kvdl_index)
+{
+	char *act = mlxsw_afa_block_append_action(block,
+						  MLXSW_AFA_MCROUTER_CODE,
+						  MLXSW_AFA_MCROUTER_SIZE);
+	if (!act)
+		return -ENOBUFS;
+	mlxsw_afa_mcrouter_pack(act, MLXSW_AFA_MCROUTER_RPF_ACTION_TRAP,
+				expected_irif, min_mtu, rmid_valid, kvdl_index);
+	return 0;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_append_mcrouter);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
index f99c341b2497..a8d3314c3a24 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
@@ -57,10 +57,12 @@ void mlxsw_afa_block_destroy(struct mlxsw_afa_block *block);
 int mlxsw_afa_block_commit(struct mlxsw_afa_block *block);
 char *mlxsw_afa_block_first_set(struct mlxsw_afa_block *block);
 u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block);
-void mlxsw_afa_block_continue(struct mlxsw_afa_block *block);
-void mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id);
+int mlxsw_afa_block_continue(struct mlxsw_afa_block *block);
+int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id);
 int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block);
-int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block);
+int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id);
+int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block,
+					    u16 trap_id);
 int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block,
 			       u8 local_port, bool in_port);
 int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block,
@@ -68,5 +70,8 @@ int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block,
 int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block,
 				   u32 counter_index);
 int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid);
+int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block,
+				    u16 expected_irif, u16 min_mtu,
+				    bool rmid_valid, u32 kvdl_index);
 
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 5acfbe5b8b9d..6c4e08b8058a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -1758,6 +1758,191 @@ static inline void mlxsw_reg_spvmlr_pack(char *payload, u8 local_port,
 	}
 }
 
+/* CWTP - Congetion WRED ECN TClass Profile
+ * ----------------------------------------
+ * Configures the profiles for queues of egress port and traffic class
+ */
+#define MLXSW_REG_CWTP_ID 0x2802
+#define MLXSW_REG_CWTP_BASE_LEN 0x28
+#define MLXSW_REG_CWTP_PROFILE_DATA_REC_LEN 0x08
+#define MLXSW_REG_CWTP_LEN 0x40
+
+MLXSW_REG_DEFINE(cwtp, MLXSW_REG_CWTP_ID, MLXSW_REG_CWTP_LEN);
+
+/* reg_cwtp_local_port
+ * Local port number
+ * Not supported for CPU port
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, cwtp, local_port, 0, 16, 8);
+
+/* reg_cwtp_traffic_class
+ * Traffic Class to configure
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, cwtp, traffic_class, 32, 0, 8);
+
+/* reg_cwtp_profile_min
+ * Minimum Average Queue Size of the profile in cells.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, cwtp, profile_min, MLXSW_REG_CWTP_BASE_LEN,
+		     0, 20, MLXSW_REG_CWTP_PROFILE_DATA_REC_LEN, 0, false);
+
+/* reg_cwtp_profile_percent
+ * Percentage of WRED and ECN marking for maximum Average Queue size
+ * Range is 0 to 100, units of integer percentage
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, cwtp, profile_percent, MLXSW_REG_CWTP_BASE_LEN,
+		     24, 7, MLXSW_REG_CWTP_PROFILE_DATA_REC_LEN, 4, false);
+
+/* reg_cwtp_profile_max
+ * Maximum Average Queue size of the profile in cells
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, cwtp, profile_max, MLXSW_REG_CWTP_BASE_LEN,
+		     0, 20, MLXSW_REG_CWTP_PROFILE_DATA_REC_LEN, 4, false);
+
+#define MLXSW_REG_CWTP_MIN_VALUE 64
+#define MLXSW_REG_CWTP_MAX_PROFILE 2
+#define MLXSW_REG_CWTP_DEFAULT_PROFILE 1
+
+static inline void mlxsw_reg_cwtp_pack(char *payload, u8 local_port,
+				       u8 traffic_class)
+{
+	int i;
+
+	MLXSW_REG_ZERO(cwtp, payload);
+	mlxsw_reg_cwtp_local_port_set(payload, local_port);
+	mlxsw_reg_cwtp_traffic_class_set(payload, traffic_class);
+
+	for (i = 0; i <= MLXSW_REG_CWTP_MAX_PROFILE; i++) {
+		mlxsw_reg_cwtp_profile_min_set(payload, i,
+					       MLXSW_REG_CWTP_MIN_VALUE);
+		mlxsw_reg_cwtp_profile_max_set(payload, i,
+					       MLXSW_REG_CWTP_MIN_VALUE);
+	}
+}
+
+#define MLXSW_REG_CWTP_PROFILE_TO_INDEX(profile) (profile - 1)
+
+static inline void
+mlxsw_reg_cwtp_profile_pack(char *payload, u8 profile, u32 min, u32 max,
+			    u32 probability)
+{
+	u8 index = MLXSW_REG_CWTP_PROFILE_TO_INDEX(profile);
+
+	mlxsw_reg_cwtp_profile_min_set(payload, index, min);
+	mlxsw_reg_cwtp_profile_max_set(payload, index, max);
+	mlxsw_reg_cwtp_profile_percent_set(payload, index, probability);
+}
+
+/* CWTPM - Congestion WRED ECN TClass and Pool Mapping
+ * ---------------------------------------------------
+ * The CWTPM register maps each egress port and traffic class to profile num.
+ */
+#define MLXSW_REG_CWTPM_ID 0x2803
+#define MLXSW_REG_CWTPM_LEN 0x44
+
+MLXSW_REG_DEFINE(cwtpm, MLXSW_REG_CWTPM_ID, MLXSW_REG_CWTPM_LEN);
+
+/* reg_cwtpm_local_port
+ * Local port number
+ * Not supported for CPU port
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, cwtpm, local_port, 0, 16, 8);
+
+/* reg_cwtpm_traffic_class
+ * Traffic Class to configure
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, cwtpm, traffic_class, 32, 0, 8);
+
+/* reg_cwtpm_ew
+ * Control enablement of WRED for traffic class:
+ * 0 - Disable
+ * 1 - Enable
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, cwtpm, ew, 36, 1, 1);
+
+/* reg_cwtpm_ee
+ * Control enablement of ECN for traffic class:
+ * 0 - Disable
+ * 1 - Enable
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, cwtpm, ee, 36, 0, 1);
+
+/* reg_cwtpm_tcp_g
+ * TCP Green Profile.
+ * Index of the profile within {port, traffic class} to use.
+ * 0 for disabling both WRED and ECN for this type of traffic.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, cwtpm, tcp_g, 52, 0, 2);
+
+/* reg_cwtpm_tcp_y
+ * TCP Yellow Profile.
+ * Index of the profile within {port, traffic class} to use.
+ * 0 for disabling both WRED and ECN for this type of traffic.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, cwtpm, tcp_y, 56, 16, 2);
+
+/* reg_cwtpm_tcp_r
+ * TCP Red Profile.
+ * Index of the profile within {port, traffic class} to use.
+ * 0 for disabling both WRED and ECN for this type of traffic.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, cwtpm, tcp_r, 56, 0, 2);
+
+/* reg_cwtpm_ntcp_g
+ * Non-TCP Green Profile.
+ * Index of the profile within {port, traffic class} to use.
+ * 0 for disabling both WRED and ECN for this type of traffic.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, cwtpm, ntcp_g, 60, 0, 2);
+
+/* reg_cwtpm_ntcp_y
+ * Non-TCP Yellow Profile.
+ * Index of the profile within {port, traffic class} to use.
+ * 0 for disabling both WRED and ECN for this type of traffic.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, cwtpm, ntcp_y, 64, 16, 2);
+
+/* reg_cwtpm_ntcp_r
+ * Non-TCP Red Profile.
+ * Index of the profile within {port, traffic class} to use.
+ * 0 for disabling both WRED and ECN for this type of traffic.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, cwtpm, ntcp_r, 64, 0, 2);
+
+#define MLXSW_REG_CWTPM_RESET_PROFILE 0
+
+static inline void mlxsw_reg_cwtpm_pack(char *payload, u8 local_port,
+					u8 traffic_class, u8 profile,
+					bool wred, bool ecn)
+{
+	MLXSW_REG_ZERO(cwtpm, payload);
+	mlxsw_reg_cwtpm_local_port_set(payload, local_port);
+	mlxsw_reg_cwtpm_traffic_class_set(payload, traffic_class);
+	mlxsw_reg_cwtpm_ew_set(payload, wred);
+	mlxsw_reg_cwtpm_ee_set(payload, ecn);
+	mlxsw_reg_cwtpm_tcp_g_set(payload, profile);
+	mlxsw_reg_cwtpm_tcp_y_set(payload, profile);
+	mlxsw_reg_cwtpm_tcp_r_set(payload, profile);
+	mlxsw_reg_cwtpm_ntcp_g_set(payload, profile);
+	mlxsw_reg_cwtpm_ntcp_y_set(payload, profile);
+	mlxsw_reg_cwtpm_ntcp_r_set(payload, profile);
+}
+
 /* PPBT - Policy-Engine Port Binding Table
  * ---------------------------------------
  * This register is used for configuration of the Port Binding Table.
@@ -2142,15 +2327,14 @@ MLXSW_REG_DEFINE(pefa, MLXSW_REG_PEFA_ID, MLXSW_REG_PEFA_LEN);
  */
 MLXSW_ITEM32(reg, pefa, index, 0x00, 0, 24);
 
-#define MLXSW_REG_PXXX_FLEX_ACTION_SET_LEN 0xA8
+#define MLXSW_REG_FLEX_ACTION_SET_LEN 0xA8
 
 /* reg_pefa_flex_action_set
  * Action-set to perform when rule is matched.
  * Must be zero padded if action set is shorter.
  * Access: RW
  */
-MLXSW_ITEM_BUF(reg, pefa, flex_action_set, 0x08,
-	       MLXSW_REG_PXXX_FLEX_ACTION_SET_LEN);
+MLXSW_ITEM_BUF(reg, pefa, flex_action_set, 0x08, MLXSW_REG_FLEX_ACTION_SET_LEN);
 
 static inline void mlxsw_reg_pefa_pack(char *payload, u32 index,
 				       const char *flex_action_set)
@@ -2243,7 +2427,7 @@ MLXSW_ITEM_BUF(reg, ptce2, mask, 0x80,
  * Access: RW
  */
 MLXSW_ITEM_BUF(reg, ptce2, flex_action_set, 0xE0,
-	       MLXSW_REG_PXXX_FLEX_ACTION_SET_LEN);
+	       MLXSW_REG_FLEX_ACTION_SET_LEN);
 
 static inline void mlxsw_reg_ptce2_pack(char *payload, bool valid,
 					enum mlxsw_reg_ptce2_op op,
@@ -3124,6 +3308,7 @@ static inline void mlxsw_reg_pfcc_pack(char *payload, u8 local_port)
  */
 #define MLXSW_REG_PPCNT_ID 0x5008
 #define MLXSW_REG_PPCNT_LEN 0x100
+#define MLXSW_REG_PPCNT_COUNTERS_OFFSET 0x08
 
 MLXSW_REG_DEFINE(ppcnt, MLXSW_REG_PPCNT_ID, MLXSW_REG_PPCNT_LEN);
 
@@ -3156,8 +3341,10 @@ MLXSW_ITEM32(reg, ppcnt, pnat, 0x00, 14, 2);
 
 enum mlxsw_reg_ppcnt_grp {
 	MLXSW_REG_PPCNT_IEEE_8023_CNT = 0x0,
+	MLXSW_REG_PPCNT_EXT_CNT = 0x5,
 	MLXSW_REG_PPCNT_PRIO_CNT = 0x10,
 	MLXSW_REG_PPCNT_TC_CNT = 0x11,
+	MLXSW_REG_PPCNT_TC_CONG_TC = 0x13,
 };
 
 /* reg_ppcnt_grp
@@ -3173,6 +3360,7 @@ enum mlxsw_reg_ppcnt_grp {
  * 0x10: Per Priority Counters
  * 0x11: Per Traffic Class Counters
  * 0x12: Physical Layer Counters
+ * 0x13: Per Traffic Class Congestion Counters
  * Access: Index
  */
 MLXSW_ITEM32(reg, ppcnt, grp, 0x00, 0, 6);
@@ -3201,162 +3389,179 @@ MLXSW_ITEM32(reg, ppcnt, prio_tc, 0x04, 0, 5);
  * Access: RO
  */
 MLXSW_ITEM64(reg, ppcnt, a_frames_transmitted_ok,
-	     0x08 + 0x00, 0, 64);
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64);
 
 /* reg_ppcnt_a_frames_received_ok
  * Access: RO
  */
 MLXSW_ITEM64(reg, ppcnt, a_frames_received_ok,
-	     0x08 + 0x08, 0, 64);
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64);
 
 /* reg_ppcnt_a_frame_check_sequence_errors
  * Access: RO
  */
 MLXSW_ITEM64(reg, ppcnt, a_frame_check_sequence_errors,
-	     0x08 + 0x10, 0, 64);
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x10, 0, 64);
 
 /* reg_ppcnt_a_alignment_errors
  * Access: RO
  */
 MLXSW_ITEM64(reg, ppcnt, a_alignment_errors,
-	     0x08 + 0x18, 0, 64);
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x18, 0, 64);
 
 /* reg_ppcnt_a_octets_transmitted_ok
  * Access: RO
  */
 MLXSW_ITEM64(reg, ppcnt, a_octets_transmitted_ok,
-	     0x08 + 0x20, 0, 64);
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x20, 0, 64);
 
 /* reg_ppcnt_a_octets_received_ok
  * Access: RO
  */
 MLXSW_ITEM64(reg, ppcnt, a_octets_received_ok,
-	     0x08 + 0x28, 0, 64);
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x28, 0, 64);
 
 /* reg_ppcnt_a_multicast_frames_xmitted_ok
  * Access: RO
  */
 MLXSW_ITEM64(reg, ppcnt, a_multicast_frames_xmitted_ok,
-	     0x08 + 0x30, 0, 64);
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x30, 0, 64);
 
 /* reg_ppcnt_a_broadcast_frames_xmitted_ok
  * Access: RO
  */
 MLXSW_ITEM64(reg, ppcnt, a_broadcast_frames_xmitted_ok,
-	     0x08 + 0x38, 0, 64);
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x38, 0, 64);
 
 /* reg_ppcnt_a_multicast_frames_received_ok
  * Access: RO
  */
 MLXSW_ITEM64(reg, ppcnt, a_multicast_frames_received_ok,
-	     0x08 + 0x40, 0, 64);
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x40, 0, 64);
 
 /* reg_ppcnt_a_broadcast_frames_received_ok
  * Access: RO
  */
 MLXSW_ITEM64(reg, ppcnt, a_broadcast_frames_received_ok,
-	     0x08 + 0x48, 0, 64);
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x48, 0, 64);
 
 /* reg_ppcnt_a_in_range_length_errors
  * Access: RO
  */
 MLXSW_ITEM64(reg, ppcnt, a_in_range_length_errors,
-	     0x08 + 0x50, 0, 64);
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x50, 0, 64);
 
 /* reg_ppcnt_a_out_of_range_length_field
  * Access: RO
  */
 MLXSW_ITEM64(reg, ppcnt, a_out_of_range_length_field,
-	     0x08 + 0x58, 0, 64);
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x58, 0, 64);
 
 /* reg_ppcnt_a_frame_too_long_errors
  * Access: RO
  */
 MLXSW_ITEM64(reg, ppcnt, a_frame_too_long_errors,
-	     0x08 + 0x60, 0, 64);
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x60, 0, 64);
 
 /* reg_ppcnt_a_symbol_error_during_carrier
  * Access: RO
  */
 MLXSW_ITEM64(reg, ppcnt, a_symbol_error_during_carrier,
-	     0x08 + 0x68, 0, 64);
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x68, 0, 64);
 
 /* reg_ppcnt_a_mac_control_frames_transmitted
  * Access: RO
  */
 MLXSW_ITEM64(reg, ppcnt, a_mac_control_frames_transmitted,
-	     0x08 + 0x70, 0, 64);
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x70, 0, 64);
 
 /* reg_ppcnt_a_mac_control_frames_received
  * Access: RO
  */
 MLXSW_ITEM64(reg, ppcnt, a_mac_control_frames_received,
-	     0x08 + 0x78, 0, 64);
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x78, 0, 64);
 
 /* reg_ppcnt_a_unsupported_opcodes_received
  * Access: RO
  */
 MLXSW_ITEM64(reg, ppcnt, a_unsupported_opcodes_received,
-	     0x08 + 0x80, 0, 64);
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x80, 0, 64);
 
 /* reg_ppcnt_a_pause_mac_ctrl_frames_received
  * Access: RO
  */
 MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_received,
-	     0x08 + 0x88, 0, 64);
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x88, 0, 64);
 
 /* reg_ppcnt_a_pause_mac_ctrl_frames_transmitted
  * Access: RO
  */
 MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_transmitted,
-	     0x08 + 0x90, 0, 64);
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x90, 0, 64);
+
+/* Ethernet Extended Counter Group Counters */
+
+/* reg_ppcnt_ecn_marked
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, ecn_marked,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64);
 
 /* Ethernet Per Priority Group Counters */
 
 /* reg_ppcnt_rx_octets
  * Access: RO
  */
-MLXSW_ITEM64(reg, ppcnt, rx_octets, 0x08 + 0x00, 0, 64);
+MLXSW_ITEM64(reg, ppcnt, rx_octets,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64);
 
 /* reg_ppcnt_rx_frames
  * Access: RO
  */
-MLXSW_ITEM64(reg, ppcnt, rx_frames, 0x08 + 0x20, 0, 64);
+MLXSW_ITEM64(reg, ppcnt, rx_frames,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x20, 0, 64);
 
 /* reg_ppcnt_tx_octets
  * Access: RO
  */
-MLXSW_ITEM64(reg, ppcnt, tx_octets, 0x08 + 0x28, 0, 64);
+MLXSW_ITEM64(reg, ppcnt, tx_octets,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x28, 0, 64);
 
 /* reg_ppcnt_tx_frames
  * Access: RO
  */
-MLXSW_ITEM64(reg, ppcnt, tx_frames, 0x08 + 0x48, 0, 64);
+MLXSW_ITEM64(reg, ppcnt, tx_frames,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x48, 0, 64);
 
 /* reg_ppcnt_rx_pause
  * Access: RO
  */
-MLXSW_ITEM64(reg, ppcnt, rx_pause, 0x08 + 0x50, 0, 64);
+MLXSW_ITEM64(reg, ppcnt, rx_pause,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x50, 0, 64);
 
 /* reg_ppcnt_rx_pause_duration
  * Access: RO
  */
-MLXSW_ITEM64(reg, ppcnt, rx_pause_duration, 0x08 + 0x58, 0, 64);
+MLXSW_ITEM64(reg, ppcnt, rx_pause_duration,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x58, 0, 64);
 
 /* reg_ppcnt_tx_pause
  * Access: RO
  */
-MLXSW_ITEM64(reg, ppcnt, tx_pause, 0x08 + 0x60, 0, 64);
+MLXSW_ITEM64(reg, ppcnt, tx_pause,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x60, 0, 64);
 
 /* reg_ppcnt_tx_pause_duration
  * Access: RO
  */
-MLXSW_ITEM64(reg, ppcnt, tx_pause_duration, 0x08 + 0x68, 0, 64);
+MLXSW_ITEM64(reg, ppcnt, tx_pause_duration,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x68, 0, 64);
 
 /* reg_ppcnt_rx_pause_transition
  * Access: RO
  */
-MLXSW_ITEM64(reg, ppcnt, tx_pause_transition, 0x08 + 0x70, 0, 64);
+MLXSW_ITEM64(reg, ppcnt, tx_pause_transition,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x70, 0, 64);
 
 /* Ethernet Per Traffic Group Counters */
 
@@ -3366,14 +3571,24 @@ MLXSW_ITEM64(reg, ppcnt, tx_pause_transition, 0x08 + 0x70, 0, 64);
  * The field cannot be cleared.
  * Access: RO
  */
-MLXSW_ITEM64(reg, ppcnt, tc_transmit_queue, 0x08 + 0x00, 0, 64);
+MLXSW_ITEM64(reg, ppcnt, tc_transmit_queue,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64);
 
 /* reg_ppcnt_tc_no_buffer_discard_uc
  * The number of unicast packets dropped due to lack of shared
  * buffer resources.
  * Access: RO
  */
-MLXSW_ITEM64(reg, ppcnt, tc_no_buffer_discard_uc, 0x08 + 0x08, 0, 64);
+MLXSW_ITEM64(reg, ppcnt, tc_no_buffer_discard_uc,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64);
+
+/* Ethernet Per Traffic Class Congestion Group Counters */
+
+/* reg_ppcnt_wred_discard
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, wred_discard,
+	     MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64);
 
 static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port,
 					enum mlxsw_reg_ppcnt_grp grp,
@@ -3682,12 +3897,15 @@ enum mlxsw_reg_htgt_trap_group {
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND,
@@ -3992,6 +4210,12 @@ MLXSW_ITEM32(reg, ritr, ipv4, 0x00, 29, 1);
  */
 MLXSW_ITEM32(reg, ritr, ipv6, 0x00, 28, 1);
 
+/* reg_ritr_ipv4_mc
+ * IPv4 multicast routing enable.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, ipv4_mc, 0x00, 27, 1);
+
 enum mlxsw_reg_ritr_if_type {
 	/* VLAN interface. */
 	MLXSW_REG_RITR_VLAN_IF,
@@ -4049,6 +4273,14 @@ MLXSW_ITEM32(reg, ritr, ipv4_fe, 0x04, 29, 1);
  */
 MLXSW_ITEM32(reg, ritr, ipv6_fe, 0x04, 28, 1);
 
+/* reg_ritr_ipv4_mc_fe
+ * IPv4 Multicast Forwarding Enable.
+ * When disabled, forwarding is blocked but local traffic (traps and IP to me)
+ * will be enabled.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, ipv4_mc_fe, 0x04, 27, 1);
+
 /* reg_ritr_lb_en
  * Loop-back filter enable for unicast packets.
  * If the flag is set then loop-back filter for unicast packets is
@@ -4271,11 +4503,13 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable,
 	mlxsw_reg_ritr_enable_set(payload, enable);
 	mlxsw_reg_ritr_ipv4_set(payload, 1);
 	mlxsw_reg_ritr_ipv6_set(payload, 1);
+	mlxsw_reg_ritr_ipv4_mc_set(payload, 1);
 	mlxsw_reg_ritr_type_set(payload, type);
 	mlxsw_reg_ritr_op_set(payload, op);
 	mlxsw_reg_ritr_rif_set(payload, rif);
 	mlxsw_reg_ritr_ipv4_fe_set(payload, 1);
 	mlxsw_reg_ritr_ipv6_fe_set(payload, 1);
+	mlxsw_reg_ritr_ipv4_mc_fe_set(payload, 1);
 	mlxsw_reg_ritr_lb_en_set(payload, 1);
 	mlxsw_reg_ritr_virtual_router_set(payload, vr_id);
 	mlxsw_reg_ritr_mtu_set(payload, mtu);
@@ -4311,6 +4545,57 @@ mlxsw_reg_ritr_loopback_ipip4_pack(char *payload,
 	mlxsw_reg_ritr_loopback_ipip_usip4_set(payload, usip);
 }
 
+/* RTAR - Router TCAM Allocation Register
+ * --------------------------------------
+ * This register is used for allocation of regions in the TCAM table.
+ */
+#define MLXSW_REG_RTAR_ID 0x8004
+#define MLXSW_REG_RTAR_LEN 0x20
+
+MLXSW_REG_DEFINE(rtar, MLXSW_REG_RTAR_ID, MLXSW_REG_RTAR_LEN);
+
+enum mlxsw_reg_rtar_op {
+	MLXSW_REG_RTAR_OP_ALLOCATE,
+	MLXSW_REG_RTAR_OP_RESIZE,
+	MLXSW_REG_RTAR_OP_DEALLOCATE,
+};
+
+/* reg_rtar_op
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, rtar, op, 0x00, 28, 4);
+
+enum mlxsw_reg_rtar_key_type {
+	MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST = 1,
+	MLXSW_REG_RTAR_KEY_TYPE_IPV6_MULTICAST = 3
+};
+
+/* reg_rtar_key_type
+ * TCAM key type for the region.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, rtar, key_type, 0x00, 0, 8);
+
+/* reg_rtar_region_size
+ * TCAM region size. When allocating/resizing this is the requested
+ * size, the response is the actual size.
+ * Note: Actual size may be larger than requested.
+ * Reserved for op = Deallocate
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, rtar, region_size, 0x04, 0, 16);
+
+static inline void mlxsw_reg_rtar_pack(char *payload,
+				       enum mlxsw_reg_rtar_op op,
+				       enum mlxsw_reg_rtar_key_type key_type,
+				       u16 region_size)
+{
+	MLXSW_REG_ZERO(rtar, payload);
+	mlxsw_reg_rtar_op_set(payload, op);
+	mlxsw_reg_rtar_key_type_set(payload, key_type);
+	mlxsw_reg_rtar_region_size_set(payload, region_size);
+}
+
 /* RATR - Router Adjacency Table Register
  * --------------------------------------
  * The RATR register is used to configure the Router Adjacency (next-hop)
@@ -4480,6 +4765,27 @@ MLXSW_ITEM32(reg, ratr, ipip_ipv4_udip, 0x18, 0, 32);
  */
 MLXSW_ITEM32(reg, ratr, ipip_ipv6_ptr, 0x1C, 0, 24);
 
+enum mlxsw_reg_flow_counter_set_type {
+	/* No count */
+	MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT = 0x00,
+	/* Count packets and bytes */
+	MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03,
+	/* Count only packets */
+	MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS = 0x05,
+};
+
+/* reg_ratr_counter_set_type
+ * Counter set type for flow counters
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, counter_set_type, 0x28, 24, 8);
+
+/* reg_ratr_counter_index
+ * Counter index for flow counters
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, counter_index, 0x28, 0, 24);
+
 static inline void
 mlxsw_reg_ratr_pack(char *payload,
 		    enum mlxsw_reg_ratr_op op, bool valid,
@@ -4507,6 +4813,20 @@ static inline void mlxsw_reg_ratr_ipip4_entry_pack(char *payload, u32 ipv4_udip)
 	mlxsw_reg_ratr_ipip_ipv4_udip_set(payload, ipv4_udip);
 }
 
+static inline void mlxsw_reg_ratr_counter_pack(char *payload, u64 counter_index,
+					       bool counter_enable)
+{
+	enum mlxsw_reg_flow_counter_set_type set_type;
+
+	if (counter_enable)
+		set_type = MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES;
+	else
+		set_type = MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT;
+
+	mlxsw_reg_ratr_counter_index_set(payload, counter_index);
+	mlxsw_reg_ratr_counter_set_type_set(payload, set_type);
+}
+
 /* RICNT - Router Interface Counter Register
  * -----------------------------------------
  * The RICNT register retrieves per port performance counters
@@ -4630,6 +4950,65 @@ static inline void mlxsw_reg_ricnt_pack(char *payload, u32 index,
 					     MLXSW_REG_RICNT_COUNTER_SET_TYPE_BASIC);
 }
 
+/* RRCR - Router Rules Copy Register Layout
+ * ----------------------------------------
+ * This register is used for moving and copying route entry rules.
+ */
+#define MLXSW_REG_RRCR_ID 0x800F
+#define MLXSW_REG_RRCR_LEN 0x24
+
+MLXSW_REG_DEFINE(rrcr, MLXSW_REG_RRCR_ID, MLXSW_REG_RRCR_LEN);
+
+enum mlxsw_reg_rrcr_op {
+	/* Move rules */
+	MLXSW_REG_RRCR_OP_MOVE,
+	/* Copy rules */
+	MLXSW_REG_RRCR_OP_COPY,
+};
+
+/* reg_rrcr_op
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, rrcr, op, 0x00, 28, 4);
+
+/* reg_rrcr_offset
+ * Offset within the region from which to copy/move.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rrcr, offset, 0x00, 0, 16);
+
+/* reg_rrcr_size
+ * The number of rules to copy/move.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, rrcr, size, 0x04, 0, 16);
+
+/* reg_rrcr_table_id
+ * Identifier of the table on which to perform the operation. Encoding is the
+ * same as in RTAR.key_type
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rrcr, table_id, 0x10, 0, 4);
+
+/* reg_rrcr_dest_offset
+ * Offset within the region to which to copy/move
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rrcr, dest_offset, 0x20, 0, 16);
+
+static inline void mlxsw_reg_rrcr_pack(char *payload, enum mlxsw_reg_rrcr_op op,
+				       u16 offset, u16 size,
+				       enum mlxsw_reg_rtar_key_type table_id,
+				       u16 dest_offset)
+{
+	MLXSW_REG_ZERO(rrcr, payload);
+	mlxsw_reg_rrcr_op_set(payload, op);
+	mlxsw_reg_rrcr_offset_set(payload, offset);
+	mlxsw_reg_rrcr_size_set(payload, size);
+	mlxsw_reg_rrcr_table_id_set(payload, table_id);
+	mlxsw_reg_rrcr_dest_offset_set(payload, dest_offset);
+}
+
 /* RALTA - Router Algorithmic LPM Tree Allocation Register
  * -------------------------------------------------------
  * RALTA is used to allocate the LPM trees of the SHSPM method.
@@ -5169,15 +5548,6 @@ enum mlxsw_reg_rauht_trap_id {
  */
 MLXSW_ITEM32(reg, rauht, trap_id, 0x60, 0, 9);
 
-enum mlxsw_reg_flow_counter_set_type {
-	/* No count */
-	MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT = 0x00,
-	/* Count packets and bytes */
-	MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03,
-	/* Count only packets */
-	MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS = 0x05,
-};
-
 /* reg_rauht_counter_set_type
  * Counter set type for flow counters
  * Access: RW
@@ -5596,6 +5966,360 @@ mlxsw_reg_rtdp_ipip4_pack(char *payload, u16 irif,
 	mlxsw_reg_rtdp_ipip_expected_gre_key_set(payload, expected_gre_key);
 }
 
+/* RIGR-V2 - Router Interface Group Register Version 2
+ * ---------------------------------------------------
+ * The RIGR_V2 register is used to add, remove and query egress interface list
+ * of a multicast forwarding entry.
+ */
+#define MLXSW_REG_RIGR2_ID 0x8023
+#define MLXSW_REG_RIGR2_LEN 0xB0
+
+#define MLXSW_REG_RIGR2_MAX_ERIFS 32
+
+MLXSW_REG_DEFINE(rigr2, MLXSW_REG_RIGR2_ID, MLXSW_REG_RIGR2_LEN);
+
+/* reg_rigr2_rigr_index
+ * KVD Linear index.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rigr2, rigr_index, 0x04, 0, 24);
+
+/* reg_rigr2_vnext
+ * Next RIGR Index is valid.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rigr2, vnext, 0x08, 31, 1);
+
+/* reg_rigr2_next_rigr_index
+ * Next RIGR Index. The index is to the KVD linear.
+ * Reserved when vnxet = '0'.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rigr2, next_rigr_index, 0x08, 0, 24);
+
+/* reg_rigr2_vrmid
+ * RMID Index is valid.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rigr2, vrmid, 0x20, 31, 1);
+
+/* reg_rigr2_rmid_index
+ * RMID Index.
+ * Range 0 .. max_mid - 1
+ * Reserved when vrmid = '0'.
+ * The index is to the Port Group Table (PGT)
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rigr2, rmid_index, 0x20, 0, 16);
+
+/* reg_rigr2_erif_entry_v
+ * Egress Router Interface is valid.
+ * Note that low-entries must be set if high-entries are set. For
+ * example: if erif_entry[2].v is set then erif_entry[1].v and
+ * erif_entry[0].v must be set.
+ * Index can be from 0 to cap_mc_erif_list_entries-1
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, rigr2, erif_entry_v, 0x24, 31, 1, 4, 0, false);
+
+/* reg_rigr2_erif_entry_erif
+ * Egress Router Interface.
+ * Valid range is from 0 to cap_max_router_interfaces - 1
+ * Index can be from 0 to MLXSW_REG_RIGR2_MAX_ERIFS - 1
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, rigr2, erif_entry_erif, 0x24, 0, 16, 4, 0, false);
+
+static inline void mlxsw_reg_rigr2_pack(char *payload, u32 rigr_index,
+					bool vnext, u32 next_rigr_index)
+{
+	MLXSW_REG_ZERO(rigr2, payload);
+	mlxsw_reg_rigr2_rigr_index_set(payload, rigr_index);
+	mlxsw_reg_rigr2_vnext_set(payload, vnext);
+	mlxsw_reg_rigr2_next_rigr_index_set(payload, next_rigr_index);
+	mlxsw_reg_rigr2_vrmid_set(payload, 0);
+	mlxsw_reg_rigr2_rmid_index_set(payload, 0);
+}
+
+static inline void mlxsw_reg_rigr2_erif_entry_pack(char *payload, int index,
+						   bool v, u16 erif)
+{
+	mlxsw_reg_rigr2_erif_entry_v_set(payload, index, v);
+	mlxsw_reg_rigr2_erif_entry_erif_set(payload, index, erif);
+}
+
+/* RECR-V2 - Router ECMP Configuration Version 2 Register
+ * ------------------------------------------------------
+ */
+#define MLXSW_REG_RECR2_ID 0x8025
+#define MLXSW_REG_RECR2_LEN 0x38
+
+MLXSW_REG_DEFINE(recr2, MLXSW_REG_RECR2_ID, MLXSW_REG_RECR2_LEN);
+
+/* reg_recr2_pp
+ * Per-port configuration
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, recr2, pp, 0x00, 24, 1);
+
+/* reg_recr2_sh
+ * Symmetric hash
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, recr2, sh, 0x00, 8, 1);
+
+/* reg_recr2_seed
+ * Seed
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, recr2, seed, 0x08, 0, 32);
+
+enum {
+	/* Enable IPv4 fields if packet is not TCP and not UDP */
+	MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP	= 3,
+	/* Enable IPv4 fields if packet is TCP or UDP */
+	MLXSW_REG_RECR2_IPV4_EN_TCP_UDP		= 4,
+	/* Enable IPv6 fields if packet is not TCP and not UDP */
+	MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP	= 5,
+	/* Enable IPv6 fields if packet is TCP or UDP */
+	MLXSW_REG_RECR2_IPV6_EN_TCP_UDP		= 6,
+	/* Enable TCP/UDP header fields if packet is IPv4 */
+	MLXSW_REG_RECR2_TCP_UDP_EN_IPV4		= 7,
+	/* Enable TCP/UDP header fields if packet is IPv6 */
+	MLXSW_REG_RECR2_TCP_UDP_EN_IPV6		= 8,
+};
+
+/* reg_recr2_outer_header_enables
+ * Bit mask where each bit enables a specific layer to be included in
+ * the hash calculation.
+ * Access: RW
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, recr2, outer_header_enables, 0x10, 0x04, 1);
+
+enum {
+	/* IPv4 Source IP */
+	MLXSW_REG_RECR2_IPV4_SIP0			= 9,
+	MLXSW_REG_RECR2_IPV4_SIP3			= 12,
+	/* IPv4 Destination IP */
+	MLXSW_REG_RECR2_IPV4_DIP0			= 13,
+	MLXSW_REG_RECR2_IPV4_DIP3			= 16,
+	/* IP Protocol */
+	MLXSW_REG_RECR2_IPV4_PROTOCOL			= 17,
+	/* IPv6 Source IP */
+	MLXSW_REG_RECR2_IPV6_SIP0_7			= 21,
+	MLXSW_REG_RECR2_IPV6_SIP8			= 29,
+	MLXSW_REG_RECR2_IPV6_SIP15			= 36,
+	/* IPv6 Destination IP */
+	MLXSW_REG_RECR2_IPV6_DIP0_7			= 37,
+	MLXSW_REG_RECR2_IPV6_DIP8			= 45,
+	MLXSW_REG_RECR2_IPV6_DIP15			= 52,
+	/* IPv6 Next Header */
+	MLXSW_REG_RECR2_IPV6_NEXT_HEADER		= 53,
+	/* IPv6 Flow Label */
+	MLXSW_REG_RECR2_IPV6_FLOW_LABEL			= 57,
+	/* TCP/UDP Source Port */
+	MLXSW_REG_RECR2_TCP_UDP_SPORT			= 74,
+	/* TCP/UDP Destination Port */
+	MLXSW_REG_RECR2_TCP_UDP_DPORT			= 75,
+};
+
+/* reg_recr2_outer_header_fields_enable
+ * Packet fields to enable for ECMP hash subject to outer_header_enable.
+ * Access: RW
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, recr2, outer_header_fields_enable, 0x14, 0x14, 1);
+
+static inline void mlxsw_reg_recr2_ipv4_sip_enable(char *payload)
+{
+	int i;
+
+	for (i = MLXSW_REG_RECR2_IPV4_SIP0; i <= MLXSW_REG_RECR2_IPV4_SIP3; i++)
+		mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i,
+							       true);
+}
+
+static inline void mlxsw_reg_recr2_ipv4_dip_enable(char *payload)
+{
+	int i;
+
+	for (i = MLXSW_REG_RECR2_IPV4_DIP0; i <= MLXSW_REG_RECR2_IPV4_DIP3; i++)
+		mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i,
+							       true);
+}
+
+static inline void mlxsw_reg_recr2_ipv6_sip_enable(char *payload)
+{
+	int i = MLXSW_REG_RECR2_IPV6_SIP0_7;
+
+	mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, true);
+
+	i = MLXSW_REG_RECR2_IPV6_SIP8;
+	for (; i <= MLXSW_REG_RECR2_IPV6_SIP15; i++)
+		mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i,
+							       true);
+}
+
+static inline void mlxsw_reg_recr2_ipv6_dip_enable(char *payload)
+{
+	int i = MLXSW_REG_RECR2_IPV6_DIP0_7;
+
+	mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, true);
+
+	i = MLXSW_REG_RECR2_IPV6_DIP8;
+	for (; i <= MLXSW_REG_RECR2_IPV6_DIP15; i++)
+		mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i,
+							       true);
+}
+
+static inline void mlxsw_reg_recr2_pack(char *payload, u32 seed)
+{
+	MLXSW_REG_ZERO(recr2, payload);
+	mlxsw_reg_recr2_pp_set(payload, false);
+	mlxsw_reg_recr2_sh_set(payload, true);
+	mlxsw_reg_recr2_seed_set(payload, seed);
+}
+
+/* RMFT-V2 - Router Multicast Forwarding Table Version 2 Register
+ * --------------------------------------------------------------
+ * The RMFT_V2 register is used to configure and query the multicast table.
+ */
+#define MLXSW_REG_RMFT2_ID 0x8027
+#define MLXSW_REG_RMFT2_LEN 0x174
+
+MLXSW_REG_DEFINE(rmft2, MLXSW_REG_RMFT2_ID, MLXSW_REG_RMFT2_LEN);
+
+/* reg_rmft2_v
+ * Valid
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rmft2, v, 0x00, 31, 1);
+
+enum mlxsw_reg_rmft2_type {
+	MLXSW_REG_RMFT2_TYPE_IPV4,
+	MLXSW_REG_RMFT2_TYPE_IPV6
+};
+
+/* reg_rmft2_type
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rmft2, type, 0x00, 28, 2);
+
+enum mlxsw_sp_reg_rmft2_op {
+	/* For Write:
+	 * Write operation. Used to write a new entry to the table. All RW
+	 * fields are relevant for new entry. Activity bit is set for new
+	 * entries - Note write with v (Valid) 0 will delete the entry.
+	 * For Query:
+	 * Read operation
+	 */
+	MLXSW_REG_RMFT2_OP_READ_WRITE,
+};
+
+/* reg_rmft2_op
+ * Operation.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, rmft2, op, 0x00, 20, 2);
+
+/* reg_rmft2_a
+ * Activity. Set for new entries. Set if a packet lookup has hit on the specific
+ * entry.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, rmft2, a, 0x00, 16, 1);
+
+/* reg_rmft2_offset
+ * Offset within the multicast forwarding table to write to.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rmft2, offset, 0x00, 0, 16);
+
+/* reg_rmft2_virtual_router
+ * Virtual Router ID. Range from 0..cap_max_virtual_routers-1
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rmft2, virtual_router, 0x04, 0, 16);
+
+enum mlxsw_reg_rmft2_irif_mask {
+	MLXSW_REG_RMFT2_IRIF_MASK_IGNORE,
+	MLXSW_REG_RMFT2_IRIF_MASK_COMPARE
+};
+
+/* reg_rmft2_irif_mask
+ * Ingress RIF mask.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rmft2, irif_mask, 0x08, 24, 1);
+
+/* reg_rmft2_irif
+ * Ingress RIF index.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rmft2, irif, 0x08, 0, 16);
+
+/* reg_rmft2_dip4
+ * Destination IPv4 address
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rmft2, dip4, 0x1C, 0, 32);
+
+/* reg_rmft2_dip4_mask
+ * A bit that is set directs the TCAM to compare the corresponding bit in key. A
+ * bit that is clear directs the TCAM to ignore the corresponding bit in key.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rmft2, dip4_mask, 0x2C, 0, 32);
+
+/* reg_rmft2_sip4
+ * Source IPv4 address
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rmft2, sip4, 0x3C, 0, 32);
+
+/* reg_rmft2_sip4_mask
+ * A bit that is set directs the TCAM to compare the corresponding bit in key. A
+ * bit that is clear directs the TCAM to ignore the corresponding bit in key.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rmft2, sip4_mask, 0x4C, 0, 32);
+
+/* reg_rmft2_flexible_action_set
+ * ACL action set. The only supported action types in this field and in any
+ * action-set pointed from here are as follows:
+ * 00h: ACTION_NULL
+ * 01h: ACTION_MAC_TTL, only TTL configuration is supported.
+ * 03h: ACTION_TRAP
+ * 06h: ACTION_QOS
+ * 08h: ACTION_POLICING_MONITORING
+ * 10h: ACTION_ROUTER_MC
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, rmft2, flexible_action_set, 0x80,
+	       MLXSW_REG_FLEX_ACTION_SET_LEN);
+
+static inline void
+mlxsw_reg_rmft2_ipv4_pack(char *payload, bool v, u16 offset, u16 virtual_router,
+			  enum mlxsw_reg_rmft2_irif_mask irif_mask, u16 irif,
+			  u32 dip4, u32 dip4_mask, u32 sip4, u32 sip4_mask,
+			  const char *flexible_action_set)
+{
+	MLXSW_REG_ZERO(rmft2, payload);
+	mlxsw_reg_rmft2_v_set(payload, v);
+	mlxsw_reg_rmft2_type_set(payload, MLXSW_REG_RMFT2_TYPE_IPV4);
+	mlxsw_reg_rmft2_op_set(payload, MLXSW_REG_RMFT2_OP_READ_WRITE);
+	mlxsw_reg_rmft2_offset_set(payload, offset);
+	mlxsw_reg_rmft2_virtual_router_set(payload, virtual_router);
+	mlxsw_reg_rmft2_irif_mask_set(payload, irif_mask);
+	mlxsw_reg_rmft2_irif_set(payload, irif);
+	mlxsw_reg_rmft2_dip4_set(payload, dip4);
+	mlxsw_reg_rmft2_dip4_mask_set(payload, dip4_mask);
+	mlxsw_reg_rmft2_sip4_set(payload, sip4);
+	mlxsw_reg_rmft2_sip4_mask_set(payload, sip4_mask);
+	if (flexible_action_set)
+		mlxsw_reg_rmft2_flexible_action_set_memcpy_to(payload,
+							      flexible_action_set);
+}
+
 /* MFCR - Management Fan Control Register
  * --------------------------------------
  * This register controls the settings of the Fan Speed PWM mechanism.
@@ -6885,6 +7609,8 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
 	MLXSW_REG(svpe),
 	MLXSW_REG(sfmr),
 	MLXSW_REG(spvmlr),
+	MLXSW_REG(cwtp),
+	MLXSW_REG(cwtpm),
 	MLXSW_REG(ppbt),
 	MLXSW_REG(pacl),
 	MLXSW_REG(pagt),
@@ -6911,9 +7637,11 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
 	MLXSW_REG(hpkt),
 	MLXSW_REG(rgcr),
 	MLXSW_REG(ritr),
+	MLXSW_REG(rtar),
 	MLXSW_REG(ratr),
 	MLXSW_REG(rtdp),
 	MLXSW_REG(ricnt),
+	MLXSW_REG(rrcr),
 	MLXSW_REG(ralta),
 	MLXSW_REG(ralst),
 	MLXSW_REG(raltb),
@@ -6921,6 +7649,9 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
 	MLXSW_REG(rauht),
 	MLXSW_REG(raleu),
 	MLXSW_REG(rauhtd),
+	MLXSW_REG(rigr2),
+	MLXSW_REG(recr2),
+	MLXSW_REG(rmft2),
 	MLXSW_REG(mfcr),
 	MLXSW_REG(mfsc),
 	MLXSW_REG(mfsm),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h
index 9556d934714b..087aad52c195 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/resources.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h
@@ -63,6 +63,7 @@ enum mlxsw_res_id {
 	MLXSW_RES_ID_MAX_CPU_POLICERS,
 	MLXSW_RES_ID_MAX_VRS,
 	MLXSW_RES_ID_MAX_RIFS,
+	MLXSW_RES_ID_MC_ERIF_LIST_ENTRIES,
 	MLXSW_RES_ID_MAX_LPM_TREES,
 
 	/* Internal resources.
@@ -100,6 +101,7 @@ static u16 mlxsw_res_ids[] = {
 	[MLXSW_RES_ID_MAX_CPU_POLICERS] = 0x2A13,
 	[MLXSW_RES_ID_MAX_VRS] = 0x2C01,
 	[MLXSW_RES_ID_MAX_RIFS] = 0x2C02,
+	[MLXSW_RES_ID_MC_ERIF_LIST_ENTRIES] = 0x2C10,
 	[MLXSW_RES_ID_MAX_LPM_TREES] = 0x2C30,
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 696b99e65a5a..2d46ec84ebdf 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -53,6 +53,7 @@
 #include <linux/notifier.h>
 #include <linux/dcbnl.h>
 #include <linux/inetdevice.h>
+#include <linux/netlink.h>
 #include <net/switchdev.h>
 #include <net/pkt_cls.h>
 #include <net/tc_act/tc_mirred.h>
@@ -69,11 +70,12 @@
 #include "txheader.h"
 #include "spectrum_cnt.h"
 #include "spectrum_dpipe.h"
+#include "spectrum_acl_flex_actions.h"
 #include "../mlxfw/mlxfw.h"
 
 #define MLXSW_FWREV_MAJOR 13
-#define MLXSW_FWREV_MINOR 1420
-#define MLXSW_FWREV_SUBMINOR 122
+#define MLXSW_FWREV_MINOR 1530
+#define MLXSW_FWREV_SUBMINOR 152
 
 static const struct mlxsw_fw_rev mlxsw_sp_supported_fw_rev = {
 	.major = MLXSW_FWREV_MAJOR,
@@ -1322,20 +1324,54 @@ out:
 	return err;
 }
 
+static void
+mlxsw_sp_port_get_hw_xstats(struct net_device *dev,
+			    struct mlxsw_sp_port_xstats *xstats)
+{
+	char ppcnt_pl[MLXSW_REG_PPCNT_LEN];
+	int err, i;
+
+	err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_EXT_CNT, 0,
+					  ppcnt_pl);
+	if (!err)
+		xstats->ecn = mlxsw_reg_ppcnt_ecn_marked_get(ppcnt_pl);
+
+	for (i = 0; i < TC_MAX_QUEUE; i++) {
+		err = mlxsw_sp_port_get_stats_raw(dev,
+						  MLXSW_REG_PPCNT_TC_CONG_TC,
+						  i, ppcnt_pl);
+		if (!err)
+			xstats->wred_drop[i] =
+				mlxsw_reg_ppcnt_wred_discard_get(ppcnt_pl);
+
+		err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_TC_CNT,
+						  i, ppcnt_pl);
+		if (err)
+			continue;
+
+		xstats->backlog[i] =
+			mlxsw_reg_ppcnt_tc_transmit_queue_get(ppcnt_pl);
+		xstats->tail_drop[i] =
+			mlxsw_reg_ppcnt_tc_no_buffer_discard_uc_get(ppcnt_pl);
+	}
+}
+
 static void update_stats_cache(struct work_struct *work)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port =
 		container_of(work, struct mlxsw_sp_port,
-			     hw_stats.update_dw.work);
+			     periodic_hw_stats.update_dw.work);
 
 	if (!netif_carrier_ok(mlxsw_sp_port->dev))
 		goto out;
 
 	mlxsw_sp_port_get_hw_stats(mlxsw_sp_port->dev,
-				   mlxsw_sp_port->hw_stats.cache);
+				   &mlxsw_sp_port->periodic_hw_stats.stats);
+	mlxsw_sp_port_get_hw_xstats(mlxsw_sp_port->dev,
+				    &mlxsw_sp_port->periodic_hw_stats.xstats);
 
 out:
-	mlxsw_core_schedule_dw(&mlxsw_sp_port->hw_stats.update_dw,
+	mlxsw_core_schedule_dw(&mlxsw_sp_port->periodic_hw_stats.update_dw,
 			       MLXSW_HW_STATS_UPDATE_TIME);
 }
 
@@ -1348,7 +1384,7 @@ mlxsw_sp_port_get_stats64(struct net_device *dev,
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 
-	memcpy(stats, mlxsw_sp_port->hw_stats.cache, sizeof(*stats));
+	memcpy(stats, &mlxsw_sp_port->periodic_hw_stats.stats, sizeof(*stats));
 }
 
 static int __mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port,
@@ -1695,17 +1731,9 @@ static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
 }
 
 static int mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
-					  struct tc_cls_matchall_offload *f)
+					  struct tc_cls_matchall_offload *f,
+					  bool ingress)
 {
-	bool ingress;
-
-	if (is_classid_clsact_ingress(f->common.classid))
-		ingress = true;
-	else if (is_classid_clsact_egress(f->common.classid))
-		ingress = false;
-	else
-		return -EOPNOTSUPP;
-
 	if (f->common.chain_index)
 		return -EOPNOTSUPP;
 
@@ -1723,17 +1751,9 @@ static int mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
 
 static int
 mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_port *mlxsw_sp_port,
-			     struct tc_cls_flower_offload *f)
+			     struct tc_cls_flower_offload *f,
+			     bool ingress)
 {
-	bool ingress;
-
-	if (is_classid_clsact_ingress(f->common.classid))
-		ingress = true;
-	else if (is_classid_clsact_egress(f->common.classid))
-		ingress = false;
-	else
-		return -EOPNOTSUPP;
-
 	switch (f->command) {
 	case TC_CLSFLOWER_REPLACE:
 		return mlxsw_sp_flower_replace(mlxsw_sp_port, ingress, f);
@@ -1747,16 +1767,72 @@ mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_port *mlxsw_sp_port,
 	}
 }
 
+static int mlxsw_sp_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+				      void *cb_priv, bool ingress)
+{
+	struct mlxsw_sp_port *mlxsw_sp_port = cb_priv;
+
+	if (!tc_can_offload(mlxsw_sp_port->dev))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case TC_SETUP_CLSMATCHALL:
+		return mlxsw_sp_setup_tc_cls_matchall(mlxsw_sp_port, type_data,
+						      ingress);
+	case TC_SETUP_CLSFLOWER:
+		return mlxsw_sp_setup_tc_cls_flower(mlxsw_sp_port, type_data,
+						    ingress);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int mlxsw_sp_setup_tc_block_cb_ig(enum tc_setup_type type,
+					 void *type_data, void *cb_priv)
+{
+	return mlxsw_sp_setup_tc_block_cb(type, type_data, cb_priv, true);
+}
+
+static int mlxsw_sp_setup_tc_block_cb_eg(enum tc_setup_type type,
+					 void *type_data, void *cb_priv)
+{
+	return mlxsw_sp_setup_tc_block_cb(type, type_data, cb_priv, false);
+}
+
+static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port,
+				   struct tc_block_offload *f)
+{
+	tc_setup_cb_t *cb;
+
+	if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+		cb = mlxsw_sp_setup_tc_block_cb_ig;
+	else if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
+		cb = mlxsw_sp_setup_tc_block_cb_eg;
+	else
+		return -EOPNOTSUPP;
+
+	switch (f->command) {
+	case TC_BLOCK_BIND:
+		return tcf_block_cb_register(f->block, cb, mlxsw_sp_port,
+					     mlxsw_sp_port);
+	case TC_BLOCK_UNBIND:
+		tcf_block_cb_unregister(f->block, cb, mlxsw_sp_port);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			     void *type_data)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 
 	switch (type) {
-	case TC_SETUP_CLSMATCHALL:
-		return mlxsw_sp_setup_tc_cls_matchall(mlxsw_sp_port, type_data);
-	case TC_SETUP_CLSFLOWER:
-		return mlxsw_sp_setup_tc_cls_flower(mlxsw_sp_port, type_data);
+	case TC_SETUP_BLOCK:
+		return mlxsw_sp_setup_tc_block(mlxsw_sp_port, type_data);
+	case TC_SETUP_QDISC_RED:
+		return mlxsw_sp_setup_tc_red(mlxsw_sp_port, type_data);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -2868,14 +2944,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 		goto err_alloc_sample;
 	}
 
-	mlxsw_sp_port->hw_stats.cache =
-		kzalloc(sizeof(*mlxsw_sp_port->hw_stats.cache), GFP_KERNEL);
-
-	if (!mlxsw_sp_port->hw_stats.cache) {
-		err = -ENOMEM;
-		goto err_alloc_hw_stats;
-	}
-	INIT_DELAYED_WORK(&mlxsw_sp_port->hw_stats.update_dw,
+	INIT_DELAYED_WORK(&mlxsw_sp_port->periodic_hw_stats.update_dw,
 			  &update_stats_cache);
 
 	dev->netdev_ops = &mlxsw_sp_port_netdev_ops;
@@ -2974,6 +3043,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 	if (IS_ERR(mlxsw_sp_port_vlan)) {
 		dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create VID 1\n",
 			mlxsw_sp_port->local_port);
+		err = PTR_ERR(mlxsw_sp_port_vlan);
 		goto err_port_vlan_get;
 	}
 
@@ -2989,7 +3059,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 	mlxsw_core_port_eth_set(mlxsw_sp->core, mlxsw_sp_port->local_port,
 				mlxsw_sp_port, dev, mlxsw_sp_port->split,
 				module);
-	mlxsw_core_schedule_dw(&mlxsw_sp_port->hw_stats.update_dw, 0);
+	mlxsw_core_schedule_dw(&mlxsw_sp_port->periodic_hw_stats.update_dw, 0);
 	return 0;
 
 err_register_netdev:
@@ -3012,8 +3082,6 @@ err_dev_addr_init:
 err_port_swid_set:
 	mlxsw_sp_port_module_unmap(mlxsw_sp_port);
 err_port_module_map:
-	kfree(mlxsw_sp_port->hw_stats.cache);
-err_alloc_hw_stats:
 	kfree(mlxsw_sp_port->sample);
 err_alloc_sample:
 	free_percpu(mlxsw_sp_port->pcpu_stats);
@@ -3028,7 +3096,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port];
 
-	cancel_delayed_work_sync(&mlxsw_sp_port->hw_stats.update_dw);
+	cancel_delayed_work_sync(&mlxsw_sp_port->periodic_hw_stats.update_dw);
 	mlxsw_core_port_clear(mlxsw_sp->core, local_port, mlxsw_sp);
 	unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */
 	mlxsw_sp->ports[local_port] = NULL;
@@ -3038,7 +3106,6 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
 	mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
 	mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT);
 	mlxsw_sp_port_module_unmap(mlxsw_sp_port);
-	kfree(mlxsw_sp_port->hw_stats.cache);
 	kfree(mlxsw_sp_port->sample);
 	free_percpu(mlxsw_sp_port->pcpu_stats);
 	WARN_ON_ONCE(!list_empty(&mlxsw_sp_port->vlans_list));
@@ -3311,6 +3378,14 @@ static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u8 local_port,
 	return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv);
 }
 
+static void mlxsw_sp_rx_listener_mr_mark_func(struct sk_buff *skb,
+					      u8 local_port, void *priv)
+{
+	skb->offload_mr_fwd_mark = 1;
+	skb->offload_fwd_mark = 1;
+	return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv);
+}
+
 static void mlxsw_sp_rx_listener_sample_func(struct sk_buff *skb, u8 local_port,
 					     void *priv)
 {
@@ -3354,6 +3429,10 @@ out:
 	MLXSW_RXL(mlxsw_sp_rx_listener_mark_func, _trap_id, _action,	\
 		_is_ctrl, SP_##_trap_group, DISCARD)
 
+#define MLXSW_SP_RXL_MR_MARK(_trap_id, _action, _trap_group, _is_ctrl)	\
+	MLXSW_RXL(mlxsw_sp_rx_listener_mr_mark_func, _trap_id, _action,	\
+		_is_ctrl, SP_##_trap_group, DISCARD)
+
 #define MLXSW_SP_EVENTL(_func, _trap_id)		\
 	MLXSW_EVENTL(_func, _trap_id, SP_EVENT)
 
@@ -3420,6 +3499,11 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 		  false, SP_IP2ME, DISCARD),
 	/* ACL trap */
 	MLXSW_SP_RXL_NO_MARK(ACL0, TRAP_TO_CPU, IP2ME, false),
+	/* Multicast Router Traps */
+	MLXSW_SP_RXL_MARK(IPV4_PIM, TRAP_TO_CPU, PIM, false),
+	MLXSW_SP_RXL_MARK(RPF, TRAP_TO_CPU, RPF, false),
+	MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false),
+	MLXSW_SP_RXL_MR_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false),
 };
 
 static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
@@ -3445,6 +3529,8 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF:
 			rate = 128;
 			burst_size = 7;
 			break;
@@ -3460,6 +3546,7 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST:
 			rate = 1024;
 			burst_size = 7;
 			break;
@@ -3505,6 +3592,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM:
 			priority = 5;
 			tc = 5;
 			break;
@@ -3521,12 +3609,14 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
 			break;
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF:
 			priority = 2;
 			tc = 2;
 			break;
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST:
 			priority = 1;
 			tc = 1;
 			break;
@@ -3642,6 +3732,9 @@ static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
 	return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
 }
 
+static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
+				    unsigned long event, void *ptr);
+
 static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
 			 const struct mlxsw_bus_info *mlxsw_bus_info)
 {
@@ -3663,10 +3756,16 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
 		return err;
 	}
 
+	err = mlxsw_sp_kvdl_init(mlxsw_sp);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize KVDL\n");
+		return err;
+	}
+
 	err = mlxsw_sp_fids_init(mlxsw_sp);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize FIDs\n");
-		return err;
+		goto err_fids_init;
 	}
 
 	err = mlxsw_sp_traps_init(mlxsw_sp);
@@ -3693,12 +3792,34 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
 		goto err_switchdev_init;
 	}
 
+	err = mlxsw_sp_counter_pool_init(mlxsw_sp);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Failed to init counter pool\n");
+		goto err_counter_pool_init;
+	}
+
+	err = mlxsw_sp_afa_init(mlxsw_sp);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL actions\n");
+		goto err_afa_init;
+	}
+
 	err = mlxsw_sp_router_init(mlxsw_sp);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n");
 		goto err_router_init;
 	}
 
+	/* Initialize netdevice notifier after router is initialized, so that
+	 * the event handler can use router structures.
+	 */
+	mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event;
+	err = register_netdevice_notifier(&mlxsw_sp->netdevice_nb);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Failed to register netdev notifier\n");
+		goto err_netdev_notifier;
+	}
+
 	err = mlxsw_sp_span_init(mlxsw_sp);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
@@ -3711,12 +3832,6 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
 		goto err_acl_init;
 	}
 
-	err = mlxsw_sp_counter_pool_init(mlxsw_sp);
-	if (err) {
-		dev_err(mlxsw_sp->bus_info->dev, "Failed to init counter pool\n");
-		goto err_counter_pool_init;
-	}
-
 	err = mlxsw_sp_dpipe_init(mlxsw_sp);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Failed to init pipeline debug\n");
@@ -3734,14 +3849,18 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
 err_ports_create:
 	mlxsw_sp_dpipe_fini(mlxsw_sp);
 err_dpipe_init:
-	mlxsw_sp_counter_pool_fini(mlxsw_sp);
-err_counter_pool_init:
 	mlxsw_sp_acl_fini(mlxsw_sp);
 err_acl_init:
 	mlxsw_sp_span_fini(mlxsw_sp);
 err_span_init:
+	unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
+err_netdev_notifier:
 	mlxsw_sp_router_fini(mlxsw_sp);
 err_router_init:
+	mlxsw_sp_afa_fini(mlxsw_sp);
+err_afa_init:
+	mlxsw_sp_counter_pool_fini(mlxsw_sp);
+err_counter_pool_init:
 	mlxsw_sp_switchdev_fini(mlxsw_sp);
 err_switchdev_init:
 	mlxsw_sp_lag_fini(mlxsw_sp);
@@ -3751,6 +3870,8 @@ err_buffers_init:
 	mlxsw_sp_traps_fini(mlxsw_sp);
 err_traps_init:
 	mlxsw_sp_fids_fini(mlxsw_sp);
+err_fids_init:
+	mlxsw_sp_kvdl_fini(mlxsw_sp);
 	return err;
 }
 
@@ -3760,15 +3881,18 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
 
 	mlxsw_sp_ports_remove(mlxsw_sp);
 	mlxsw_sp_dpipe_fini(mlxsw_sp);
-	mlxsw_sp_counter_pool_fini(mlxsw_sp);
 	mlxsw_sp_acl_fini(mlxsw_sp);
 	mlxsw_sp_span_fini(mlxsw_sp);
+	unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
 	mlxsw_sp_router_fini(mlxsw_sp);
+	mlxsw_sp_afa_fini(mlxsw_sp);
+	mlxsw_sp_counter_pool_fini(mlxsw_sp);
 	mlxsw_sp_switchdev_fini(mlxsw_sp);
 	mlxsw_sp_lag_fini(mlxsw_sp);
 	mlxsw_sp_buffers_fini(mlxsw_sp);
 	mlxsw_sp_traps_fini(mlxsw_sp);
 	mlxsw_sp_fids_fini(mlxsw_sp);
+	mlxsw_sp_kvdl_fini(mlxsw_sp);
 }
 
 static const struct mlxsw_config_profile mlxsw_sp_config_profile = {
@@ -3791,8 +3915,8 @@ static const struct mlxsw_config_profile mlxsw_sp_config_profile = {
 	.max_pkey			= 0,
 	.used_kvd_split_data		= 1,
 	.kvd_hash_granularity		= MLXSW_SP_KVD_GRANULARITY,
-	.kvd_hash_single_parts		= 2,
-	.kvd_hash_double_parts		= 1,
+	.kvd_hash_single_parts		= 59,
+	.kvd_hash_double_parts		= 41,
 	.kvd_linear_size		= MLXSW_SP_KVD_LINEAR_SIZE,
 	.swid_config			= {
 		{
@@ -3986,14 +4110,21 @@ static int mlxsw_sp_lag_index_get(struct mlxsw_sp *mlxsw_sp,
 static bool
 mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp,
 			  struct net_device *lag_dev,
-			  struct netdev_lag_upper_info *lag_upper_info)
+			  struct netdev_lag_upper_info *lag_upper_info,
+			  struct netlink_ext_ack *extack)
 {
 	u16 lag_id;
 
-	if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0)
+	if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) {
+		NL_SET_ERR_MSG(extack,
+			       "spectrum: Exceeded number of supported LAG devices");
 		return false;
-	if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH)
+	}
+	if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) {
+		NL_SET_ERR_MSG(extack,
+			       "spectrum: LAG device using unsupported Tx type");
 		return false;
+	}
 	return true;
 }
 
@@ -4198,6 +4329,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
 {
 	struct netdev_notifier_changeupper_info *info;
 	struct mlxsw_sp_port *mlxsw_sp_port;
+	struct netlink_ext_ack *extack;
 	struct net_device *upper_dev;
 	struct mlxsw_sp *mlxsw_sp;
 	int err = 0;
@@ -4205,6 +4337,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
 	mlxsw_sp_port = netdev_priv(dev);
 	mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	info = ptr;
+	extack = netdev_notifier_info_to_extack(&info->info);
 
 	switch (event) {
 	case NETDEV_PRECHANGEUPPER:
@@ -4212,25 +4345,43 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
 		if (!is_vlan_dev(upper_dev) &&
 		    !netif_is_lag_master(upper_dev) &&
 		    !netif_is_bridge_master(upper_dev) &&
-		    !netif_is_ovs_master(upper_dev))
+		    !netif_is_ovs_master(upper_dev)) {
+			NL_SET_ERR_MSG(extack,
+				       "spectrum: Unknown upper device type");
 			return -EINVAL;
+		}
 		if (!info->linking)
 			break;
-		if (netdev_has_any_upper_dev(upper_dev))
+		if (netdev_has_any_upper_dev(upper_dev)) {
+			NL_SET_ERR_MSG(extack,
+				       "spectrum: Enslaving a port to a device that already has an upper device is not supported");
 			return -EINVAL;
+		}
 		if (netif_is_lag_master(upper_dev) &&
 		    !mlxsw_sp_master_lag_check(mlxsw_sp, upper_dev,
-					       info->upper_info))
+					       info->upper_info, extack))
 			return -EINVAL;
-		if (netif_is_lag_master(upper_dev) && vlan_uses_dev(dev))
+		if (netif_is_lag_master(upper_dev) && vlan_uses_dev(dev)) {
+			NL_SET_ERR_MSG(extack,
+				       "spectrum: Master device is a LAG master and this device has a VLAN");
 			return -EINVAL;
+		}
 		if (netif_is_lag_port(dev) && is_vlan_dev(upper_dev) &&
-		    !netif_is_lag_master(vlan_dev_real_dev(upper_dev)))
+		    !netif_is_lag_master(vlan_dev_real_dev(upper_dev))) {
+			NL_SET_ERR_MSG(extack,
+				       "spectrum: Can not put a VLAN on a LAG port");
 			return -EINVAL;
-		if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev))
+		}
+		if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) {
+			NL_SET_ERR_MSG(extack,
+				       "spectrum: Master device is an OVS master and this device has a VLAN");
 			return -EINVAL;
-		if (netif_is_ovs_port(dev) && is_vlan_dev(upper_dev))
+		}
+		if (netif_is_ovs_port(dev) && is_vlan_dev(upper_dev)) {
+			NL_SET_ERR_MSG(extack,
+				       "spectrum: Can not put a VLAN on an OVS port");
 			return -EINVAL;
+		}
 		break;
 	case NETDEV_CHANGEUPPER:
 		upper_dev = info->upper_dev;
@@ -4238,7 +4389,8 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
 			if (info->linking)
 				err = mlxsw_sp_port_bridge_join(mlxsw_sp_port,
 								lower_dev,
-								upper_dev);
+								upper_dev,
+								extack);
 			else
 				mlxsw_sp_port_bridge_leave(mlxsw_sp_port,
 							   lower_dev,
@@ -4329,18 +4481,25 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 	struct netdev_notifier_changeupper_info *info = ptr;
+	struct netlink_ext_ack *extack;
 	struct net_device *upper_dev;
 	int err = 0;
 
+	extack = netdev_notifier_info_to_extack(&info->info);
+
 	switch (event) {
 	case NETDEV_PRECHANGEUPPER:
 		upper_dev = info->upper_dev;
-		if (!netif_is_bridge_master(upper_dev))
+		if (!netif_is_bridge_master(upper_dev)) {
+			NL_SET_ERR_MSG(extack, "spectrum: VLAN devices only support bridge and VRF uppers");
 			return -EINVAL;
+		}
 		if (!info->linking)
 			break;
-		if (netdev_has_any_upper_dev(upper_dev))
+		if (netdev_has_any_upper_dev(upper_dev)) {
+			NL_SET_ERR_MSG(extack, "spectrum: Enslaving a port to a device that already has an upper device is not supported");
 			return -EINVAL;
+		}
 		break;
 	case NETDEV_CHANGEUPPER:
 		upper_dev = info->upper_dev;
@@ -4348,7 +4507,8 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
 			if (info->linking)
 				err = mlxsw_sp_port_bridge_join(mlxsw_sp_port,
 								vlan_dev,
-								upper_dev);
+								upper_dev,
+								extack);
 			else
 				mlxsw_sp_port_bridge_leave(mlxsw_sp_port,
 							   vlan_dev,
@@ -4411,13 +4571,21 @@ static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr)
 	return netif_is_l3_master(info->upper_dev);
 }
 
-static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
+static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
 				    unsigned long event, void *ptr)
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct mlxsw_sp *mlxsw_sp;
 	int err = 0;
 
-	if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU)
+	mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb);
+	if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev))
+		err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev,
+						       event, ptr);
+	else if (mlxsw_sp_netdev_is_ipip_ul(mlxsw_sp, dev))
+		err = mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, dev,
+						       event, ptr);
+	else if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU)
 		err = mlxsw_sp_netdevice_router_port_event(dev);
 	else if (mlxsw_sp_is_vrf_event(event, ptr))
 		err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr);
@@ -4431,21 +4599,20 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
 	return notifier_from_errno(err);
 }
 
-static struct notifier_block mlxsw_sp_netdevice_nb __read_mostly = {
-	.notifier_call = mlxsw_sp_netdevice_event,
+static struct notifier_block mlxsw_sp_inetaddr_valid_nb __read_mostly = {
+	.notifier_call = mlxsw_sp_inetaddr_valid_event,
 };
 
 static struct notifier_block mlxsw_sp_inetaddr_nb __read_mostly = {
 	.notifier_call = mlxsw_sp_inetaddr_event,
-	.priority = 10,	/* Must be called before FIB notifier block */
 };
 
-static struct notifier_block mlxsw_sp_inet6addr_nb __read_mostly = {
-	.notifier_call = mlxsw_sp_inet6addr_event,
+static struct notifier_block mlxsw_sp_inet6addr_valid_nb __read_mostly = {
+	.notifier_call = mlxsw_sp_inet6addr_valid_event,
 };
 
-static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = {
-	.notifier_call = mlxsw_sp_router_netevent_event,
+static struct notifier_block mlxsw_sp_inet6addr_nb __read_mostly = {
+	.notifier_call = mlxsw_sp_inet6addr_event,
 };
 
 static const struct pci_device_id mlxsw_sp_pci_id_table[] = {
@@ -4462,10 +4629,10 @@ static int __init mlxsw_sp_module_init(void)
 {
 	int err;
 
-	register_netdevice_notifier(&mlxsw_sp_netdevice_nb);
+	register_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb);
 	register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
+	register_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb);
 	register_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
-	register_netevent_notifier(&mlxsw_sp_router_netevent_nb);
 
 	err = mlxsw_core_driver_register(&mlxsw_sp_driver);
 	if (err)
@@ -4480,10 +4647,10 @@ static int __init mlxsw_sp_module_init(void)
 err_pci_driver_register:
 	mlxsw_core_driver_unregister(&mlxsw_sp_driver);
 err_core_driver_register:
-	unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
 	unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
+	unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb);
 	unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
-	unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb);
+	unregister_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb);
 	return err;
 }
 
@@ -4491,10 +4658,10 @@ static void __exit mlxsw_sp_module_exit(void)
 {
 	mlxsw_pci_driver_unregister(&mlxsw_sp_pci_driver);
 	mlxsw_core_driver_unregister(&mlxsw_sp_driver);
-	unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
 	unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
+	unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb);
 	unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
-	unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb);
+	unregister_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb);
 }
 
 module_init(mlxsw_sp_module_init);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 84ce83acdc19..58cf222fb985 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -48,6 +48,7 @@
 #include <linux/notifier.h>
 #include <net/psample.h>
 #include <net/pkt_cls.h>
+#include <net/red.h>
 
 #include "port.h"
 #include "core.h"
@@ -62,7 +63,7 @@
 
 #define MLXSW_SP_PORT_BASE_SPEED 25000	/* Mb/s */
 
-#define MLXSW_SP_KVD_LINEAR_SIZE 65536 /* entries */
+#define MLXSW_SP_KVD_LINEAR_SIZE 98304 /* entries */
 #define MLXSW_SP_KVD_GRANULARITY 128
 
 struct mlxsw_sp_port;
@@ -94,7 +95,8 @@ struct mlxsw_sp_mid {
 	unsigned char addr[ETH_ALEN];
 	u16 fid;
 	u16 mid;
-	unsigned int ref_count;
+	bool in_hw;
+	unsigned long *ports_in_mid; /* bits array */
 };
 
 enum mlxsw_sp_span_type {
@@ -138,9 +140,11 @@ struct mlxsw_sp_port_mall_tc_entry {
 struct mlxsw_sp_sb;
 struct mlxsw_sp_bridge;
 struct mlxsw_sp_router;
+struct mlxsw_sp_mr;
 struct mlxsw_sp_acl;
 struct mlxsw_sp_counter_pool;
 struct mlxsw_sp_fid_core;
+struct mlxsw_sp_kvdl;
 
 struct mlxsw_sp {
 	struct mlxsw_sp_port **ports;
@@ -152,11 +156,12 @@ struct mlxsw_sp {
 	struct mlxsw_sp_sb *sb;
 	struct mlxsw_sp_bridge *bridge;
 	struct mlxsw_sp_router *router;
+	struct mlxsw_sp_mr *mr;
+	struct mlxsw_afa *afa;
 	struct mlxsw_sp_acl *acl;
 	struct mlxsw_sp_fid_core *fid_core;
-	struct {
-		DECLARE_BITMAP(usage, MLXSW_SP_KVD_LINEAR_SIZE);
-	} kvdl;
+	struct mlxsw_sp_kvdl *kvdl;
+	struct notifier_block netdevice_nb;
 
 	struct mlxsw_sp_counter_pool *counter_pool;
 	struct {
@@ -199,6 +204,37 @@ struct mlxsw_sp_port_vlan {
 	struct list_head bridge_vlan_node;
 };
 
+enum mlxsw_sp_qdisc_type {
+	MLXSW_SP_QDISC_NO_QDISC,
+	MLXSW_SP_QDISC_RED,
+};
+
+struct mlxsw_sp_qdisc {
+	u32 handle;
+	enum mlxsw_sp_qdisc_type type;
+	struct red_stats xstats_base;
+	union {
+		struct {
+			u64 tail_drop_base;
+			u64 ecn_base;
+			u64 wred_drop_base;
+		} red;
+	} xstats;
+
+	u64 tx_bytes;
+	u64 tx_packets;
+	u64 drops;
+	u64 overlimits;
+};
+
+/* No need an internal lock; At worse - miss a single periodic iteration */
+struct mlxsw_sp_port_xstats {
+	u64 ecn;
+	u64 wred_drop[TC_MAX_QUEUE];
+	u64 tail_drop[TC_MAX_QUEUE];
+	u64 backlog[TC_MAX_QUEUE];
+};
+
 struct mlxsw_sp_port {
 	struct net_device *dev;
 	struct mlxsw_sp_port_pcpu_stats __percpu *pcpu_stats;
@@ -227,11 +263,13 @@ struct mlxsw_sp_port {
 	struct list_head mall_tc_list;
 	struct {
 		#define MLXSW_HW_STATS_UPDATE_TIME HZ
-		struct rtnl_link_stats64 *cache;
+		struct rtnl_link_stats64 stats;
+		struct mlxsw_sp_port_xstats xstats;
 		struct delayed_work update_dw;
-	} hw_stats;
+	} periodic_hw_stats;
 	struct mlxsw_sp_port_sample *sample;
 	struct list_head vlans_list;
+	struct mlxsw_sp_qdisc root_qdisc;
 };
 
 static inline bool
@@ -322,7 +360,8 @@ void
 mlxsw_sp_port_vlan_bridge_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan);
 int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port,
 			      struct net_device *brport_dev,
-			      struct net_device *br_dev);
+			      struct net_device *br_dev,
+			      struct netlink_ext_ack *extack);
 void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port,
 				struct net_device *brport_dev,
 				struct net_device *br_dev);
@@ -380,23 +419,43 @@ static inline void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port)
 /* spectrum_router.c */
 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp);
-int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
-				   unsigned long event, void *ptr);
 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev);
 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
 			    unsigned long event, void *ptr);
+int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
+				  unsigned long event, void *ptr);
 int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
 			     unsigned long event, void *ptr);
+int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
+				   unsigned long event, void *ptr);
 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
 				 struct netdev_notifier_changeupper_info *info);
+bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
+				const struct net_device *dev);
+bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
+				const struct net_device *dev);
+int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
+				     struct net_device *l3_dev,
+				     unsigned long event,
+				     struct netdev_notifier_info *info);
+int
+mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
+				 struct net_device *l3_dev,
+				 unsigned long event,
+				 struct netdev_notifier_info *info);
 void
 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan);
 void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
 
 /* spectrum_kvdl.c */
+int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp);
 int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count,
 			u32 *p_entry_index);
 void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index);
+int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
+				   unsigned int entry_count,
+				   unsigned int *p_alloc_size);
 
 struct mlxsw_sp_acl_rule_info {
 	unsigned int priority;
@@ -466,9 +525,9 @@ void mlxsw_sp_acl_rulei_keymask_buf(struct mlxsw_sp_acl_rule_info *rulei,
 				    enum mlxsw_afk_element element,
 				    const char *key_value,
 				    const char *mask_value, unsigned int len);
-void mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei);
-void mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei,
-				 u16 group_id);
+int mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei);
+int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei,
+				u16 group_id);
 int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei);
 int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei);
 int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp,
@@ -521,6 +580,10 @@ void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
 int mlxsw_sp_flower_stats(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
 			  struct tc_cls_flower_offload *f);
 
+/* spectrum_qdisc.c */
+int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
+			  struct tc_red_qopt_offload *p);
+
 /* spectrum_fid.c */
 int mlxsw_sp_fid_flood_set(struct mlxsw_sp_fid *fid,
 			   enum mlxsw_sp_flood_type packet_type, u8 local_port,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index 4b2455e3e079..93dcd315f7d6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -52,7 +52,6 @@
 struct mlxsw_sp_acl {
 	struct mlxsw_sp *mlxsw_sp;
 	struct mlxsw_afk *afk;
-	struct mlxsw_afa *afa;
 	struct mlxsw_sp_fid *dummy_fid;
 	const struct mlxsw_sp_acl_ops *ops;
 	struct rhashtable ruleset_ht;
@@ -333,7 +332,7 @@ mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl)
 	rulei = kzalloc(sizeof(*rulei), GFP_KERNEL);
 	if (!rulei)
 		return NULL;
-	rulei->act_block = mlxsw_afa_block_create(acl->afa);
+	rulei->act_block = mlxsw_afa_block_create(acl->mlxsw_sp->afa);
 	if (IS_ERR(rulei->act_block)) {
 		err = PTR_ERR(rulei->act_block);
 		goto err_afa_block_create;
@@ -379,15 +378,15 @@ void mlxsw_sp_acl_rulei_keymask_buf(struct mlxsw_sp_acl_rule_info *rulei,
 				 key_value, mask_value, len);
 }
 
-void mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei)
+int mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei)
 {
-	mlxsw_afa_block_continue(rulei->act_block);
+	return mlxsw_afa_block_continue(rulei->act_block);
 }
 
-void mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei,
-				 u16 group_id)
+int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei,
+				u16 group_id)
 {
-	mlxsw_afa_block_jump(rulei->act_block, group_id);
+	return mlxsw_afa_block_jump(rulei->act_block, group_id);
 }
 
 int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei)
@@ -397,7 +396,8 @@ int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei)
 
 int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei)
 {
-	return mlxsw_afa_block_append_trap(rulei->act_block);
+	return mlxsw_afa_block_append_trap(rulei->act_block,
+					   MLXSW_TRAP_ID_ACL0);
 }
 
 int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp,
@@ -653,85 +653,6 @@ int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp,
 	return 0;
 }
 
-#define MLXSW_SP_KDVL_ACT_EXT_SIZE 1
-
-static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index,
-				     char *enc_actions, bool is_first)
-{
-	struct mlxsw_sp *mlxsw_sp = priv;
-	char pefa_pl[MLXSW_REG_PEFA_LEN];
-	u32 kvdl_index;
-	int err;
-
-	/* The first action set of a TCAM entry is stored directly in TCAM,
-	 * not KVD linear area.
-	 */
-	if (is_first)
-		return 0;
-
-	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KDVL_ACT_EXT_SIZE,
-				  &kvdl_index);
-	if (err)
-		return err;
-	mlxsw_reg_pefa_pack(pefa_pl, kvdl_index, enc_actions);
-	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl);
-	if (err)
-		goto err_pefa_write;
-	*p_kvdl_index = kvdl_index;
-	return 0;
-
-err_pefa_write:
-	mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
-	return err;
-}
-
-static void mlxsw_sp_act_kvdl_set_del(void *priv, u32 kvdl_index,
-				      bool is_first)
-{
-	struct mlxsw_sp *mlxsw_sp = priv;
-
-	if (is_first)
-		return;
-	mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
-}
-
-static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index,
-					   u8 local_port)
-{
-	struct mlxsw_sp *mlxsw_sp = priv;
-	char ppbs_pl[MLXSW_REG_PPBS_LEN];
-	u32 kvdl_index;
-	int err;
-
-	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &kvdl_index);
-	if (err)
-		return err;
-	mlxsw_reg_ppbs_pack(ppbs_pl, kvdl_index, local_port);
-	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbs), ppbs_pl);
-	if (err)
-		goto err_ppbs_write;
-	*p_kvdl_index = kvdl_index;
-	return 0;
-
-err_ppbs_write:
-	mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
-	return err;
-}
-
-static void mlxsw_sp_act_kvdl_fwd_entry_del(void *priv, u32 kvdl_index)
-{
-	struct mlxsw_sp *mlxsw_sp = priv;
-
-	mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
-}
-
-static const struct mlxsw_afa_ops mlxsw_sp_act_afa_ops = {
-	.kvdl_set_add		= mlxsw_sp_act_kvdl_set_add,
-	.kvdl_set_del		= mlxsw_sp_act_kvdl_set_del,
-	.kvdl_fwd_entry_add	= mlxsw_sp_act_kvdl_fwd_entry_add,
-	.kvdl_fwd_entry_del	= mlxsw_sp_act_kvdl_fwd_entry_del,
-};
-
 int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp)
 {
 	const struct mlxsw_sp_acl_ops *acl_ops = &mlxsw_sp_acl_tcam_ops;
@@ -753,14 +674,6 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp)
 		goto err_afk_create;
 	}
 
-	acl->afa = mlxsw_afa_create(MLXSW_CORE_RES_GET(mlxsw_sp->core,
-						       ACL_ACTIONS_PER_SET),
-				    &mlxsw_sp_act_afa_ops, mlxsw_sp);
-	if (IS_ERR(acl->afa)) {
-		err = PTR_ERR(acl->afa);
-		goto err_afa_create;
-	}
-
 	err = rhashtable_init(&acl->ruleset_ht,
 			      &mlxsw_sp_acl_ruleset_ht_params);
 	if (err)
@@ -792,8 +705,6 @@ err_acl_ops_init:
 err_fid_get:
 	rhashtable_destroy(&acl->ruleset_ht);
 err_rhashtable_init:
-	mlxsw_afa_destroy(acl->afa);
-err_afa_create:
 	mlxsw_afk_destroy(acl->afk);
 err_afk_create:
 	kfree(acl);
@@ -810,7 +721,6 @@ void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp)
 	WARN_ON(!list_empty(&acl->rules));
 	mlxsw_sp_fid_put(acl->dummy_fid);
 	rhashtable_destroy(&acl->ruleset_ht);
-	mlxsw_afa_destroy(acl->afa);
 	mlxsw_afk_destroy(acl->afk);
 	kfree(acl);
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
new file mode 100644
index 000000000000..4d3340ed0291
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
@@ -0,0 +1,129 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
+ * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spectrum_acl_flex_actions.h"
+#include "core_acl_flex_actions.h"
+
+#define MLXSW_SP_KVDL_ACT_EXT_SIZE 1
+
+static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index,
+				     char *enc_actions, bool is_first)
+{
+	struct mlxsw_sp *mlxsw_sp = priv;
+	char pefa_pl[MLXSW_REG_PEFA_LEN];
+	u32 kvdl_index;
+	int err;
+
+	/* The first action set of a TCAM entry is stored directly in TCAM,
+	 * not KVD linear area.
+	 */
+	if (is_first)
+		return 0;
+
+	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ACT_EXT_SIZE,
+				  &kvdl_index);
+	if (err)
+		return err;
+	mlxsw_reg_pefa_pack(pefa_pl, kvdl_index, enc_actions);
+	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl);
+	if (err)
+		goto err_pefa_write;
+	*p_kvdl_index = kvdl_index;
+	return 0;
+
+err_pefa_write:
+	mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
+	return err;
+}
+
+static void mlxsw_sp_act_kvdl_set_del(void *priv, u32 kvdl_index,
+				      bool is_first)
+{
+	struct mlxsw_sp *mlxsw_sp = priv;
+
+	if (is_first)
+		return;
+	mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
+}
+
+static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index,
+					   u8 local_port)
+{
+	struct mlxsw_sp *mlxsw_sp = priv;
+	char ppbs_pl[MLXSW_REG_PPBS_LEN];
+	u32 kvdl_index;
+	int err;
+
+	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &kvdl_index);
+	if (err)
+		return err;
+	mlxsw_reg_ppbs_pack(ppbs_pl, kvdl_index, local_port);
+	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbs), ppbs_pl);
+	if (err)
+		goto err_ppbs_write;
+	*p_kvdl_index = kvdl_index;
+	return 0;
+
+err_ppbs_write:
+	mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
+	return err;
+}
+
+static void mlxsw_sp_act_kvdl_fwd_entry_del(void *priv, u32 kvdl_index)
+{
+	struct mlxsw_sp *mlxsw_sp = priv;
+
+	mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
+}
+
+static const struct mlxsw_afa_ops mlxsw_sp_act_afa_ops = {
+	.kvdl_set_add		= mlxsw_sp_act_kvdl_set_add,
+	.kvdl_set_del		= mlxsw_sp_act_kvdl_set_del,
+	.kvdl_fwd_entry_add	= mlxsw_sp_act_kvdl_fwd_entry_add,
+	.kvdl_fwd_entry_del	= mlxsw_sp_act_kvdl_fwd_entry_del,
+};
+
+int mlxsw_sp_afa_init(struct mlxsw_sp *mlxsw_sp)
+{
+	mlxsw_sp->afa = mlxsw_afa_create(MLXSW_CORE_RES_GET(mlxsw_sp->core,
+							    ACL_ACTIONS_PER_SET),
+					 &mlxsw_sp_act_afa_ops, mlxsw_sp);
+	return PTR_ERR_OR_ZERO(mlxsw_sp->afa);
+}
+
+void mlxsw_sp_afa_fini(struct mlxsw_sp *mlxsw_sp)
+{
+	mlxsw_afa_destroy(mlxsw_sp->afa);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h
new file mode 100644
index 000000000000..2726192836ad
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h
@@ -0,0 +1,44 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
+ * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXSW_SPECTRUM_ACL_FLEX_KEYS_H
+#define _MLXSW_SPECTRUM_ACL_FLEX_KEYS_H
+
+#include "spectrum.h"
+
+int mlxsw_sp_afa_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_afa_fini(struct mlxsw_sp *mlxsw_sp);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
index 50b40de1fb91..7e8284b46968 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
@@ -608,7 +608,10 @@ mlxsw_sp_acl_tcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp,
 		goto err_rulei_create;
 	}
 
-	mlxsw_sp_acl_rulei_act_continue(rulei);
+	err = mlxsw_sp_acl_rulei_act_continue(rulei);
+	if (WARN_ON(err))
+		goto err_rulei_act_continue;
+
 	err = mlxsw_sp_acl_rulei_commit(rulei);
 	if (err)
 		goto err_rulei_commit;
@@ -623,6 +626,7 @@ mlxsw_sp_acl_tcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp,
 
 err_rule_insert:
 err_rulei_commit:
+err_rulei_act_continue:
 	mlxsw_sp_acl_rulei_destroy(rulei);
 err_rulei_create:
 	parman_item_remove(region->parman, parman_prio, parman_item);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
index 51e6846da72b..96fdba78acab 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
@@ -43,21 +43,42 @@ enum mlxsw_sp_field_metadata_id {
 	MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT,
 	MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD,
 	MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP,
+	MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX,
+	MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_SIZE,
+	MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX,
 };
 
 static struct devlink_dpipe_field mlxsw_sp_dpipe_fields_metadata[] = {
-	{ .name = "erif_port",
-	  .id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT,
-	  .bitwidth = 32,
-	  .mapping_type = DEVLINK_DPIPE_FIELD_MAPPING_TYPE_IFINDEX,
+	{
+		.name = "erif_port",
+		.id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT,
+		.bitwidth = 32,
+		.mapping_type = DEVLINK_DPIPE_FIELD_MAPPING_TYPE_IFINDEX,
 	},
-	{ .name = "l3_forward",
-	  .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD,
-	  .bitwidth = 1,
+	{
+		.name = "l3_forward",
+		.id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD,
+		.bitwidth = 1,
 	},
-	{ .name = "l3_drop",
-	  .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP,
-	  .bitwidth = 1,
+	{
+		.name = "l3_drop",
+		.id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP,
+		.bitwidth = 1,
+	},
+	{
+		.name = "adj_index",
+		.id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX,
+		.bitwidth = 32,
+	},
+	{
+		.name = "adj_size",
+		.id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_SIZE,
+		.bitwidth = 32,
+	},
+	{
+		.name = "adj_hash_index",
+		.id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX,
+		.bitwidth = 32,
 	},
 };
 
@@ -826,6 +847,390 @@ static void mlxsw_sp_dpipe_host6_table_fini(struct mlxsw_sp *mlxsw_sp)
 				       MLXSW_SP_DPIPE_TABLE_NAME_HOST6);
 }
 
+static int mlxsw_sp_dpipe_table_adj_matches_dump(void *priv,
+						 struct sk_buff *skb)
+{
+	struct devlink_dpipe_match match = {0};
+	int err;
+
+	match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+	match.header = &mlxsw_sp_dpipe_header_metadata;
+	match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX;
+
+	err = devlink_dpipe_match_put(skb, &match);
+	if (err)
+		return err;
+
+	match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+	match.header = &mlxsw_sp_dpipe_header_metadata;
+	match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_SIZE;
+
+	err = devlink_dpipe_match_put(skb, &match);
+	if (err)
+		return err;
+
+	match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+	match.header = &mlxsw_sp_dpipe_header_metadata;
+	match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX;
+
+	return devlink_dpipe_match_put(skb, &match);
+}
+
+static int mlxsw_sp_dpipe_table_adj_actions_dump(void *priv,
+						 struct sk_buff *skb)
+{
+	struct devlink_dpipe_action action = {0};
+	int err;
+
+	action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY;
+	action.header = &devlink_dpipe_header_ethernet;
+	action.field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC;
+
+	err = devlink_dpipe_action_put(skb, &action);
+	if (err)
+		return err;
+
+	action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY;
+	action.header = &mlxsw_sp_dpipe_header_metadata;
+	action.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT;
+
+	return devlink_dpipe_action_put(skb, &action);
+}
+
+static u64 mlxsw_sp_dpipe_table_adj_size(struct mlxsw_sp *mlxsw_sp)
+{
+	struct mlxsw_sp_nexthop *nh;
+	u64 size = 0;
+
+	mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router)
+		if (mlxsw_sp_nexthop_offload(nh) &&
+		    !mlxsw_sp_nexthop_group_has_ipip(nh))
+			size++;
+	return size;
+}
+
+enum mlxsw_sp_dpipe_table_adj_match {
+	MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX,
+	MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE,
+	MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX,
+	MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT,
+};
+
+enum mlxsw_sp_dpipe_table_adj_action {
+	MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC,
+	MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT,
+	MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT,
+};
+
+static void
+mlxsw_sp_dpipe_table_adj_match_action_prepare(struct devlink_dpipe_match *matches,
+					      struct devlink_dpipe_action *actions)
+{
+	struct devlink_dpipe_action *action;
+	struct devlink_dpipe_match *match;
+
+	match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX];
+	match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+	match->header = &mlxsw_sp_dpipe_header_metadata;
+	match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX;
+
+	match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE];
+	match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+	match->header = &mlxsw_sp_dpipe_header_metadata;
+	match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_SIZE;
+
+	match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX];
+	match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+	match->header = &mlxsw_sp_dpipe_header_metadata;
+	match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX;
+
+	action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC];
+	action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY;
+	action->header = &devlink_dpipe_header_ethernet;
+	action->field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC;
+
+	action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT];
+	action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY;
+	action->header = &mlxsw_sp_dpipe_header_metadata;
+	action->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT;
+}
+
+static int
+mlxsw_sp_dpipe_table_adj_entry_prepare(struct devlink_dpipe_entry *entry,
+				       struct devlink_dpipe_value *match_values,
+				       struct devlink_dpipe_match *matches,
+				       struct devlink_dpipe_value *action_values,
+				       struct devlink_dpipe_action *actions)
+{	struct devlink_dpipe_value *action_value;
+	struct devlink_dpipe_value *match_value;
+	struct devlink_dpipe_action *action;
+	struct devlink_dpipe_match *match;
+
+	entry->match_values = match_values;
+	entry->match_values_count = MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT;
+
+	entry->action_values = action_values;
+	entry->action_values_count = MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT;
+
+	match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX];
+	match_value = &match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX];
+
+	match_value->match = match;
+	match_value->value_size = sizeof(u32);
+	match_value->value = kmalloc(match_value->value_size, GFP_KERNEL);
+	if (!match_value->value)
+		return -ENOMEM;
+
+	match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE];
+	match_value = &match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE];
+
+	match_value->match = match;
+	match_value->value_size = sizeof(u32);
+	match_value->value = kmalloc(match_value->value_size, GFP_KERNEL);
+	if (!match_value->value)
+		return -ENOMEM;
+
+	match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX];
+	match_value = &match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX];
+
+	match_value->match = match;
+	match_value->value_size = sizeof(u32);
+	match_value->value = kmalloc(match_value->value_size, GFP_KERNEL);
+	if (!match_value->value)
+		return -ENOMEM;
+
+	action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC];
+	action_value = &action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC];
+
+	action_value->action = action;
+	action_value->value_size = sizeof(u64);
+	action_value->value = kmalloc(action_value->value_size, GFP_KERNEL);
+	if (!action_value->value)
+		return -ENOMEM;
+
+	action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT];
+	action_value = &action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT];
+
+	action_value->action = action;
+	action_value->value_size = sizeof(u32);
+	action_value->value = kmalloc(action_value->value_size, GFP_KERNEL);
+	if (!action_value->value)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void
+__mlxsw_sp_dpipe_table_adj_entry_fill(struct devlink_dpipe_entry *entry,
+				      u32 adj_index, u32 adj_size,
+				      u32 adj_hash_index, unsigned char *ha,
+				      struct mlxsw_sp_rif *rif)
+{
+	struct devlink_dpipe_value *value;
+	u32 *p_rif_value;
+	u32 *p_index;
+
+	value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX];
+	p_index = value->value;
+	*p_index = adj_index;
+
+	value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE];
+	p_index = value->value;
+	*p_index = adj_size;
+
+	value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX];
+	p_index = value->value;
+	*p_index = adj_hash_index;
+
+	value = &entry->action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC];
+	ether_addr_copy(value->value, ha);
+
+	value = &entry->action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT];
+	p_rif_value = value->value;
+	*p_rif_value = mlxsw_sp_rif_index(rif);
+	value->mapping_value = mlxsw_sp_rif_dev_ifindex(rif);
+	value->mapping_valid = true;
+}
+
+static void mlxsw_sp_dpipe_table_adj_entry_fill(struct mlxsw_sp *mlxsw_sp,
+						struct mlxsw_sp_nexthop *nh,
+						struct devlink_dpipe_entry *entry)
+{
+	struct mlxsw_sp_rif *rif = mlxsw_sp_nexthop_rif(nh);
+	unsigned char *ha = mlxsw_sp_nexthop_ha(nh);
+	u32 adj_hash_index = 0;
+	u32 adj_index = 0;
+	u32 adj_size = 0;
+	int err;
+
+	mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_size, &adj_hash_index);
+	__mlxsw_sp_dpipe_table_adj_entry_fill(entry, adj_index, adj_size,
+					      adj_hash_index, ha, rif);
+	err = mlxsw_sp_nexthop_counter_get(mlxsw_sp, nh, &entry->counter);
+	if (!err)
+		entry->counter_valid = true;
+}
+
+static int
+mlxsw_sp_dpipe_table_adj_entries_get(struct mlxsw_sp *mlxsw_sp,
+				     struct devlink_dpipe_entry *entry,
+				     bool counters_enabled,
+				     struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+	struct mlxsw_sp_nexthop *nh;
+	int entry_index = 0;
+	int nh_count_max;
+	int nh_count = 0;
+	int nh_skip;
+	int j;
+	int err;
+
+	rtnl_lock();
+	nh_count_max = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp);
+start_again:
+	err = devlink_dpipe_entry_ctx_prepare(dump_ctx);
+	if (err)
+		goto err_ctx_prepare;
+	j = 0;
+	nh_skip = nh_count;
+	nh_count = 0;
+	mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) {
+		if (!mlxsw_sp_nexthop_offload(nh) ||
+		    mlxsw_sp_nexthop_group_has_ipip(nh))
+			continue;
+
+		if (nh_count < nh_skip)
+			goto skip;
+
+		mlxsw_sp_dpipe_table_adj_entry_fill(mlxsw_sp, nh, entry);
+		entry->index = entry_index;
+		err = devlink_dpipe_entry_ctx_append(dump_ctx, entry);
+		if (err) {
+			if (err == -EMSGSIZE) {
+				if (!j)
+					goto err_entry_append;
+				break;
+			}
+			goto err_entry_append;
+		}
+		entry_index++;
+		j++;
+skip:
+		nh_count++;
+	}
+
+	devlink_dpipe_entry_ctx_close(dump_ctx);
+	if (nh_count != nh_count_max)
+		goto start_again;
+	rtnl_unlock();
+
+	return 0;
+
+err_ctx_prepare:
+err_entry_append:
+	rtnl_unlock();
+	return err;
+}
+
+static int
+mlxsw_sp_dpipe_table_adj_entries_dump(void *priv, bool counters_enabled,
+				      struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+	struct devlink_dpipe_value action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT];
+	struct devlink_dpipe_value match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT];
+	struct devlink_dpipe_action actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT];
+	struct devlink_dpipe_match matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT];
+	struct devlink_dpipe_entry entry = {0};
+	struct mlxsw_sp *mlxsw_sp = priv;
+	int err;
+
+	memset(matches, 0, MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT *
+			   sizeof(matches[0]));
+	memset(match_values, 0, MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT *
+				sizeof(match_values[0]));
+	memset(actions, 0, MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT *
+			   sizeof(actions[0]));
+	memset(action_values, 0, MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT *
+				 sizeof(action_values[0]));
+
+	mlxsw_sp_dpipe_table_adj_match_action_prepare(matches, actions);
+	err = mlxsw_sp_dpipe_table_adj_entry_prepare(&entry,
+						     match_values, matches,
+						     action_values, actions);
+	if (err)
+		goto out;
+
+	err = mlxsw_sp_dpipe_table_adj_entries_get(mlxsw_sp, &entry,
+						   counters_enabled, dump_ctx);
+out:
+	devlink_dpipe_entry_clear(&entry);
+	return err;
+}
+
+static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable)
+{
+	struct mlxsw_sp *mlxsw_sp = priv;
+	struct mlxsw_sp_nexthop *nh;
+	u32 adj_hash_index = 0;
+	u32 adj_index = 0;
+	u32 adj_size = 0;
+
+	mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) {
+		if (!mlxsw_sp_nexthop_offload(nh) ||
+		    mlxsw_sp_nexthop_group_has_ipip(nh))
+			continue;
+
+		mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_size,
+					 &adj_hash_index);
+		if (enable)
+			mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
+		else
+			mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
+		mlxsw_sp_nexthop_update(mlxsw_sp,
+					adj_index + adj_hash_index, nh);
+	}
+	return 0;
+}
+
+static u64
+mlxsw_sp_dpipe_table_adj_size_get(void *priv)
+{
+	struct mlxsw_sp *mlxsw_sp = priv;
+	u64 size;
+
+	rtnl_lock();
+	size = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp);
+	rtnl_unlock();
+
+	return size;
+}
+
+static struct devlink_dpipe_table_ops mlxsw_sp_dpipe_table_adj_ops = {
+	.matches_dump = mlxsw_sp_dpipe_table_adj_matches_dump,
+	.actions_dump = mlxsw_sp_dpipe_table_adj_actions_dump,
+	.entries_dump = mlxsw_sp_dpipe_table_adj_entries_dump,
+	.counters_set_update = mlxsw_sp_dpipe_table_adj_counters_update,
+	.size_get = mlxsw_sp_dpipe_table_adj_size_get,
+};
+
+static int mlxsw_sp_dpipe_adj_table_init(struct mlxsw_sp *mlxsw_sp)
+{
+	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+
+	return devlink_dpipe_table_register(devlink,
+					    MLXSW_SP_DPIPE_TABLE_NAME_ADJ,
+					    &mlxsw_sp_dpipe_table_adj_ops,
+					    mlxsw_sp, false);
+}
+
+static void mlxsw_sp_dpipe_adj_table_fini(struct mlxsw_sp *mlxsw_sp)
+{
+	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+
+	devlink_dpipe_table_unregister(devlink,
+				       MLXSW_SP_DPIPE_TABLE_NAME_ADJ);
+}
+
 int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp)
 {
 	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
@@ -846,8 +1251,14 @@ int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp)
 	err = mlxsw_sp_dpipe_host6_table_init(mlxsw_sp);
 	if (err)
 		goto err_host6_table_init;
-	return 0;
 
+	err = mlxsw_sp_dpipe_adj_table_init(mlxsw_sp);
+	if (err)
+		goto err_adj_table_init;
+
+	return 0;
+err_adj_table_init:
+	mlxsw_sp_dpipe_host6_table_fini(mlxsw_sp);
 err_host6_table_init:
 	mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp);
 err_host4_table_init:
@@ -861,6 +1272,7 @@ void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp)
 {
 	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
 
+	mlxsw_sp_dpipe_adj_table_fini(mlxsw_sp);
 	mlxsw_sp_dpipe_host6_table_fini(mlxsw_sp);
 	mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp);
 	mlxsw_sp_dpipe_erif_table_fini(mlxsw_sp);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h
index 283fde4e6783..815d543cf114 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h
@@ -56,5 +56,6 @@ static inline void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp)
 #define MLXSW_SP_DPIPE_TABLE_NAME_ERIF "mlxsw_erif"
 #define MLXSW_SP_DPIPE_TABLE_NAME_HOST4 "mlxsw_host4"
 #define MLXSW_SP_DPIPE_TABLE_NAME_HOST6 "mlxsw_host6"
+#define MLXSW_SP_DPIPE_TABLE_NAME_ADJ "mlxsw_adj"
 
 #endif /* _MLXSW_PIPELINE_H_*/
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 8aace9a06a5d..2f0e57857ea4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -63,7 +63,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
 
 	tcf_exts_to_list(exts, &actions);
 	list_for_each_entry(a, &actions, list) {
-		if (is_tcf_gact_shot(a)) {
+		if (is_tcf_gact_ok(a)) {
+			err = mlxsw_sp_acl_rulei_act_continue(rulei);
+			if (err)
+				return err;
+		} else if (is_tcf_gact_shot(a)) {
 			err = mlxsw_sp_acl_rulei_act_drop(rulei);
 			if (err)
 				return err;
@@ -84,7 +88,9 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
 				return PTR_ERR(ruleset);
 
 			group_id = mlxsw_sp_acl_ruleset_group_id(ruleset);
-			mlxsw_sp_acl_rulei_act_jump(rulei, group_id);
+			err = mlxsw_sp_acl_rulei_act_jump(rulei, group_id);
+			if (err)
+				return err;
 		} else if (is_tcf_mirred_egress_redirect(a)) {
 			int ifindex = tcf_mirred_ifindex(a);
 			struct net_device *out_dev;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
index 702fe945227c..7502e53447bd 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
@@ -36,36 +36,123 @@
 
 #include "spectrum_ipip.h"
 
-static bool
-mlxsw_sp_ipip_netdev_has_ikey(const struct net_device *ol_dev)
+struct ip_tunnel_parm
+mlxsw_sp_ipip_netdev_parms(const struct net_device *ol_dev)
 {
 	struct ip_tunnel *tun = netdev_priv(ol_dev);
 
-	return !!(tun->parms.i_flags & TUNNEL_KEY);
+	return tun->parms;
 }
 
-static bool
-mlxsw_sp_ipip_netdev_has_okey(const struct net_device *ol_dev)
+static bool mlxsw_sp_ipip_parms_has_ikey(struct ip_tunnel_parm parms)
 {
-	struct ip_tunnel *tun = netdev_priv(ol_dev);
+	return !!(parms.i_flags & TUNNEL_KEY);
+}
 
-	return !!(tun->parms.o_flags & TUNNEL_KEY);
+static bool mlxsw_sp_ipip_parms_has_okey(struct ip_tunnel_parm parms)
+{
+	return !!(parms.o_flags & TUNNEL_KEY);
 }
 
-static u32 mlxsw_sp_ipip_netdev_ikey(const struct net_device *ol_dev)
+static u32 mlxsw_sp_ipip_parms_ikey(struct ip_tunnel_parm parms)
 {
-	struct ip_tunnel *tun = netdev_priv(ol_dev);
+	return mlxsw_sp_ipip_parms_has_ikey(parms) ?
+		be32_to_cpu(parms.i_key) : 0;
+}
+
+static u32 mlxsw_sp_ipip_parms_okey(struct ip_tunnel_parm parms)
+{
+	return mlxsw_sp_ipip_parms_has_okey(parms) ?
+		be32_to_cpu(parms.o_key) : 0;
+}
 
-	return mlxsw_sp_ipip_netdev_has_ikey(ol_dev) ?
-		be32_to_cpu(tun->parms.i_key) : 0;
+static __be32 mlxsw_sp_ipip_parms_saddr4(struct ip_tunnel_parm parms)
+{
+	return parms.iph.saddr;
+}
+
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_parms_saddr(enum mlxsw_sp_l3proto proto,
+			  struct ip_tunnel_parm parms)
+{
+	switch (proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		return (union mlxsw_sp_l3addr) {
+			.addr4 = mlxsw_sp_ipip_parms_saddr4(parms),
+		};
+	case MLXSW_SP_L3_PROTO_IPV6:
+		break;
+	}
+
+	WARN_ON(1);
+	return (union mlxsw_sp_l3addr) {
+		.addr4 = 0,
+	};
+}
+
+static __be32 mlxsw_sp_ipip_parms_daddr4(struct ip_tunnel_parm parms)
+{
+	return parms.iph.daddr;
+}
+
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_parms_daddr(enum mlxsw_sp_l3proto proto,
+			  struct ip_tunnel_parm parms)
+{
+	switch (proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		return (union mlxsw_sp_l3addr) {
+			.addr4 = mlxsw_sp_ipip_parms_daddr4(parms),
+		};
+	case MLXSW_SP_L3_PROTO_IPV6:
+		break;
+	}
+
+	WARN_ON(1);
+	return (union mlxsw_sp_l3addr) {
+		.addr4 = 0,
+	};
+}
+
+static bool mlxsw_sp_ipip_netdev_has_ikey(const struct net_device *ol_dev)
+{
+	return mlxsw_sp_ipip_parms_has_ikey(mlxsw_sp_ipip_netdev_parms(ol_dev));
+}
+
+static bool mlxsw_sp_ipip_netdev_has_okey(const struct net_device *ol_dev)
+{
+	return mlxsw_sp_ipip_parms_has_okey(mlxsw_sp_ipip_netdev_parms(ol_dev));
+}
+
+static u32 mlxsw_sp_ipip_netdev_ikey(const struct net_device *ol_dev)
+{
+	return mlxsw_sp_ipip_parms_ikey(mlxsw_sp_ipip_netdev_parms(ol_dev));
 }
 
 static u32 mlxsw_sp_ipip_netdev_okey(const struct net_device *ol_dev)
 {
-	struct ip_tunnel *tun = netdev_priv(ol_dev);
+	return mlxsw_sp_ipip_parms_okey(mlxsw_sp_ipip_netdev_parms(ol_dev));
+}
+
+union mlxsw_sp_l3addr
+mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
+			   const struct net_device *ol_dev)
+{
+	return mlxsw_sp_ipip_parms_saddr(proto,
+					 mlxsw_sp_ipip_netdev_parms(ol_dev));
+}
+
+static __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev)
+{
+	return mlxsw_sp_ipip_parms_daddr4(mlxsw_sp_ipip_netdev_parms(ol_dev));
+}
 
-	return mlxsw_sp_ipip_netdev_has_okey(ol_dev) ?
-		be32_to_cpu(tun->parms.o_key) : 0;
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
+			   const struct net_device *ol_dev)
+{
+	return mlxsw_sp_ipip_parms_daddr(proto,
+					 mlxsw_sp_ipip_netdev_parms(ol_dev));
 }
 
 static int
@@ -200,6 +287,73 @@ mlxsw_sp_ipip_ol_loopback_config_gre4(struct mlxsw_sp *mlxsw_sp,
 	};
 }
 
+static int
+mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp,
+				    struct mlxsw_sp_ipip_entry *ipip_entry,
+				    struct netlink_ext_ack *extack)
+{
+	union mlxsw_sp_l3addr old_saddr, new_saddr;
+	union mlxsw_sp_l3addr old_daddr, new_daddr;
+	struct ip_tunnel_parm new_parms;
+	bool update_tunnel = false;
+	bool update_decap = false;
+	bool update_nhs = false;
+	int err = 0;
+
+	new_parms = mlxsw_sp_ipip_netdev_parms(ipip_entry->ol_dev);
+
+	new_saddr = mlxsw_sp_ipip_parms_saddr(MLXSW_SP_L3_PROTO_IPV4,
+					      new_parms);
+	old_saddr = mlxsw_sp_ipip_parms_saddr(MLXSW_SP_L3_PROTO_IPV4,
+					      ipip_entry->parms);
+	new_daddr = mlxsw_sp_ipip_parms_daddr(MLXSW_SP_L3_PROTO_IPV4,
+					      new_parms);
+	old_daddr = mlxsw_sp_ipip_parms_daddr(MLXSW_SP_L3_PROTO_IPV4,
+					      ipip_entry->parms);
+
+	if (!mlxsw_sp_l3addr_eq(&new_saddr, &old_saddr)) {
+		u16 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
+
+		/* Since the local address has changed, if there is another
+		 * tunnel with a matching saddr, both need to be demoted.
+		 */
+		if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp,
+							 MLXSW_SP_L3_PROTO_IPV4,
+							 new_saddr, ul_tb_id,
+							 ipip_entry)) {
+			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
+			return 0;
+		}
+
+		update_tunnel = true;
+	} else if ((mlxsw_sp_ipip_parms_okey(ipip_entry->parms) !=
+		    mlxsw_sp_ipip_parms_okey(new_parms)) ||
+		   ipip_entry->parms.link != new_parms.link) {
+		update_tunnel = true;
+	} else if (!mlxsw_sp_l3addr_eq(&new_daddr, &old_daddr)) {
+		update_nhs = true;
+	} else if (mlxsw_sp_ipip_parms_ikey(ipip_entry->parms) !=
+		   mlxsw_sp_ipip_parms_ikey(new_parms)) {
+		update_decap = true;
+	}
+
+	if (update_tunnel)
+		err = __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
+							  true, true, true,
+							  extack);
+	else if (update_nhs)
+		err = __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
+							  false, false, true,
+							  extack);
+	else if (update_decap)
+		err = __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
+							  false, false, false,
+							  extack);
+
+	ipip_entry->parms = new_parms;
+	return err;
+}
+
 static const struct mlxsw_sp_ipip_ops mlxsw_sp_ipip_gre4_ops = {
 	.dev_type = ARPHRD_IPGRE,
 	.ul_proto = MLXSW_SP_L3_PROTO_IPV4,
@@ -207,6 +361,7 @@ static const struct mlxsw_sp_ipip_ops mlxsw_sp_ipip_gre4_ops = {
 	.fib_entry_op = mlxsw_sp_ipip_fib_entry_op_gre4,
 	.can_offload = mlxsw_sp_ipip_can_offload_gre4,
 	.ol_loopback_config = mlxsw_sp_ipip_ol_loopback_config_gre4,
+	.ol_netdev_change = mlxsw_sp_ipip_ol_netdev_change_gre4,
 };
 
 const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[] = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
index 1c2db831d83b..04b08d9d76e9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
@@ -38,6 +38,13 @@
 #include "spectrum_router.h"
 #include <net/ip_fib.h>
 
+struct ip_tunnel_parm
+mlxsw_sp_ipip_netdev_parms(const struct net_device *ol_dev);
+
+union mlxsw_sp_l3addr
+mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
+			   const struct net_device *ol_dev);
+
 enum mlxsw_sp_ipip_type {
 	MLXSW_SP_IPIP_TYPE_GRE4,
 	MLXSW_SP_IPIP_TYPE_MAX,
@@ -47,9 +54,9 @@ struct mlxsw_sp_ipip_entry {
 	enum mlxsw_sp_ipip_type ipipt;
 	struct net_device *ol_dev; /* Overlay. */
 	struct mlxsw_sp_rif_ipip_lb *ol_lb;
-	unsigned int ref_count; /* Number of next hops using the tunnel. */
 	struct mlxsw_sp_fib_entry *decap_fib_entry;
 	struct list_head ipip_list_node;
+	struct ip_tunnel_parm parms;
 };
 
 struct mlxsw_sp_ipip_ops {
@@ -72,6 +79,10 @@ struct mlxsw_sp_ipip_ops {
 			    struct mlxsw_sp_ipip_entry *ipip_entry,
 			    enum mlxsw_reg_ralue_op op,
 			    u32 tunnel_index);
+
+	int (*ol_netdev_change)(struct mlxsw_sp *mlxsw_sp,
+				struct mlxsw_sp_ipip_entry *ipip_entry,
+				struct netlink_ext_ack *extack);
 };
 
 extern const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[];
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
index 26c26cd30c3d..310c38247b5c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
@@ -39,55 +39,276 @@
 
 #define MLXSW_SP_KVDL_SINGLE_BASE 0
 #define MLXSW_SP_KVDL_SINGLE_SIZE 16384
+#define MLXSW_SP_KVDL_SINGLE_END \
+	(MLXSW_SP_KVDL_SINGLE_SIZE + MLXSW_SP_KVDL_SINGLE_BASE - 1)
+
 #define MLXSW_SP_KVDL_CHUNKS_BASE \
 	(MLXSW_SP_KVDL_SINGLE_BASE + MLXSW_SP_KVDL_SINGLE_SIZE)
-#define MLXSW_SP_KVDL_CHUNKS_SIZE \
-	(MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP_KVDL_CHUNKS_BASE)
+#define MLXSW_SP_KVDL_CHUNKS_SIZE 49152
+#define MLXSW_SP_KVDL_CHUNKS_END \
+	(MLXSW_SP_KVDL_CHUNKS_SIZE + MLXSW_SP_KVDL_CHUNKS_BASE - 1)
+
+#define MLXSW_SP_KVDL_LARGE_CHUNKS_BASE \
+	(MLXSW_SP_KVDL_CHUNKS_BASE + MLXSW_SP_KVDL_CHUNKS_SIZE)
+#define MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE \
+	(MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP_KVDL_LARGE_CHUNKS_BASE)
+#define MLXSW_SP_KVDL_LARGE_CHUNKS_END \
+	(MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE + MLXSW_SP_KVDL_LARGE_CHUNKS_BASE - 1)
+
 #define MLXSW_SP_CHUNK_MAX 32
+#define MLXSW_SP_LARGE_CHUNK_MAX 512
+
+struct mlxsw_sp_kvdl_part_info {
+	unsigned int part_index;
+	unsigned int start_index;
+	unsigned int end_index;
+	unsigned int alloc_size;
+};
+
+struct mlxsw_sp_kvdl_part {
+	struct list_head list;
+	const struct mlxsw_sp_kvdl_part_info *info;
+	unsigned long usage[0];	/* Entries */
+};
+
+struct mlxsw_sp_kvdl {
+	struct list_head parts_list;
+};
+
+static struct mlxsw_sp_kvdl_part *
+mlxsw_sp_kvdl_alloc_size_part(struct mlxsw_sp_kvdl *kvdl,
+			      unsigned int alloc_size)
+{
+	struct mlxsw_sp_kvdl_part *part, *min_part = NULL;
+
+	list_for_each_entry(part, &kvdl->parts_list, list) {
+		if (alloc_size <= part->info->alloc_size &&
+		    (!min_part ||
+		     part->info->alloc_size <= min_part->info->alloc_size))
+			min_part = part;
+	}
+
+	return min_part ?: ERR_PTR(-ENOBUFS);
+}
+
+static struct mlxsw_sp_kvdl_part *
+mlxsw_sp_kvdl_index_part(struct mlxsw_sp_kvdl *kvdl, u32 kvdl_index)
+{
+	struct mlxsw_sp_kvdl_part *part;
+
+	list_for_each_entry(part, &kvdl->parts_list, list) {
+		if (kvdl_index >= part->info->start_index &&
+		    kvdl_index <= part->info->end_index)
+			return part;
+	}
+
+	return ERR_PTR(-EINVAL);
+}
+
+static u32
+mlxsw_sp_entry_index_kvdl_index(const struct mlxsw_sp_kvdl_part_info *info,
+				unsigned int entry_index)
+{
+	return info->start_index + entry_index * info->alloc_size;
+}
+
+static unsigned int
+mlxsw_sp_kvdl_index_entry_index(const struct mlxsw_sp_kvdl_part_info *info,
+				u32 kvdl_index)
+{
+	return (kvdl_index - info->start_index) / info->alloc_size;
+}
+
+static int mlxsw_sp_kvdl_part_alloc(struct mlxsw_sp_kvdl_part *part,
+				    u32 *p_kvdl_index)
+{
+	const struct mlxsw_sp_kvdl_part_info *info = part->info;
+	unsigned int entry_index, nr_entries;
+
+	nr_entries = (info->end_index - info->start_index + 1) /
+		     info->alloc_size;
+	entry_index = find_first_zero_bit(part->usage, nr_entries);
+	if (entry_index == nr_entries)
+		return -ENOBUFS;
+	__set_bit(entry_index, part->usage);
+
+	*p_kvdl_index = mlxsw_sp_entry_index_kvdl_index(part->info,
+							entry_index);
+
+	return 0;
+}
+
+static void mlxsw_sp_kvdl_part_free(struct mlxsw_sp_kvdl_part *part,
+				    u32 kvdl_index)
+{
+	unsigned int entry_index;
+
+	entry_index = mlxsw_sp_kvdl_index_entry_index(part->info,
+						      kvdl_index);
+	__clear_bit(entry_index, part->usage);
+}
 
 int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count,
 			u32 *p_entry_index)
 {
-	int entry_index;
-	int size;
-	int type_base;
-	int type_size;
-	int type_entries;
-
-	if (entry_count == 0 || entry_count > MLXSW_SP_CHUNK_MAX) {
-		return -EINVAL;
-	} else if (entry_count == 1) {
-		type_base = MLXSW_SP_KVDL_SINGLE_BASE;
-		type_size = MLXSW_SP_KVDL_SINGLE_SIZE;
-		type_entries = 1;
-	} else {
-		type_base = MLXSW_SP_KVDL_CHUNKS_BASE;
-		type_size = MLXSW_SP_KVDL_CHUNKS_SIZE;
-		type_entries = MLXSW_SP_CHUNK_MAX;
+	struct mlxsw_sp_kvdl_part *part;
+
+	/* Find partition with smallest allocation size satisfying the
+	 * requested size.
+	 */
+	part = mlxsw_sp_kvdl_alloc_size_part(mlxsw_sp->kvdl, entry_count);
+	if (IS_ERR(part))
+		return PTR_ERR(part);
+
+	return mlxsw_sp_kvdl_part_alloc(part, p_entry_index);
+}
+
+void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index)
+{
+	struct mlxsw_sp_kvdl_part *part;
+
+	part = mlxsw_sp_kvdl_index_part(mlxsw_sp->kvdl, entry_index);
+	if (IS_ERR(part))
+		return;
+	mlxsw_sp_kvdl_part_free(part, entry_index);
+}
+
+int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
+				   unsigned int entry_count,
+				   unsigned int *p_alloc_size)
+{
+	struct mlxsw_sp_kvdl_part *part;
+
+	part = mlxsw_sp_kvdl_alloc_size_part(mlxsw_sp->kvdl, entry_count);
+	if (IS_ERR(part))
+		return PTR_ERR(part);
+
+	*p_alloc_size = part->info->alloc_size;
+
+	return 0;
+}
+
+static const struct mlxsw_sp_kvdl_part_info kvdl_parts_info[] = {
+	{
+		.part_index	= 0,
+		.start_index	= MLXSW_SP_KVDL_SINGLE_BASE,
+		.end_index	= MLXSW_SP_KVDL_SINGLE_END,
+		.alloc_size	= 1,
+	},
+	{
+		.part_index	= 1,
+		.start_index	= MLXSW_SP_KVDL_CHUNKS_BASE,
+		.end_index	= MLXSW_SP_KVDL_CHUNKS_END,
+		.alloc_size	= MLXSW_SP_CHUNK_MAX,
+	},
+	{
+		.part_index	= 2,
+		.start_index	= MLXSW_SP_KVDL_LARGE_CHUNKS_BASE,
+		.end_index	= MLXSW_SP_KVDL_LARGE_CHUNKS_END,
+		.alloc_size	= MLXSW_SP_LARGE_CHUNK_MAX,
+	},
+};
+
+static struct mlxsw_sp_kvdl_part *
+mlxsw_sp_kvdl_part_find(struct mlxsw_sp *mlxsw_sp, unsigned int part_index)
+{
+	struct mlxsw_sp_kvdl_part *part;
+
+	list_for_each_entry(part, &mlxsw_sp->kvdl->parts_list, list) {
+		if (part->info->part_index == part_index)
+			return part;
 	}
 
-	entry_index = type_base;
-	size = type_base + type_size;
-	for_each_clear_bit_from(entry_index, mlxsw_sp->kvdl.usage, size) {
-		int i;
+	return NULL;
+}
+
+static int mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
+				   unsigned int part_index)
+{
+	const struct mlxsw_sp_kvdl_part_info *info;
+	struct mlxsw_sp_kvdl_part *part;
+	unsigned int nr_entries;
+	size_t usage_size;
+
+	info = &kvdl_parts_info[part_index];
+
+	nr_entries = (info->end_index - info->start_index + 1) /
+		     info->alloc_size;
+	usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);
+	part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
+	if (!part)
+		return -ENOMEM;
+
+	part->info = info;
+	list_add(&part->list, &mlxsw_sp->kvdl->parts_list);
+
+	return 0;
+}
+
+static void mlxsw_sp_kvdl_part_fini(struct mlxsw_sp *mlxsw_sp,
+				    unsigned int part_index)
+{
+	struct mlxsw_sp_kvdl_part *part;
+
+	part = mlxsw_sp_kvdl_part_find(mlxsw_sp, part_index);
+	if (!part)
+		return;
+
+	list_del(&part->list);
+	kfree(part);
+}
+
+static int mlxsw_sp_kvdl_parts_init(struct mlxsw_sp *mlxsw_sp)
+{
+	int err, i;
+
+	INIT_LIST_HEAD(&mlxsw_sp->kvdl->parts_list);
 
-		for (i = 0; i < type_entries; i++)
-			set_bit(entry_index + i, mlxsw_sp->kvdl.usage);
-		*p_entry_index = entry_index;
-		return 0;
+	for (i = 0; i < ARRAY_SIZE(kvdl_parts_info); i++) {
+		err = mlxsw_sp_kvdl_part_init(mlxsw_sp, i);
+		if (err)
+			goto err_kvdl_part_init;
 	}
-	return -ENOBUFS;
+
+	return 0;
+
+err_kvdl_part_init:
+	for (i--; i >= 0; i--)
+		mlxsw_sp_kvdl_part_fini(mlxsw_sp, i);
+	return err;
 }
 
-void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index)
+static void mlxsw_sp_kvdl_parts_fini(struct mlxsw_sp *mlxsw_sp)
 {
-	int type_entries;
 	int i;
 
-	if (entry_index < MLXSW_SP_KVDL_CHUNKS_BASE)
-		type_entries = 1;
-	else
-		type_entries = MLXSW_SP_CHUNK_MAX;
-	for (i = 0; i < type_entries; i++)
-		clear_bit(entry_index + i, mlxsw_sp->kvdl.usage);
+	for (i = ARRAY_SIZE(kvdl_parts_info) - 1; i >= 0; i--)
+		mlxsw_sp_kvdl_part_fini(mlxsw_sp, i);
+}
+
+int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp)
+{
+	struct mlxsw_sp_kvdl *kvdl;
+	int err;
+
+	kvdl = kzalloc(sizeof(*mlxsw_sp->kvdl), GFP_KERNEL);
+	if (!kvdl)
+		return -ENOMEM;
+	mlxsw_sp->kvdl = kvdl;
+
+	err = mlxsw_sp_kvdl_parts_init(mlxsw_sp);
+	if (err)
+		goto err_kvdl_parts_init;
+
+	return 0;
+
+err_kvdl_parts_init:
+	kfree(mlxsw_sp->kvdl);
+	return err;
+}
+
+void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp)
+{
+	mlxsw_sp_kvdl_parts_fini(mlxsw_sp);
+	kfree(mlxsw_sp->kvdl);
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
new file mode 100644
index 000000000000..d20b143de3b4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
@@ -0,0 +1,1012 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/rhashtable.h>
+
+#include "spectrum_mr.h"
+#include "spectrum_router.h"
+
+struct mlxsw_sp_mr {
+	const struct mlxsw_sp_mr_ops *mr_ops;
+	void *catchall_route_priv;
+	struct delayed_work stats_update_dw;
+	struct list_head table_list;
+#define MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL 5000 /* ms */
+	unsigned long priv[0];
+	/* priv has to be always the last item */
+};
+
+struct mlxsw_sp_mr_vif {
+	struct net_device *dev;
+	const struct mlxsw_sp_rif *rif;
+	unsigned long vif_flags;
+
+	/* A list of route_vif_entry structs that point to routes that the VIF
+	 * instance is used as one of the egress VIFs
+	 */
+	struct list_head route_evif_list;
+
+	/* A list of route_vif_entry structs that point to routes that the VIF
+	 * instance is used as an ingress VIF
+	 */
+	struct list_head route_ivif_list;
+};
+
+struct mlxsw_sp_mr_route_vif_entry {
+	struct list_head vif_node;
+	struct list_head route_node;
+	struct mlxsw_sp_mr_vif *mr_vif;
+	struct mlxsw_sp_mr_route *mr_route;
+};
+
+struct mlxsw_sp_mr_table {
+	struct list_head node;
+	enum mlxsw_sp_l3proto proto;
+	struct mlxsw_sp *mlxsw_sp;
+	u32 vr_id;
+	struct mlxsw_sp_mr_vif vifs[MAXVIFS];
+	struct list_head route_list;
+	struct rhashtable route_ht;
+	char catchall_route_priv[0];
+	/* catchall_route_priv has to be always the last item */
+};
+
+struct mlxsw_sp_mr_route {
+	struct list_head node;
+	struct rhash_head ht_node;
+	struct mlxsw_sp_mr_route_key key;
+	enum mlxsw_sp_mr_route_action route_action;
+	u16 min_mtu;
+	struct mfc_cache *mfc4;
+	void *route_priv;
+	const struct mlxsw_sp_mr_table *mr_table;
+	/* A list of route_vif_entry structs that point to the egress VIFs */
+	struct list_head evif_list;
+	/* A route_vif_entry struct that point to the ingress VIF */
+	struct mlxsw_sp_mr_route_vif_entry ivif;
+};
+
+static const struct rhashtable_params mlxsw_sp_mr_route_ht_params = {
+	.key_len = sizeof(struct mlxsw_sp_mr_route_key),
+	.key_offset = offsetof(struct mlxsw_sp_mr_route, key),
+	.head_offset = offsetof(struct mlxsw_sp_mr_route, ht_node),
+	.automatic_shrinking = true,
+};
+
+static bool mlxsw_sp_mr_vif_regular(const struct mlxsw_sp_mr_vif *vif)
+{
+	return !(vif->vif_flags & (VIFF_TUNNEL | VIFF_REGISTER));
+}
+
+static bool mlxsw_sp_mr_vif_valid(const struct mlxsw_sp_mr_vif *vif)
+{
+	return mlxsw_sp_mr_vif_regular(vif) && vif->dev && vif->rif;
+}
+
+static bool mlxsw_sp_mr_vif_exists(const struct mlxsw_sp_mr_vif *vif)
+{
+	return vif->dev;
+}
+
+static bool
+mlxsw_sp_mr_route_ivif_in_evifs(const struct mlxsw_sp_mr_route *mr_route)
+{
+	vifi_t ivif;
+
+	switch (mr_route->mr_table->proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		ivif = mr_route->mfc4->mfc_parent;
+		return mr_route->mfc4->mfc_un.res.ttls[ivif] != 255;
+	case MLXSW_SP_L3_PROTO_IPV6:
+		/* fall through */
+	default:
+		WARN_ON_ONCE(1);
+	}
+	return false;
+}
+
+static int
+mlxsw_sp_mr_route_valid_evifs_num(const struct mlxsw_sp_mr_route *mr_route)
+{
+	struct mlxsw_sp_mr_route_vif_entry *rve;
+	int valid_evifs;
+
+	valid_evifs = 0;
+	list_for_each_entry(rve, &mr_route->evif_list, route_node)
+		if (mlxsw_sp_mr_vif_valid(rve->mr_vif))
+			valid_evifs++;
+	return valid_evifs;
+}
+
+static bool mlxsw_sp_mr_route_starg(const struct mlxsw_sp_mr_route *mr_route)
+{
+	switch (mr_route->mr_table->proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		return mr_route->key.source_mask.addr4 == htonl(INADDR_ANY);
+	case MLXSW_SP_L3_PROTO_IPV6:
+		/* fall through */
+	default:
+		WARN_ON_ONCE(1);
+	}
+	return false;
+}
+
+static enum mlxsw_sp_mr_route_action
+mlxsw_sp_mr_route_action(const struct mlxsw_sp_mr_route *mr_route)
+{
+	struct mlxsw_sp_mr_route_vif_entry *rve;
+
+	/* If the ingress port is not regular and resolved, trap the route */
+	if (!mlxsw_sp_mr_vif_valid(mr_route->ivif.mr_vif))
+		return MLXSW_SP_MR_ROUTE_ACTION_TRAP;
+
+	/* The kernel does not match a (*,G) route that the ingress interface is
+	 * not one of the egress interfaces, so trap these kind of routes.
+	 */
+	if (mlxsw_sp_mr_route_starg(mr_route) &&
+	    !mlxsw_sp_mr_route_ivif_in_evifs(mr_route))
+		return MLXSW_SP_MR_ROUTE_ACTION_TRAP;
+
+	/* If the route has no valid eVIFs, trap it. */
+	if (!mlxsw_sp_mr_route_valid_evifs_num(mr_route))
+		return MLXSW_SP_MR_ROUTE_ACTION_TRAP;
+
+	/* If one of the eVIFs has no RIF, trap-and-forward the route as there
+	 * is some more routing to do in software too.
+	 */
+	list_for_each_entry(rve, &mr_route->evif_list, route_node)
+		if (mlxsw_sp_mr_vif_exists(rve->mr_vif) && !rve->mr_vif->rif)
+			return MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD;
+
+	return MLXSW_SP_MR_ROUTE_ACTION_FORWARD;
+}
+
+static enum mlxsw_sp_mr_route_prio
+mlxsw_sp_mr_route_prio(const struct mlxsw_sp_mr_route *mr_route)
+{
+	return mlxsw_sp_mr_route_starg(mr_route) ?
+		MLXSW_SP_MR_ROUTE_PRIO_STARG : MLXSW_SP_MR_ROUTE_PRIO_SG;
+}
+
+static void mlxsw_sp_mr_route4_key(struct mlxsw_sp_mr_table *mr_table,
+				   struct mlxsw_sp_mr_route_key *key,
+				   const struct mfc_cache *mfc)
+{
+	bool starg = (mfc->mfc_origin == htonl(INADDR_ANY));
+
+	memset(key, 0, sizeof(*key));
+	key->vrid = mr_table->vr_id;
+	key->proto = mr_table->proto;
+	key->group.addr4 = mfc->mfc_mcastgrp;
+	key->group_mask.addr4 = htonl(0xffffffff);
+	key->source.addr4 = mfc->mfc_origin;
+	key->source_mask.addr4 = htonl(starg ? 0 : 0xffffffff);
+}
+
+static int mlxsw_sp_mr_route_evif_link(struct mlxsw_sp_mr_route *mr_route,
+				       struct mlxsw_sp_mr_vif *mr_vif)
+{
+	struct mlxsw_sp_mr_route_vif_entry *rve;
+
+	rve = kzalloc(sizeof(*rve), GFP_KERNEL);
+	if (!rve)
+		return -ENOMEM;
+	rve->mr_route = mr_route;
+	rve->mr_vif = mr_vif;
+	list_add_tail(&rve->route_node, &mr_route->evif_list);
+	list_add_tail(&rve->vif_node, &mr_vif->route_evif_list);
+	return 0;
+}
+
+static void
+mlxsw_sp_mr_route_evif_unlink(struct mlxsw_sp_mr_route_vif_entry *rve)
+{
+	list_del(&rve->route_node);
+	list_del(&rve->vif_node);
+	kfree(rve);
+}
+
+static void mlxsw_sp_mr_route_ivif_link(struct mlxsw_sp_mr_route *mr_route,
+					struct mlxsw_sp_mr_vif *mr_vif)
+{
+	mr_route->ivif.mr_route = mr_route;
+	mr_route->ivif.mr_vif = mr_vif;
+	list_add_tail(&mr_route->ivif.vif_node, &mr_vif->route_ivif_list);
+}
+
+static void mlxsw_sp_mr_route_ivif_unlink(struct mlxsw_sp_mr_route *mr_route)
+{
+	list_del(&mr_route->ivif.vif_node);
+}
+
+static int
+mlxsw_sp_mr_route_info_create(struct mlxsw_sp_mr_table *mr_table,
+			      struct mlxsw_sp_mr_route *mr_route,
+			      struct mlxsw_sp_mr_route_info *route_info)
+{
+	struct mlxsw_sp_mr_route_vif_entry *rve;
+	u16 *erif_indices;
+	u16 irif_index;
+	u16 erif = 0;
+
+	erif_indices = kmalloc_array(MAXVIFS, sizeof(*erif_indices),
+				     GFP_KERNEL);
+	if (!erif_indices)
+		return -ENOMEM;
+
+	list_for_each_entry(rve, &mr_route->evif_list, route_node) {
+		if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) {
+			u16 rifi = mlxsw_sp_rif_index(rve->mr_vif->rif);
+
+			erif_indices[erif++] = rifi;
+		}
+	}
+
+	if (mlxsw_sp_mr_vif_valid(mr_route->ivif.mr_vif))
+		irif_index = mlxsw_sp_rif_index(mr_route->ivif.mr_vif->rif);
+	else
+		irif_index = 0;
+
+	route_info->irif_index = irif_index;
+	route_info->erif_indices = erif_indices;
+	route_info->min_mtu = mr_route->min_mtu;
+	route_info->route_action = mr_route->route_action;
+	route_info->erif_num = erif;
+	return 0;
+}
+
+static void
+mlxsw_sp_mr_route_info_destroy(struct mlxsw_sp_mr_route_info *route_info)
+{
+	kfree(route_info->erif_indices);
+}
+
+static int mlxsw_sp_mr_route_write(struct mlxsw_sp_mr_table *mr_table,
+				   struct mlxsw_sp_mr_route *mr_route,
+				   bool replace)
+{
+	struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
+	struct mlxsw_sp_mr_route_info route_info;
+	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+	int err;
+
+	err = mlxsw_sp_mr_route_info_create(mr_table, mr_route, &route_info);
+	if (err)
+		return err;
+
+	if (!replace) {
+		struct mlxsw_sp_mr_route_params route_params;
+
+		mr_route->route_priv = kzalloc(mr->mr_ops->route_priv_size,
+					       GFP_KERNEL);
+		if (!mr_route->route_priv) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		route_params.key = mr_route->key;
+		route_params.value = route_info;
+		route_params.prio = mlxsw_sp_mr_route_prio(mr_route);
+		err = mr->mr_ops->route_create(mlxsw_sp, mr->priv,
+					       mr_route->route_priv,
+					       &route_params);
+		if (err)
+			kfree(mr_route->route_priv);
+	} else {
+		err = mr->mr_ops->route_update(mlxsw_sp, mr_route->route_priv,
+					       &route_info);
+	}
+out:
+	mlxsw_sp_mr_route_info_destroy(&route_info);
+	return err;
+}
+
+static void mlxsw_sp_mr_route_erase(struct mlxsw_sp_mr_table *mr_table,
+				    struct mlxsw_sp_mr_route *mr_route)
+{
+	struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
+	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+
+	mr->mr_ops->route_destroy(mlxsw_sp, mr->priv, mr_route->route_priv);
+	kfree(mr_route->route_priv);
+}
+
+static struct mlxsw_sp_mr_route *
+mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
+			  struct mfc_cache *mfc)
+{
+	struct mlxsw_sp_mr_route_vif_entry *rve, *tmp;
+	struct mlxsw_sp_mr_route *mr_route;
+	int err = 0;
+	int i;
+
+	/* Allocate and init a new route and fill it with parameters */
+	mr_route = kzalloc(sizeof(*mr_route), GFP_KERNEL);
+	if (!mr_route)
+		return ERR_PTR(-ENOMEM);
+	INIT_LIST_HEAD(&mr_route->evif_list);
+	mlxsw_sp_mr_route4_key(mr_table, &mr_route->key, mfc);
+
+	/* Find min_mtu and link iVIF and eVIFs */
+	mr_route->min_mtu = ETH_MAX_MTU;
+	ipmr_cache_hold(mfc);
+	mr_route->mfc4 = mfc;
+	mr_route->mr_table = mr_table;
+	for (i = 0; i < MAXVIFS; i++) {
+		if (mfc->mfc_un.res.ttls[i] != 255) {
+			err = mlxsw_sp_mr_route_evif_link(mr_route,
+							  &mr_table->vifs[i]);
+			if (err)
+				goto err;
+			if (mr_table->vifs[i].dev &&
+			    mr_table->vifs[i].dev->mtu < mr_route->min_mtu)
+				mr_route->min_mtu = mr_table->vifs[i].dev->mtu;
+		}
+	}
+	mlxsw_sp_mr_route_ivif_link(mr_route, &mr_table->vifs[mfc->mfc_parent]);
+
+	mr_route->route_action = mlxsw_sp_mr_route_action(mr_route);
+	return mr_route;
+err:
+	ipmr_cache_put(mfc);
+	list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node)
+		mlxsw_sp_mr_route_evif_unlink(rve);
+	kfree(mr_route);
+	return ERR_PTR(err);
+}
+
+static void mlxsw_sp_mr_route4_destroy(struct mlxsw_sp_mr_table *mr_table,
+				       struct mlxsw_sp_mr_route *mr_route)
+{
+	struct mlxsw_sp_mr_route_vif_entry *rve, *tmp;
+
+	mlxsw_sp_mr_route_ivif_unlink(mr_route);
+	ipmr_cache_put(mr_route->mfc4);
+	list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node)
+		mlxsw_sp_mr_route_evif_unlink(rve);
+	kfree(mr_route);
+}
+
+static void mlxsw_sp_mr_route_destroy(struct mlxsw_sp_mr_table *mr_table,
+				      struct mlxsw_sp_mr_route *mr_route)
+{
+	switch (mr_table->proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		mlxsw_sp_mr_route4_destroy(mr_table, mr_route);
+		break;
+	case MLXSW_SP_L3_PROTO_IPV6:
+		/* fall through */
+	default:
+		WARN_ON_ONCE(1);
+	}
+}
+
+static void mlxsw_sp_mr_mfc_offload_set(struct mlxsw_sp_mr_route *mr_route,
+					bool offload)
+{
+	switch (mr_route->mr_table->proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		if (offload)
+			mr_route->mfc4->mfc_flags |= MFC_OFFLOAD;
+		else
+			mr_route->mfc4->mfc_flags &= ~MFC_OFFLOAD;
+		break;
+	case MLXSW_SP_L3_PROTO_IPV6:
+		/* fall through */
+	default:
+		WARN_ON_ONCE(1);
+	}
+}
+
+static void mlxsw_sp_mr_mfc_offload_update(struct mlxsw_sp_mr_route *mr_route)
+{
+	bool offload;
+
+	offload = mr_route->route_action != MLXSW_SP_MR_ROUTE_ACTION_TRAP;
+	mlxsw_sp_mr_mfc_offload_set(mr_route, offload);
+}
+
+static void __mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table,
+				    struct mlxsw_sp_mr_route *mr_route)
+{
+	mlxsw_sp_mr_mfc_offload_set(mr_route, false);
+	mlxsw_sp_mr_route_erase(mr_table, mr_route);
+	rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node,
+			       mlxsw_sp_mr_route_ht_params);
+	list_del(&mr_route->node);
+	mlxsw_sp_mr_route_destroy(mr_table, mr_route);
+}
+
+int mlxsw_sp_mr_route4_add(struct mlxsw_sp_mr_table *mr_table,
+			   struct mfc_cache *mfc, bool replace)
+{
+	struct mlxsw_sp_mr_route *mr_orig_route = NULL;
+	struct mlxsw_sp_mr_route *mr_route;
+	int err;
+
+	/* If the route is a (*,*) route, abort, as these kind of routes are
+	 * used for proxy routes.
+	 */
+	if (mfc->mfc_origin == htonl(INADDR_ANY) &&
+	    mfc->mfc_mcastgrp == htonl(INADDR_ANY)) {
+		dev_warn(mr_table->mlxsw_sp->bus_info->dev,
+			 "Offloading proxy routes is not supported.\n");
+		return -EINVAL;
+	}
+
+	/* Create a new route */
+	mr_route = mlxsw_sp_mr_route4_create(mr_table, mfc);
+	if (IS_ERR(mr_route))
+		return PTR_ERR(mr_route);
+
+	/* Find any route with a matching key */
+	mr_orig_route = rhashtable_lookup_fast(&mr_table->route_ht,
+					       &mr_route->key,
+					       mlxsw_sp_mr_route_ht_params);
+	if (replace) {
+		/* On replace case, make the route point to the new route_priv.
+		 */
+		if (WARN_ON(!mr_orig_route)) {
+			err = -ENOENT;
+			goto err_no_orig_route;
+		}
+		mr_route->route_priv = mr_orig_route->route_priv;
+	} else if (mr_orig_route) {
+		/* On non replace case, if another route with the same key was
+		 * found, abort, as duplicate routes are used for proxy routes.
+		 */
+		dev_warn(mr_table->mlxsw_sp->bus_info->dev,
+			 "Offloading proxy routes is not supported.\n");
+		err = -EINVAL;
+		goto err_duplicate_route;
+	}
+
+	/* Put it in the table data-structures */
+	list_add_tail(&mr_route->node, &mr_table->route_list);
+	err = rhashtable_insert_fast(&mr_table->route_ht,
+				     &mr_route->ht_node,
+				     mlxsw_sp_mr_route_ht_params);
+	if (err)
+		goto err_rhashtable_insert;
+
+	/* Write the route to the hardware */
+	err = mlxsw_sp_mr_route_write(mr_table, mr_route, replace);
+	if (err)
+		goto err_mr_route_write;
+
+	/* Destroy the original route */
+	if (replace) {
+		rhashtable_remove_fast(&mr_table->route_ht,
+				       &mr_orig_route->ht_node,
+				       mlxsw_sp_mr_route_ht_params);
+		list_del(&mr_orig_route->node);
+		mlxsw_sp_mr_route4_destroy(mr_table, mr_orig_route);
+	}
+
+	mlxsw_sp_mr_mfc_offload_update(mr_route);
+	return 0;
+
+err_mr_route_write:
+	rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node,
+			       mlxsw_sp_mr_route_ht_params);
+err_rhashtable_insert:
+	list_del(&mr_route->node);
+err_no_orig_route:
+err_duplicate_route:
+	mlxsw_sp_mr_route4_destroy(mr_table, mr_route);
+	return err;
+}
+
+void mlxsw_sp_mr_route4_del(struct mlxsw_sp_mr_table *mr_table,
+			    struct mfc_cache *mfc)
+{
+	struct mlxsw_sp_mr_route *mr_route;
+	struct mlxsw_sp_mr_route_key key;
+
+	mlxsw_sp_mr_route4_key(mr_table, &key, mfc);
+	mr_route = rhashtable_lookup_fast(&mr_table->route_ht, &key,
+					  mlxsw_sp_mr_route_ht_params);
+	if (mr_route)
+		__mlxsw_sp_mr_route_del(mr_table, mr_route);
+}
+
+/* Should be called after the VIF struct is updated */
+static int
+mlxsw_sp_mr_route_ivif_resolve(struct mlxsw_sp_mr_table *mr_table,
+			       struct mlxsw_sp_mr_route_vif_entry *rve)
+{
+	struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
+	enum mlxsw_sp_mr_route_action route_action;
+	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+	u16 irif_index;
+	int err;
+
+	route_action = mlxsw_sp_mr_route_action(rve->mr_route);
+	if (route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP)
+		return 0;
+
+	/* rve->mr_vif->rif is guaranteed to be valid at this stage */
+	irif_index = mlxsw_sp_rif_index(rve->mr_vif->rif);
+	err = mr->mr_ops->route_irif_update(mlxsw_sp, rve->mr_route->route_priv,
+					    irif_index);
+	if (err)
+		return err;
+
+	err = mr->mr_ops->route_action_update(mlxsw_sp,
+					      rve->mr_route->route_priv,
+					      route_action);
+	if (err)
+		/* No need to rollback here because the iRIF change only takes
+		 * place after the action has been updated.
+		 */
+		return err;
+
+	rve->mr_route->route_action = route_action;
+	mlxsw_sp_mr_mfc_offload_update(rve->mr_route);
+	return 0;
+}
+
+static void
+mlxsw_sp_mr_route_ivif_unresolve(struct mlxsw_sp_mr_table *mr_table,
+				 struct mlxsw_sp_mr_route_vif_entry *rve)
+{
+	struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
+	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+
+	mr->mr_ops->route_action_update(mlxsw_sp, rve->mr_route->route_priv,
+					MLXSW_SP_MR_ROUTE_ACTION_TRAP);
+	rve->mr_route->route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP;
+	mlxsw_sp_mr_mfc_offload_update(rve->mr_route);
+}
+
+/* Should be called after the RIF struct is updated */
+static int
+mlxsw_sp_mr_route_evif_resolve(struct mlxsw_sp_mr_table *mr_table,
+			       struct mlxsw_sp_mr_route_vif_entry *rve)
+{
+	struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
+	enum mlxsw_sp_mr_route_action route_action;
+	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+	u16 erif_index = 0;
+	int err;
+
+	/* Update the route action, as the new eVIF can be a tunnel or a pimreg
+	 * device which will require updating the action.
+	 */
+	route_action = mlxsw_sp_mr_route_action(rve->mr_route);
+	if (route_action != rve->mr_route->route_action) {
+		err = mr->mr_ops->route_action_update(mlxsw_sp,
+						      rve->mr_route->route_priv,
+						      route_action);
+		if (err)
+			return err;
+	}
+
+	/* Add the eRIF */
+	if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) {
+		erif_index = mlxsw_sp_rif_index(rve->mr_vif->rif);
+		err = mr->mr_ops->route_erif_add(mlxsw_sp,
+						 rve->mr_route->route_priv,
+						 erif_index);
+		if (err)
+			goto err_route_erif_add;
+	}
+
+	/* Update the minimum MTU */
+	if (rve->mr_vif->dev->mtu < rve->mr_route->min_mtu) {
+		rve->mr_route->min_mtu = rve->mr_vif->dev->mtu;
+		err = mr->mr_ops->route_min_mtu_update(mlxsw_sp,
+						       rve->mr_route->route_priv,
+						       rve->mr_route->min_mtu);
+		if (err)
+			goto err_route_min_mtu_update;
+	}
+
+	rve->mr_route->route_action = route_action;
+	mlxsw_sp_mr_mfc_offload_update(rve->mr_route);
+	return 0;
+
+err_route_min_mtu_update:
+	if (mlxsw_sp_mr_vif_valid(rve->mr_vif))
+		mr->mr_ops->route_erif_del(mlxsw_sp, rve->mr_route->route_priv,
+					   erif_index);
+err_route_erif_add:
+	if (route_action != rve->mr_route->route_action)
+		mr->mr_ops->route_action_update(mlxsw_sp,
+						rve->mr_route->route_priv,
+						rve->mr_route->route_action);
+	return err;
+}
+
+/* Should be called before the RIF struct is updated */
+static void
+mlxsw_sp_mr_route_evif_unresolve(struct mlxsw_sp_mr_table *mr_table,
+				 struct mlxsw_sp_mr_route_vif_entry *rve)
+{
+	struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
+	enum mlxsw_sp_mr_route_action route_action;
+	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+	u16 rifi;
+
+	/* If the unresolved RIF was not valid, no need to delete it */
+	if (!mlxsw_sp_mr_vif_valid(rve->mr_vif))
+		return;
+
+	/* Update the route action: if there is only one valid eVIF in the
+	 * route, set the action to trap as the VIF deletion will lead to zero
+	 * valid eVIFs. On any other case, use the mlxsw_sp_mr_route_action to
+	 * determine the route action.
+	 */
+	if (mlxsw_sp_mr_route_valid_evifs_num(rve->mr_route) == 1)
+		route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP;
+	else
+		route_action = mlxsw_sp_mr_route_action(rve->mr_route);
+	if (route_action != rve->mr_route->route_action)
+		mr->mr_ops->route_action_update(mlxsw_sp,
+						rve->mr_route->route_priv,
+						route_action);
+
+	/* Delete the erif from the route */
+	rifi = mlxsw_sp_rif_index(rve->mr_vif->rif);
+	mr->mr_ops->route_erif_del(mlxsw_sp, rve->mr_route->route_priv, rifi);
+	rve->mr_route->route_action = route_action;
+	mlxsw_sp_mr_mfc_offload_update(rve->mr_route);
+}
+
+static int mlxsw_sp_mr_vif_resolve(struct mlxsw_sp_mr_table *mr_table,
+				   struct net_device *dev,
+				   struct mlxsw_sp_mr_vif *mr_vif,
+				   unsigned long vif_flags,
+				   const struct mlxsw_sp_rif *rif)
+{
+	struct mlxsw_sp_mr_route_vif_entry *irve, *erve;
+	int err;
+
+	/* Update the VIF */
+	mr_vif->dev = dev;
+	mr_vif->rif = rif;
+	mr_vif->vif_flags = vif_flags;
+
+	/* Update all routes where this VIF is used as an unresolved iRIF */
+	list_for_each_entry(irve, &mr_vif->route_ivif_list, vif_node) {
+		err = mlxsw_sp_mr_route_ivif_resolve(mr_table, irve);
+		if (err)
+			goto err_irif_unresolve;
+	}
+
+	/* Update all routes where this VIF is used as an unresolved eRIF */
+	list_for_each_entry(erve, &mr_vif->route_evif_list, vif_node) {
+		err = mlxsw_sp_mr_route_evif_resolve(mr_table, erve);
+		if (err)
+			goto err_erif_unresolve;
+	}
+	return 0;
+
+err_erif_unresolve:
+	list_for_each_entry_from_reverse(erve, &mr_vif->route_evif_list,
+					 vif_node)
+		mlxsw_sp_mr_route_evif_unresolve(mr_table, erve);
+err_irif_unresolve:
+	list_for_each_entry_from_reverse(irve, &mr_vif->route_ivif_list,
+					 vif_node)
+		mlxsw_sp_mr_route_ivif_unresolve(mr_table, irve);
+	mr_vif->rif = NULL;
+	return err;
+}
+
+static void mlxsw_sp_mr_vif_unresolve(struct mlxsw_sp_mr_table *mr_table,
+				      struct net_device *dev,
+				      struct mlxsw_sp_mr_vif *mr_vif)
+{
+	struct mlxsw_sp_mr_route_vif_entry *rve;
+
+	/* Update all routes where this VIF is used as an unresolved eRIF */
+	list_for_each_entry(rve, &mr_vif->route_evif_list, vif_node)
+		mlxsw_sp_mr_route_evif_unresolve(mr_table, rve);
+
+	/* Update all routes where this VIF is used as an unresolved iRIF */
+	list_for_each_entry(rve, &mr_vif->route_ivif_list, vif_node)
+		mlxsw_sp_mr_route_ivif_unresolve(mr_table, rve);
+
+	/* Update the VIF */
+	mr_vif->dev = dev;
+	mr_vif->rif = NULL;
+}
+
+int mlxsw_sp_mr_vif_add(struct mlxsw_sp_mr_table *mr_table,
+			struct net_device *dev, vifi_t vif_index,
+			unsigned long vif_flags, const struct mlxsw_sp_rif *rif)
+{
+	struct mlxsw_sp_mr_vif *mr_vif = &mr_table->vifs[vif_index];
+
+	if (WARN_ON(vif_index >= MAXVIFS))
+		return -EINVAL;
+	if (mr_vif->dev)
+		return -EEXIST;
+	return mlxsw_sp_mr_vif_resolve(mr_table, dev, mr_vif, vif_flags, rif);
+}
+
+void mlxsw_sp_mr_vif_del(struct mlxsw_sp_mr_table *mr_table, vifi_t vif_index)
+{
+	struct mlxsw_sp_mr_vif *mr_vif = &mr_table->vifs[vif_index];
+
+	if (WARN_ON(vif_index >= MAXVIFS))
+		return;
+	if (WARN_ON(!mr_vif->dev))
+		return;
+	mlxsw_sp_mr_vif_unresolve(mr_table, NULL, mr_vif);
+}
+
+static struct mlxsw_sp_mr_vif *
+mlxsw_sp_mr_dev_vif_lookup(struct mlxsw_sp_mr_table *mr_table,
+			   const struct net_device *dev)
+{
+	vifi_t vif_index;
+
+	for (vif_index = 0; vif_index < MAXVIFS; vif_index++)
+		if (mr_table->vifs[vif_index].dev == dev)
+			return &mr_table->vifs[vif_index];
+	return NULL;
+}
+
+int mlxsw_sp_mr_rif_add(struct mlxsw_sp_mr_table *mr_table,
+			const struct mlxsw_sp_rif *rif)
+{
+	const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif);
+	struct mlxsw_sp_mr_vif *mr_vif;
+
+	if (!rif_dev)
+		return 0;
+
+	mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev);
+	if (!mr_vif)
+		return 0;
+	return mlxsw_sp_mr_vif_resolve(mr_table, mr_vif->dev, mr_vif,
+				       mr_vif->vif_flags, rif);
+}
+
+void mlxsw_sp_mr_rif_del(struct mlxsw_sp_mr_table *mr_table,
+			 const struct mlxsw_sp_rif *rif)
+{
+	const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif);
+	struct mlxsw_sp_mr_vif *mr_vif;
+
+	if (!rif_dev)
+		return;
+
+	mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev);
+	if (!mr_vif)
+		return;
+	mlxsw_sp_mr_vif_unresolve(mr_table, mr_vif->dev, mr_vif);
+}
+
+void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table,
+				const struct mlxsw_sp_rif *rif, int mtu)
+{
+	const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif);
+	struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
+	struct mlxsw_sp_mr_route_vif_entry *rve;
+	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+	struct mlxsw_sp_mr_vif *mr_vif;
+
+	if (!rif_dev)
+		return;
+
+	/* Search for a VIF that use that RIF */
+	mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev);
+	if (!mr_vif)
+		return;
+
+	/* Update all the routes that uses that VIF as eVIF */
+	list_for_each_entry(rve, &mr_vif->route_evif_list, vif_node) {
+		if (mtu < rve->mr_route->min_mtu) {
+			rve->mr_route->min_mtu = mtu;
+			mr->mr_ops->route_min_mtu_update(mlxsw_sp,
+							 rve->mr_route->route_priv,
+							 mtu);
+		}
+	}
+}
+
+struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp,
+						   u32 vr_id,
+						   enum mlxsw_sp_l3proto proto)
+{
+	struct mlxsw_sp_mr_route_params catchall_route_params = {
+		.prio = MLXSW_SP_MR_ROUTE_PRIO_CATCHALL,
+		.key = {
+			.vrid = vr_id,
+		},
+		.value = {
+			.route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP,
+		}
+	};
+	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+	struct mlxsw_sp_mr_table *mr_table;
+	int err;
+	int i;
+
+	mr_table = kzalloc(sizeof(*mr_table) + mr->mr_ops->route_priv_size,
+			   GFP_KERNEL);
+	if (!mr_table)
+		return ERR_PTR(-ENOMEM);
+
+	mr_table->vr_id = vr_id;
+	mr_table->mlxsw_sp = mlxsw_sp;
+	mr_table->proto = proto;
+	INIT_LIST_HEAD(&mr_table->route_list);
+
+	err = rhashtable_init(&mr_table->route_ht,
+			      &mlxsw_sp_mr_route_ht_params);
+	if (err)
+		goto err_route_rhashtable_init;
+
+	for (i = 0; i < MAXVIFS; i++) {
+		INIT_LIST_HEAD(&mr_table->vifs[i].route_evif_list);
+		INIT_LIST_HEAD(&mr_table->vifs[i].route_ivif_list);
+	}
+
+	err = mr->mr_ops->route_create(mlxsw_sp, mr->priv,
+				       mr_table->catchall_route_priv,
+				       &catchall_route_params);
+	if (err)
+		goto err_ops_route_create;
+	list_add_tail(&mr_table->node, &mr->table_list);
+	return mr_table;
+
+err_ops_route_create:
+	rhashtable_destroy(&mr_table->route_ht);
+err_route_rhashtable_init:
+	kfree(mr_table);
+	return ERR_PTR(err);
+}
+
+void mlxsw_sp_mr_table_destroy(struct mlxsw_sp_mr_table *mr_table)
+{
+	struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
+	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+
+	WARN_ON(!mlxsw_sp_mr_table_empty(mr_table));
+	list_del(&mr_table->node);
+	mr->mr_ops->route_destroy(mlxsw_sp, mr->priv,
+				  &mr_table->catchall_route_priv);
+	rhashtable_destroy(&mr_table->route_ht);
+	kfree(mr_table);
+}
+
+void mlxsw_sp_mr_table_flush(struct mlxsw_sp_mr_table *mr_table)
+{
+	struct mlxsw_sp_mr_route *mr_route, *tmp;
+	int i;
+
+	list_for_each_entry_safe(mr_route, tmp, &mr_table->route_list, node)
+		__mlxsw_sp_mr_route_del(mr_table, mr_route);
+
+	for (i = 0; i < MAXVIFS; i++) {
+		mr_table->vifs[i].dev = NULL;
+		mr_table->vifs[i].rif = NULL;
+	}
+}
+
+bool mlxsw_sp_mr_table_empty(const struct mlxsw_sp_mr_table *mr_table)
+{
+	int i;
+
+	for (i = 0; i < MAXVIFS; i++)
+		if (mr_table->vifs[i].dev)
+			return false;
+	return list_empty(&mr_table->route_list);
+}
+
+static void mlxsw_sp_mr_route_stats_update(struct mlxsw_sp *mlxsw_sp,
+					   struct mlxsw_sp_mr_route *mr_route)
+{
+	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+	u64 packets, bytes;
+
+	if (mr_route->route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP)
+		return;
+
+	mr->mr_ops->route_stats(mlxsw_sp, mr_route->route_priv, &packets,
+				&bytes);
+
+	switch (mr_route->mr_table->proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		if (mr_route->mfc4->mfc_un.res.pkt != packets)
+			mr_route->mfc4->mfc_un.res.lastuse = jiffies;
+		mr_route->mfc4->mfc_un.res.pkt = packets;
+		mr_route->mfc4->mfc_un.res.bytes = bytes;
+		break;
+	case MLXSW_SP_L3_PROTO_IPV6:
+		/* fall through */
+	default:
+		WARN_ON_ONCE(1);
+	}
+}
+
+static void mlxsw_sp_mr_stats_update(struct work_struct *work)
+{
+	struct mlxsw_sp_mr *mr = container_of(work, struct mlxsw_sp_mr,
+					      stats_update_dw.work);
+	struct mlxsw_sp_mr_table *mr_table;
+	struct mlxsw_sp_mr_route *mr_route;
+	unsigned long interval;
+
+	rtnl_lock();
+	list_for_each_entry(mr_table, &mr->table_list, node)
+		list_for_each_entry(mr_route, &mr_table->route_list, node)
+			mlxsw_sp_mr_route_stats_update(mr_table->mlxsw_sp,
+						       mr_route);
+	rtnl_unlock();
+
+	interval = msecs_to_jiffies(MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL);
+	mlxsw_core_schedule_dw(&mr->stats_update_dw, interval);
+}
+
+int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp,
+		     const struct mlxsw_sp_mr_ops *mr_ops)
+{
+	struct mlxsw_sp_mr *mr;
+	unsigned long interval;
+	int err;
+
+	mr = kzalloc(sizeof(*mr) + mr_ops->priv_size, GFP_KERNEL);
+	if (!mr)
+		return -ENOMEM;
+	mr->mr_ops = mr_ops;
+	mlxsw_sp->mr = mr;
+	INIT_LIST_HEAD(&mr->table_list);
+
+	err = mr_ops->init(mlxsw_sp, mr->priv);
+	if (err)
+		goto err;
+
+	/* Create the delayed work for counter updates */
+	INIT_DELAYED_WORK(&mr->stats_update_dw, mlxsw_sp_mr_stats_update);
+	interval = msecs_to_jiffies(MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL);
+	mlxsw_core_schedule_dw(&mr->stats_update_dw, interval);
+	return 0;
+err:
+	kfree(mr);
+	return err;
+}
+
+void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp)
+{
+	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+
+	cancel_delayed_work_sync(&mr->stats_update_dw);
+	mr->mr_ops->fini(mr->priv);
+	kfree(mr);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
new file mode 100644
index 000000000000..5d26a122af49
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
@@ -0,0 +1,134 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXSW_SPECTRUM_MCROUTER_H
+#define _MLXSW_SPECTRUM_MCROUTER_H
+
+#include <linux/mroute.h>
+#include "spectrum_router.h"
+#include "spectrum.h"
+
+enum mlxsw_sp_mr_route_action {
+	MLXSW_SP_MR_ROUTE_ACTION_FORWARD,
+	MLXSW_SP_MR_ROUTE_ACTION_TRAP,
+	MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD,
+};
+
+enum mlxsw_sp_mr_route_prio {
+	MLXSW_SP_MR_ROUTE_PRIO_SG,
+	MLXSW_SP_MR_ROUTE_PRIO_STARG,
+	MLXSW_SP_MR_ROUTE_PRIO_CATCHALL,
+	__MLXSW_SP_MR_ROUTE_PRIO_MAX
+};
+
+#define MLXSW_SP_MR_ROUTE_PRIO_MAX (__MLXSW_SP_MR_ROUTE_PRIO_MAX - 1)
+
+struct mlxsw_sp_mr_route_key {
+	int vrid;
+	enum mlxsw_sp_l3proto proto;
+	union mlxsw_sp_l3addr group;
+	union mlxsw_sp_l3addr group_mask;
+	union mlxsw_sp_l3addr source;
+	union mlxsw_sp_l3addr source_mask;
+};
+
+struct mlxsw_sp_mr_route_info {
+	enum mlxsw_sp_mr_route_action route_action;
+	u16 irif_index;
+	u16 *erif_indices;
+	size_t erif_num;
+	u16 min_mtu;
+};
+
+struct mlxsw_sp_mr_route_params {
+	struct mlxsw_sp_mr_route_key key;
+	struct mlxsw_sp_mr_route_info value;
+	enum mlxsw_sp_mr_route_prio prio;
+};
+
+struct mlxsw_sp_mr_ops {
+	int priv_size;
+	int route_priv_size;
+	int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv);
+	int (*route_create)(struct mlxsw_sp *mlxsw_sp, void *priv,
+			    void *route_priv,
+			    struct mlxsw_sp_mr_route_params *route_params);
+	int (*route_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+			    struct mlxsw_sp_mr_route_info *route_info);
+	int (*route_stats)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+			   u64 *packets, u64 *bytes);
+	int (*route_action_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+				   enum mlxsw_sp_mr_route_action route_action);
+	int (*route_min_mtu_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+				    u16 min_mtu);
+	int (*route_irif_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+				 u16 irif_index);
+	int (*route_erif_add)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+			      u16 erif_index);
+	int (*route_erif_del)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+			      u16 erif_index);
+	void (*route_destroy)(struct mlxsw_sp *mlxsw_sp, void *priv,
+			      void *route_priv);
+	void (*fini)(void *priv);
+};
+
+struct mlxsw_sp_mr;
+struct mlxsw_sp_mr_table;
+
+int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp,
+		     const struct mlxsw_sp_mr_ops *mr_ops);
+void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp);
+int mlxsw_sp_mr_route4_add(struct mlxsw_sp_mr_table *mr_table,
+			   struct mfc_cache *mfc, bool replace);
+void mlxsw_sp_mr_route4_del(struct mlxsw_sp_mr_table *mr_table,
+			    struct mfc_cache *mfc);
+int mlxsw_sp_mr_vif_add(struct mlxsw_sp_mr_table *mr_table,
+			struct net_device *dev, vifi_t vif_index,
+			unsigned long vif_flags,
+			const struct mlxsw_sp_rif *rif);
+void mlxsw_sp_mr_vif_del(struct mlxsw_sp_mr_table *mr_table, vifi_t vif_index);
+int mlxsw_sp_mr_rif_add(struct mlxsw_sp_mr_table *mr_table,
+			const struct mlxsw_sp_rif *rif);
+void mlxsw_sp_mr_rif_del(struct mlxsw_sp_mr_table *mr_table,
+			 const struct mlxsw_sp_rif *rif);
+void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table,
+				const struct mlxsw_sp_rif *rif, int mtu);
+struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp,
+						   u32 tb_id,
+						   enum mlxsw_sp_l3proto proto);
+void mlxsw_sp_mr_table_destroy(struct mlxsw_sp_mr_table *mr_table);
+void mlxsw_sp_mr_table_flush(struct mlxsw_sp_mr_table *mr_table);
+bool mlxsw_sp_mr_table_empty(const struct mlxsw_sp_mr_table *mr_table);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
new file mode 100644
index 000000000000..34a0b632e5dd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
@@ -0,0 +1,839 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/parman.h>
+
+#include "spectrum_mr_tcam.h"
+#include "reg.h"
+#include "spectrum.h"
+#include "core_acl_flex_actions.h"
+#include "spectrum_mr.h"
+
+struct mlxsw_sp_mr_tcam_region {
+	struct mlxsw_sp *mlxsw_sp;
+	enum mlxsw_reg_rtar_key_type rtar_key_type;
+	struct parman *parman;
+	struct parman_prio *parman_prios;
+};
+
+struct mlxsw_sp_mr_tcam {
+	struct mlxsw_sp_mr_tcam_region ipv4_tcam_region;
+};
+
+/* This struct maps to one RIGR2 register entry */
+struct mlxsw_sp_mr_erif_sublist {
+	struct list_head list;
+	u32 rigr2_kvdl_index;
+	int num_erifs;
+	u16 erif_indices[MLXSW_REG_RIGR2_MAX_ERIFS];
+	bool synced;
+};
+
+struct mlxsw_sp_mr_tcam_erif_list {
+	struct list_head erif_sublists;
+	u32 kvdl_index;
+};
+
+static bool
+mlxsw_sp_mr_erif_sublist_full(struct mlxsw_sp *mlxsw_sp,
+			      struct mlxsw_sp_mr_erif_sublist *erif_sublist)
+{
+	int erif_list_entries = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+						   MC_ERIF_LIST_ENTRIES);
+
+	return erif_sublist->num_erifs == erif_list_entries;
+}
+
+static void
+mlxsw_sp_mr_erif_list_init(struct mlxsw_sp_mr_tcam_erif_list *erif_list)
+{
+	INIT_LIST_HEAD(&erif_list->erif_sublists);
+}
+
+#define MLXSW_SP_KVDL_RIGR2_SIZE 1
+
+static struct mlxsw_sp_mr_erif_sublist *
+mlxsw_sp_mr_erif_sublist_create(struct mlxsw_sp *mlxsw_sp,
+				struct mlxsw_sp_mr_tcam_erif_list *erif_list)
+{
+	struct mlxsw_sp_mr_erif_sublist *erif_sublist;
+	int err;
+
+	erif_sublist = kzalloc(sizeof(*erif_sublist), GFP_KERNEL);
+	if (!erif_sublist)
+		return ERR_PTR(-ENOMEM);
+	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_RIGR2_SIZE,
+				  &erif_sublist->rigr2_kvdl_index);
+	if (err) {
+		kfree(erif_sublist);
+		return ERR_PTR(err);
+	}
+
+	list_add_tail(&erif_sublist->list, &erif_list->erif_sublists);
+	return erif_sublist;
+}
+
+static void
+mlxsw_sp_mr_erif_sublist_destroy(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_mr_erif_sublist *erif_sublist)
+{
+	list_del(&erif_sublist->list);
+	mlxsw_sp_kvdl_free(mlxsw_sp, erif_sublist->rigr2_kvdl_index);
+	kfree(erif_sublist);
+}
+
+static int
+mlxsw_sp_mr_erif_list_add(struct mlxsw_sp *mlxsw_sp,
+			  struct mlxsw_sp_mr_tcam_erif_list *erif_list,
+			  u16 erif_index)
+{
+	struct mlxsw_sp_mr_erif_sublist *sublist;
+
+	/* If either there is no erif_entry or the last one is full, allocate a
+	 * new one.
+	 */
+	if (list_empty(&erif_list->erif_sublists)) {
+		sublist = mlxsw_sp_mr_erif_sublist_create(mlxsw_sp, erif_list);
+		if (IS_ERR(sublist))
+			return PTR_ERR(sublist);
+		erif_list->kvdl_index = sublist->rigr2_kvdl_index;
+	} else {
+		sublist = list_last_entry(&erif_list->erif_sublists,
+					  struct mlxsw_sp_mr_erif_sublist,
+					  list);
+		sublist->synced = false;
+		if (mlxsw_sp_mr_erif_sublist_full(mlxsw_sp, sublist)) {
+			sublist = mlxsw_sp_mr_erif_sublist_create(mlxsw_sp,
+								  erif_list);
+			if (IS_ERR(sublist))
+				return PTR_ERR(sublist);
+		}
+	}
+
+	/* Add the eRIF to the last entry's last index */
+	sublist->erif_indices[sublist->num_erifs++] = erif_index;
+	return 0;
+}
+
+static void
+mlxsw_sp_mr_erif_list_flush(struct mlxsw_sp *mlxsw_sp,
+			    struct mlxsw_sp_mr_tcam_erif_list *erif_list)
+{
+	struct mlxsw_sp_mr_erif_sublist *erif_sublist, *tmp;
+
+	list_for_each_entry_safe(erif_sublist, tmp, &erif_list->erif_sublists,
+				 list)
+		mlxsw_sp_mr_erif_sublist_destroy(mlxsw_sp, erif_sublist);
+}
+
+static int
+mlxsw_sp_mr_erif_list_commit(struct mlxsw_sp *mlxsw_sp,
+			     struct mlxsw_sp_mr_tcam_erif_list *erif_list)
+{
+	struct mlxsw_sp_mr_erif_sublist *curr_sublist;
+	char rigr2_pl[MLXSW_REG_RIGR2_LEN];
+	int err;
+	int i;
+
+	list_for_each_entry(curr_sublist, &erif_list->erif_sublists, list) {
+		if (curr_sublist->synced)
+			continue;
+
+		/* If the sublist is not the last one, pack the next index */
+		if (list_is_last(&curr_sublist->list,
+				 &erif_list->erif_sublists)) {
+			mlxsw_reg_rigr2_pack(rigr2_pl,
+					     curr_sublist->rigr2_kvdl_index,
+					     false, 0);
+		} else {
+			struct mlxsw_sp_mr_erif_sublist *next_sublist;
+
+			next_sublist = list_next_entry(curr_sublist, list);
+			mlxsw_reg_rigr2_pack(rigr2_pl,
+					     curr_sublist->rigr2_kvdl_index,
+					     true,
+					     next_sublist->rigr2_kvdl_index);
+		}
+
+		/* Pack all the erifs */
+		for (i = 0; i < curr_sublist->num_erifs; i++) {
+			u16 erif_index = curr_sublist->erif_indices[i];
+
+			mlxsw_reg_rigr2_erif_entry_pack(rigr2_pl, i, true,
+							erif_index);
+		}
+
+		/* Write the entry */
+		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rigr2),
+				      rigr2_pl);
+		if (err)
+			/* No need of a rollback here because this
+			 * hardware entry should not be pointed yet.
+			 */
+			return err;
+		curr_sublist->synced = true;
+	}
+	return 0;
+}
+
+static void mlxsw_sp_mr_erif_list_move(struct mlxsw_sp_mr_tcam_erif_list *to,
+				       struct mlxsw_sp_mr_tcam_erif_list *from)
+{
+	list_splice(&from->erif_sublists, &to->erif_sublists);
+	to->kvdl_index = from->kvdl_index;
+}
+
+struct mlxsw_sp_mr_tcam_route {
+	struct mlxsw_sp_mr_tcam_erif_list erif_list;
+	struct mlxsw_afa_block *afa_block;
+	u32 counter_index;
+	struct parman_item parman_item;
+	struct parman_prio *parman_prio;
+	enum mlxsw_sp_mr_route_action action;
+	struct mlxsw_sp_mr_route_key key;
+	u16 irif_index;
+	u16 min_mtu;
+};
+
+static struct mlxsw_afa_block *
+mlxsw_sp_mr_tcam_afa_block_create(struct mlxsw_sp *mlxsw_sp,
+				  enum mlxsw_sp_mr_route_action route_action,
+				  u16 irif_index, u32 counter_index,
+				  u16 min_mtu,
+				  struct mlxsw_sp_mr_tcam_erif_list *erif_list)
+{
+	struct mlxsw_afa_block *afa_block;
+	int err;
+
+	afa_block = mlxsw_afa_block_create(mlxsw_sp->afa);
+	if (!afa_block)
+		return ERR_PTR(-ENOMEM);
+
+	err = mlxsw_afa_block_append_counter(afa_block, counter_index);
+	if (err)
+		goto err;
+
+	switch (route_action) {
+	case MLXSW_SP_MR_ROUTE_ACTION_TRAP:
+		err = mlxsw_afa_block_append_trap(afa_block,
+						  MLXSW_TRAP_ID_ACL1);
+		if (err)
+			goto err;
+		break;
+	case MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD:
+	case MLXSW_SP_MR_ROUTE_ACTION_FORWARD:
+		/* If we are about to append a multicast router action, commit
+		 * the erif_list.
+		 */
+		err = mlxsw_sp_mr_erif_list_commit(mlxsw_sp, erif_list);
+		if (err)
+			goto err;
+
+		err = mlxsw_afa_block_append_mcrouter(afa_block, irif_index,
+						      min_mtu, false,
+						      erif_list->kvdl_index);
+		if (err)
+			goto err;
+
+		if (route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD) {
+			err = mlxsw_afa_block_append_trap_and_forward(afa_block,
+								      MLXSW_TRAP_ID_ACL2);
+			if (err)
+				goto err;
+		}
+		break;
+	default:
+		err = -EINVAL;
+		goto err;
+	}
+
+	err = mlxsw_afa_block_commit(afa_block);
+	if (err)
+		goto err;
+	return afa_block;
+err:
+	mlxsw_afa_block_destroy(afa_block);
+	return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_mr_tcam_afa_block_destroy(struct mlxsw_afa_block *afa_block)
+{
+	mlxsw_afa_block_destroy(afa_block);
+}
+
+static int mlxsw_sp_mr_tcam_route_replace(struct mlxsw_sp *mlxsw_sp,
+					  struct parman_item *parman_item,
+					  struct mlxsw_sp_mr_route_key *key,
+					  struct mlxsw_afa_block *afa_block)
+{
+	char rmft2_pl[MLXSW_REG_RMFT2_LEN];
+
+	switch (key->proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, true, parman_item->index,
+					  key->vrid,
+					  MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0,
+					  ntohl(key->group.addr4),
+					  ntohl(key->group_mask.addr4),
+					  ntohl(key->source.addr4),
+					  ntohl(key->source_mask.addr4),
+					  mlxsw_afa_block_first_set(afa_block));
+		break;
+	case MLXSW_SP_L3_PROTO_IPV6:
+	default:
+		WARN_ON_ONCE(1);
+	}
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl);
+}
+
+static int mlxsw_sp_mr_tcam_route_remove(struct mlxsw_sp *mlxsw_sp, int vrid,
+					 struct parman_item *parman_item)
+{
+	char rmft2_pl[MLXSW_REG_RMFT2_LEN];
+
+	mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, false, parman_item->index, vrid,
+				  0, 0, 0, 0, 0, 0, NULL);
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl);
+}
+
+static int
+mlxsw_sp_mr_tcam_erif_populate(struct mlxsw_sp *mlxsw_sp,
+			       struct mlxsw_sp_mr_tcam_erif_list *erif_list,
+			       struct mlxsw_sp_mr_route_info *route_info)
+{
+	int err;
+	int i;
+
+	for (i = 0; i < route_info->erif_num; i++) {
+		u16 erif_index = route_info->erif_indices[i];
+
+		err = mlxsw_sp_mr_erif_list_add(mlxsw_sp, erif_list,
+						erif_index);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static int
+mlxsw_sp_mr_tcam_route_parman_item_add(struct mlxsw_sp_mr_tcam *mr_tcam,
+				       struct mlxsw_sp_mr_tcam_route *route,
+				       enum mlxsw_sp_mr_route_prio prio)
+{
+	struct parman_prio *parman_prio = NULL;
+	int err;
+
+	switch (route->key.proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		parman_prio = &mr_tcam->ipv4_tcam_region.parman_prios[prio];
+		err = parman_item_add(mr_tcam->ipv4_tcam_region.parman,
+				      parman_prio, &route->parman_item);
+		if (err)
+			return err;
+		break;
+	case MLXSW_SP_L3_PROTO_IPV6:
+	default:
+		WARN_ON_ONCE(1);
+	}
+	route->parman_prio = parman_prio;
+	return 0;
+}
+
+static void
+mlxsw_sp_mr_tcam_route_parman_item_remove(struct mlxsw_sp_mr_tcam *mr_tcam,
+					  struct mlxsw_sp_mr_tcam_route *route)
+{
+	switch (route->key.proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		parman_item_remove(mr_tcam->ipv4_tcam_region.parman,
+				   route->parman_prio, &route->parman_item);
+		break;
+	case MLXSW_SP_L3_PROTO_IPV6:
+	default:
+		WARN_ON_ONCE(1);
+	}
+}
+
+static int
+mlxsw_sp_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv,
+			      void *route_priv,
+			      struct mlxsw_sp_mr_route_params *route_params)
+{
+	struct mlxsw_sp_mr_tcam_route *route = route_priv;
+	struct mlxsw_sp_mr_tcam *mr_tcam = priv;
+	int err;
+
+	route->key = route_params->key;
+	route->irif_index = route_params->value.irif_index;
+	route->min_mtu = route_params->value.min_mtu;
+	route->action = route_params->value.route_action;
+
+	/* Create the egress RIFs list */
+	mlxsw_sp_mr_erif_list_init(&route->erif_list);
+	err = mlxsw_sp_mr_tcam_erif_populate(mlxsw_sp, &route->erif_list,
+					     &route_params->value);
+	if (err)
+		goto err_erif_populate;
+
+	/* Create the flow counter */
+	err = mlxsw_sp_flow_counter_alloc(mlxsw_sp, &route->counter_index);
+	if (err)
+		goto err_counter_alloc;
+
+	/* Create the flexible action block */
+	route->afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp,
+							     route->action,
+							     route->irif_index,
+							     route->counter_index,
+							     route->min_mtu,
+							     &route->erif_list);
+	if (IS_ERR(route->afa_block)) {
+		err = PTR_ERR(route->afa_block);
+		goto err_afa_block_create;
+	}
+
+	/* Allocate place in the TCAM */
+	err = mlxsw_sp_mr_tcam_route_parman_item_add(mr_tcam, route,
+						     route_params->prio);
+	if (err)
+		goto err_parman_item_add;
+
+	/* Write the route to the TCAM */
+	err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
+					     &route->key, route->afa_block);
+	if (err)
+		goto err_route_replace;
+	return 0;
+
+err_route_replace:
+	mlxsw_sp_mr_tcam_route_parman_item_remove(mr_tcam, route);
+err_parman_item_add:
+	mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
+err_afa_block_create:
+	mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index);
+err_erif_populate:
+err_counter_alloc:
+	mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list);
+	return err;
+}
+
+static void mlxsw_sp_mr_tcam_route_destroy(struct mlxsw_sp *mlxsw_sp,
+					   void *priv, void *route_priv)
+{
+	struct mlxsw_sp_mr_tcam_route *route = route_priv;
+	struct mlxsw_sp_mr_tcam *mr_tcam = priv;
+
+	mlxsw_sp_mr_tcam_route_remove(mlxsw_sp, route->key.vrid,
+				      &route->parman_item);
+	mlxsw_sp_mr_tcam_route_parman_item_remove(mr_tcam, route);
+	mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
+	mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index);
+	mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list);
+}
+
+static int mlxsw_sp_mr_tcam_route_stats(struct mlxsw_sp *mlxsw_sp,
+					void *route_priv, u64 *packets,
+					u64 *bytes)
+{
+	struct mlxsw_sp_mr_tcam_route *route = route_priv;
+
+	return mlxsw_sp_flow_counter_get(mlxsw_sp, route->counter_index,
+					 packets, bytes);
+}
+
+static int
+mlxsw_sp_mr_tcam_route_action_update(struct mlxsw_sp *mlxsw_sp,
+				     void *route_priv,
+				     enum mlxsw_sp_mr_route_action route_action)
+{
+	struct mlxsw_sp_mr_tcam_route *route = route_priv;
+	struct mlxsw_afa_block *afa_block;
+	int err;
+
+	/* Create a new flexible action block */
+	afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, route_action,
+						      route->irif_index,
+						      route->counter_index,
+						      route->min_mtu,
+						      &route->erif_list);
+	if (IS_ERR(afa_block))
+		return PTR_ERR(afa_block);
+
+	/* Update the TCAM route entry */
+	err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
+					     &route->key, afa_block);
+	if (err)
+		goto err;
+
+	/* Delete the old one */
+	mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
+	route->afa_block = afa_block;
+	route->action = route_action;
+	return 0;
+err:
+	mlxsw_sp_mr_tcam_afa_block_destroy(afa_block);
+	return err;
+}
+
+static int mlxsw_sp_mr_tcam_route_min_mtu_update(struct mlxsw_sp *mlxsw_sp,
+						 void *route_priv, u16 min_mtu)
+{
+	struct mlxsw_sp_mr_tcam_route *route = route_priv;
+	struct mlxsw_afa_block *afa_block;
+	int err;
+
+	/* Create a new flexible action block */
+	afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp,
+						      route->action,
+						      route->irif_index,
+						      route->counter_index,
+						      min_mtu,
+						      &route->erif_list);
+	if (IS_ERR(afa_block))
+		return PTR_ERR(afa_block);
+
+	/* Update the TCAM route entry */
+	err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
+					     &route->key, afa_block);
+	if (err)
+		goto err;
+
+	/* Delete the old one */
+	mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
+	route->afa_block = afa_block;
+	route->min_mtu = min_mtu;
+	return 0;
+err:
+	mlxsw_sp_mr_tcam_afa_block_destroy(afa_block);
+	return err;
+}
+
+static int mlxsw_sp_mr_tcam_route_irif_update(struct mlxsw_sp *mlxsw_sp,
+					      void *route_priv, u16 irif_index)
+{
+	struct mlxsw_sp_mr_tcam_route *route = route_priv;
+
+	if (route->action != MLXSW_SP_MR_ROUTE_ACTION_TRAP)
+		return -EINVAL;
+	route->irif_index = irif_index;
+	return 0;
+}
+
+static int mlxsw_sp_mr_tcam_route_erif_add(struct mlxsw_sp *mlxsw_sp,
+					   void *route_priv, u16 erif_index)
+{
+	struct mlxsw_sp_mr_tcam_route *route = route_priv;
+	int err;
+
+	err = mlxsw_sp_mr_erif_list_add(mlxsw_sp, &route->erif_list,
+					erif_index);
+	if (err)
+		return err;
+
+	/* Commit the action only if the route action is not TRAP */
+	if (route->action != MLXSW_SP_MR_ROUTE_ACTION_TRAP)
+		return mlxsw_sp_mr_erif_list_commit(mlxsw_sp,
+						    &route->erif_list);
+	return 0;
+}
+
+static int mlxsw_sp_mr_tcam_route_erif_del(struct mlxsw_sp *mlxsw_sp,
+					   void *route_priv, u16 erif_index)
+{
+	struct mlxsw_sp_mr_tcam_route *route = route_priv;
+	struct mlxsw_sp_mr_erif_sublist *erif_sublist;
+	struct mlxsw_sp_mr_tcam_erif_list erif_list;
+	struct mlxsw_afa_block *afa_block;
+	int err;
+	int i;
+
+	/* Create a copy of the original erif_list without the deleted entry */
+	mlxsw_sp_mr_erif_list_init(&erif_list);
+	list_for_each_entry(erif_sublist, &route->erif_list.erif_sublists, list) {
+		for (i = 0; i < erif_sublist->num_erifs; i++) {
+			u16 curr_erif = erif_sublist->erif_indices[i];
+
+			if (curr_erif == erif_index)
+				continue;
+			err = mlxsw_sp_mr_erif_list_add(mlxsw_sp, &erif_list,
+							curr_erif);
+			if (err)
+				goto err_erif_list_add;
+		}
+	}
+
+	/* Create the flexible action block pointing to the new erif_list */
+	afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, route->action,
+						      route->irif_index,
+						      route->counter_index,
+						      route->min_mtu,
+						      &erif_list);
+	if (IS_ERR(afa_block)) {
+		err = PTR_ERR(afa_block);
+		goto err_afa_block_create;
+	}
+
+	/* Update the TCAM route entry */
+	err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
+					     &route->key, afa_block);
+	if (err)
+		goto err_route_write;
+
+	mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
+	mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list);
+	route->afa_block = afa_block;
+	mlxsw_sp_mr_erif_list_move(&route->erif_list, &erif_list);
+	return 0;
+
+err_route_write:
+	mlxsw_sp_mr_tcam_afa_block_destroy(afa_block);
+err_afa_block_create:
+err_erif_list_add:
+	mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &erif_list);
+	return err;
+}
+
+static int
+mlxsw_sp_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+			      struct mlxsw_sp_mr_route_info *route_info)
+{
+	struct mlxsw_sp_mr_tcam_route *route = route_priv;
+	struct mlxsw_sp_mr_tcam_erif_list erif_list;
+	struct mlxsw_afa_block *afa_block;
+	int err;
+
+	/* Create a new erif_list */
+	mlxsw_sp_mr_erif_list_init(&erif_list);
+	err = mlxsw_sp_mr_tcam_erif_populate(mlxsw_sp, &erif_list, route_info);
+	if (err)
+		goto err_erif_populate;
+
+	/* Create the flexible action block pointing to the new erif_list */
+	afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp,
+						      route_info->route_action,
+						      route_info->irif_index,
+						      route->counter_index,
+						      route_info->min_mtu,
+						      &erif_list);
+	if (IS_ERR(afa_block)) {
+		err = PTR_ERR(afa_block);
+		goto err_afa_block_create;
+	}
+
+	/* Update the TCAM route entry */
+	err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
+					     &route->key, afa_block);
+	if (err)
+		goto err_route_write;
+
+	mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
+	mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list);
+	route->afa_block = afa_block;
+	mlxsw_sp_mr_erif_list_move(&route->erif_list, &erif_list);
+	route->action = route_info->route_action;
+	route->irif_index = route_info->irif_index;
+	route->min_mtu = route_info->min_mtu;
+	return 0;
+
+err_route_write:
+	mlxsw_sp_mr_tcam_afa_block_destroy(afa_block);
+err_afa_block_create:
+err_erif_populate:
+	mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &erif_list);
+	return err;
+}
+
+#define MLXSW_SP_MR_TCAM_REGION_BASE_COUNT 16
+#define MLXSW_SP_MR_TCAM_REGION_RESIZE_STEP 16
+
+static int
+mlxsw_sp_mr_tcam_region_alloc(struct mlxsw_sp_mr_tcam_region *mr_tcam_region)
+{
+	struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
+	char rtar_pl[MLXSW_REG_RTAR_LEN];
+
+	mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_ALLOCATE,
+			    mr_tcam_region->rtar_key_type,
+			    MLXSW_SP_MR_TCAM_REGION_BASE_COUNT);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
+}
+
+static void
+mlxsw_sp_mr_tcam_region_free(struct mlxsw_sp_mr_tcam_region *mr_tcam_region)
+{
+	struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
+	char rtar_pl[MLXSW_REG_RTAR_LEN];
+
+	mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_DEALLOCATE,
+			    mr_tcam_region->rtar_key_type, 0);
+	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
+}
+
+static int mlxsw_sp_mr_tcam_region_parman_resize(void *priv,
+						 unsigned long new_count)
+{
+	struct mlxsw_sp_mr_tcam_region *mr_tcam_region = priv;
+	struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
+	char rtar_pl[MLXSW_REG_RTAR_LEN];
+	u64 max_tcam_rules;
+
+	max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES);
+	if (new_count > max_tcam_rules)
+		return -EINVAL;
+	mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_RESIZE,
+			    mr_tcam_region->rtar_key_type, new_count);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
+}
+
+static void mlxsw_sp_mr_tcam_region_parman_move(void *priv,
+						unsigned long from_index,
+						unsigned long to_index,
+						unsigned long count)
+{
+	struct mlxsw_sp_mr_tcam_region *mr_tcam_region = priv;
+	struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
+	char rrcr_pl[MLXSW_REG_RRCR_LEN];
+
+	mlxsw_reg_rrcr_pack(rrcr_pl, MLXSW_REG_RRCR_OP_MOVE,
+			    from_index, count,
+			    mr_tcam_region->rtar_key_type, to_index);
+	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rrcr), rrcr_pl);
+}
+
+static const struct parman_ops mlxsw_sp_mr_tcam_region_parman_ops = {
+	.base_count	= MLXSW_SP_MR_TCAM_REGION_BASE_COUNT,
+	.resize_step	= MLXSW_SP_MR_TCAM_REGION_RESIZE_STEP,
+	.resize		= mlxsw_sp_mr_tcam_region_parman_resize,
+	.move		= mlxsw_sp_mr_tcam_region_parman_move,
+	.algo		= PARMAN_ALGO_TYPE_LSORT,
+};
+
+static int
+mlxsw_sp_mr_tcam_region_init(struct mlxsw_sp *mlxsw_sp,
+			     struct mlxsw_sp_mr_tcam_region *mr_tcam_region,
+			     enum mlxsw_reg_rtar_key_type rtar_key_type)
+{
+	struct parman_prio *parman_prios;
+	struct parman *parman;
+	int err;
+	int i;
+
+	mr_tcam_region->rtar_key_type = rtar_key_type;
+	mr_tcam_region->mlxsw_sp = mlxsw_sp;
+
+	err = mlxsw_sp_mr_tcam_region_alloc(mr_tcam_region);
+	if (err)
+		return err;
+
+	parman = parman_create(&mlxsw_sp_mr_tcam_region_parman_ops,
+			       mr_tcam_region);
+	if (!parman) {
+		err = -ENOMEM;
+		goto err_parman_create;
+	}
+	mr_tcam_region->parman = parman;
+
+	parman_prios = kmalloc_array(MLXSW_SP_MR_ROUTE_PRIO_MAX + 1,
+				     sizeof(*parman_prios), GFP_KERNEL);
+	if (!parman_prios) {
+		err = -ENOMEM;
+		goto err_parman_prios_alloc;
+	}
+	mr_tcam_region->parman_prios = parman_prios;
+
+	for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++)
+		parman_prio_init(mr_tcam_region->parman,
+				 &mr_tcam_region->parman_prios[i], i);
+	return 0;
+
+err_parman_prios_alloc:
+	parman_destroy(parman);
+err_parman_create:
+	mlxsw_sp_mr_tcam_region_free(mr_tcam_region);
+	return err;
+}
+
+static void
+mlxsw_sp_mr_tcam_region_fini(struct mlxsw_sp_mr_tcam_region *mr_tcam_region)
+{
+	int i;
+
+	for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++)
+		parman_prio_fini(&mr_tcam_region->parman_prios[i]);
+	kfree(mr_tcam_region->parman_prios);
+	parman_destroy(mr_tcam_region->parman);
+	mlxsw_sp_mr_tcam_region_free(mr_tcam_region);
+}
+
+static int mlxsw_sp_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+	struct mlxsw_sp_mr_tcam *mr_tcam = priv;
+
+	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MC_ERIF_LIST_ENTRIES) ||
+	    !MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_TCAM_RULES))
+		return -EIO;
+
+	return mlxsw_sp_mr_tcam_region_init(mlxsw_sp,
+					    &mr_tcam->ipv4_tcam_region,
+					    MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST);
+}
+
+static void mlxsw_sp_mr_tcam_fini(void *priv)
+{
+	struct mlxsw_sp_mr_tcam *mr_tcam = priv;
+
+	mlxsw_sp_mr_tcam_region_fini(&mr_tcam->ipv4_tcam_region);
+}
+
+const struct mlxsw_sp_mr_ops mlxsw_sp_mr_tcam_ops = {
+	.priv_size = sizeof(struct mlxsw_sp_mr_tcam),
+	.route_priv_size = sizeof(struct mlxsw_sp_mr_tcam_route),
+	.init = mlxsw_sp_mr_tcam_init,
+	.route_create = mlxsw_sp_mr_tcam_route_create,
+	.route_update = mlxsw_sp_mr_tcam_route_update,
+	.route_stats = mlxsw_sp_mr_tcam_route_stats,
+	.route_action_update = mlxsw_sp_mr_tcam_route_action_update,
+	.route_min_mtu_update = mlxsw_sp_mr_tcam_route_min_mtu_update,
+	.route_irif_update = mlxsw_sp_mr_tcam_route_irif_update,
+	.route_erif_add = mlxsw_sp_mr_tcam_route_erif_add,
+	.route_erif_del = mlxsw_sp_mr_tcam_route_erif_del,
+	.route_destroy = mlxsw_sp_mr_tcam_route_destroy,
+	.fini = mlxsw_sp_mr_tcam_fini,
+};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h
new file mode 100644
index 000000000000..f9b59ee25406
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h
@@ -0,0 +1,43 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXSW_SPECTRUM_MCROUTER_TCAM_H
+#define _MLXSW_SPECTRUM_MCROUTER_TCAM_H
+
+#include "spectrum.h"
+#include "spectrum_mr.h"
+
+extern const struct mlxsw_sp_mr_ops mlxsw_sp_mr_tcam_ops;
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
new file mode 100644
index 000000000000..c33beac5def0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -0,0 +1,276 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Nogah Frankel <nogahf@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <net/pkt_cls.h>
+#include <net/red.h>
+
+#include "spectrum.h"
+#include "reg.h"
+
+static int
+mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port,
+				  int tclass_num, u32 min, u32 max,
+				  u32 probability, bool is_ecn)
+{
+	char cwtp_cmd[max_t(u8, MLXSW_REG_CWTP_LEN, MLXSW_REG_CWTPM_LEN)];
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	int err;
+
+	mlxsw_reg_cwtp_pack(cwtp_cmd, mlxsw_sp_port->local_port, tclass_num);
+	mlxsw_reg_cwtp_profile_pack(cwtp_cmd, MLXSW_REG_CWTP_DEFAULT_PROFILE,
+				    roundup(min, MLXSW_REG_CWTP_MIN_VALUE),
+				    roundup(max, MLXSW_REG_CWTP_MIN_VALUE),
+				    probability);
+
+	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtp), cwtp_cmd);
+	if (err)
+		return err;
+
+	mlxsw_reg_cwtpm_pack(cwtp_cmd, mlxsw_sp_port->local_port, tclass_num,
+			     MLXSW_REG_CWTP_DEFAULT_PROFILE, true, is_ecn);
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtp_cmd);
+}
+
+static int
+mlxsw_sp_tclass_congestion_disable(struct mlxsw_sp_port *mlxsw_sp_port,
+				   int tclass_num)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	char cwtpm_cmd[MLXSW_REG_CWTPM_LEN];
+
+	mlxsw_reg_cwtpm_pack(cwtpm_cmd, mlxsw_sp_port->local_port, tclass_num,
+			     MLXSW_REG_CWTPM_RESET_PROFILE, false, false);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtpm_cmd);
+}
+
+static void
+mlxsw_sp_setup_tc_qdisc_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
+				    struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+				    int tclass_num)
+{
+	struct red_stats *xstats_base = &mlxsw_sp_qdisc->xstats_base;
+	struct mlxsw_sp_port_xstats *xstats;
+	struct rtnl_link_stats64 *stats;
+
+	xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
+	stats = &mlxsw_sp_port->periodic_hw_stats.stats;
+
+	mlxsw_sp_qdisc->tx_packets = stats->tx_packets;
+	mlxsw_sp_qdisc->tx_bytes = stats->tx_bytes;
+
+	switch (mlxsw_sp_qdisc->type) {
+	case MLXSW_SP_QDISC_RED:
+		xstats_base->prob_mark = xstats->ecn;
+		xstats_base->prob_drop = xstats->wred_drop[tclass_num];
+		xstats_base->pdrop = xstats->tail_drop[tclass_num];
+
+		mlxsw_sp_qdisc->overlimits = xstats_base->prob_drop +
+					     xstats_base->prob_mark;
+		mlxsw_sp_qdisc->drops = xstats_base->prob_drop +
+					xstats_base->pdrop;
+		break;
+	default:
+		break;
+	}
+}
+
+static int
+mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
+			   struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+			   int tclass_num)
+{
+	int err;
+
+	if (mlxsw_sp_qdisc->handle != handle)
+		return 0;
+
+	err = mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port, tclass_num);
+	mlxsw_sp_qdisc->handle = TC_H_UNSPEC;
+	mlxsw_sp_qdisc->type = MLXSW_SP_QDISC_NO_QDISC;
+
+	return err;
+}
+
+static int
+mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
+			   struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+			   int tclass_num,
+			   struct tc_red_qopt_offload_params *p)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	u32 min, max;
+	u64 prob;
+	int err = 0;
+
+	if (p->min > p->max) {
+		dev_err(mlxsw_sp->bus_info->dev,
+			"spectrum: RED: min %u is bigger then max %u\n", p->min,
+			p->max);
+		goto err_bad_param;
+	}
+	if (p->max > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE)) {
+		dev_err(mlxsw_sp->bus_info->dev,
+			"spectrum: RED: max value %u is too big\n", p->max);
+		goto err_bad_param;
+	}
+	if (p->min == 0 || p->max == 0) {
+		dev_err(mlxsw_sp->bus_info->dev,
+			"spectrum: RED: 0 value is illegal for min and max\n");
+		goto err_bad_param;
+	}
+
+	/* calculate probability in percentage */
+	prob = p->probability;
+	prob *= 100;
+	prob = DIV_ROUND_UP(prob, 1 << 16);
+	prob = DIV_ROUND_UP(prob, 1 << 16);
+	min = mlxsw_sp_bytes_cells(mlxsw_sp, p->min);
+	max = mlxsw_sp_bytes_cells(mlxsw_sp, p->max);
+	err = mlxsw_sp_tclass_congestion_enable(mlxsw_sp_port, tclass_num, min,
+						max, prob, p->is_ecn);
+	if (err)
+		goto err_config;
+
+	mlxsw_sp_qdisc->type = MLXSW_SP_QDISC_RED;
+	if (mlxsw_sp_qdisc->handle != handle)
+		mlxsw_sp_setup_tc_qdisc_clean_stats(mlxsw_sp_port,
+						    mlxsw_sp_qdisc,
+						    tclass_num);
+
+	mlxsw_sp_qdisc->handle = handle;
+	return 0;
+
+err_bad_param:
+	err = -EINVAL;
+err_config:
+	mlxsw_sp_qdisc_red_destroy(mlxsw_sp_port, mlxsw_sp_qdisc->handle,
+				   mlxsw_sp_qdisc, tclass_num);
+	return err;
+}
+
+static int
+mlxsw_sp_qdisc_get_red_xstats(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
+			      struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+			      int tclass_num, struct red_stats *res)
+{
+	struct red_stats *xstats_base = &mlxsw_sp_qdisc->xstats_base;
+	struct mlxsw_sp_port_xstats *xstats;
+
+	if (mlxsw_sp_qdisc->handle != handle ||
+	    mlxsw_sp_qdisc->type != MLXSW_SP_QDISC_RED)
+		return -EOPNOTSUPP;
+
+	xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
+
+	res->prob_drop = xstats->wred_drop[tclass_num] - xstats_base->prob_drop;
+	res->prob_mark = xstats->ecn - xstats_base->prob_mark;
+	res->pdrop = xstats->tail_drop[tclass_num] - xstats_base->pdrop;
+	return 0;
+}
+
+static int
+mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
+			     struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+			     int tclass_num,
+			     struct tc_red_qopt_offload_stats *res)
+{
+	u64 tx_bytes, tx_packets, overlimits, drops;
+	struct mlxsw_sp_port_xstats *xstats;
+	struct rtnl_link_stats64 *stats;
+
+	if (mlxsw_sp_qdisc->handle != handle ||
+	    mlxsw_sp_qdisc->type != MLXSW_SP_QDISC_RED)
+		return -EOPNOTSUPP;
+
+	xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
+	stats = &mlxsw_sp_port->periodic_hw_stats.stats;
+
+	tx_bytes = stats->tx_bytes - mlxsw_sp_qdisc->tx_bytes;
+	tx_packets = stats->tx_packets - mlxsw_sp_qdisc->tx_packets;
+	overlimits = xstats->wred_drop[tclass_num] + xstats->ecn -
+		     mlxsw_sp_qdisc->overlimits;
+	drops = xstats->wred_drop[tclass_num] + xstats->tail_drop[tclass_num] -
+		mlxsw_sp_qdisc->drops;
+
+	_bstats_update(res->bstats, tx_bytes, tx_packets);
+	res->qstats->overlimits += overlimits;
+	res->qstats->drops += drops;
+	res->qstats->backlog += mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
+						xstats->backlog[tclass_num]);
+
+	mlxsw_sp_qdisc->drops +=  drops;
+	mlxsw_sp_qdisc->overlimits += overlimits;
+	mlxsw_sp_qdisc->tx_bytes += tx_bytes;
+	mlxsw_sp_qdisc->tx_packets += tx_packets;
+	return 0;
+}
+
+#define MLXSW_SP_PORT_DEFAULT_TCLASS 0
+
+int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
+			  struct tc_red_qopt_offload *p)
+{
+	struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
+	int tclass_num;
+
+	if (p->parent != TC_H_ROOT)
+		return -EOPNOTSUPP;
+
+	mlxsw_sp_qdisc = &mlxsw_sp_port->root_qdisc;
+	tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS;
+
+	switch (p->command) {
+	case TC_RED_REPLACE:
+		return mlxsw_sp_qdisc_red_replace(mlxsw_sp_port, p->handle,
+						  mlxsw_sp_qdisc, tclass_num,
+						  &p->set);
+	case TC_RED_DESTROY:
+		return mlxsw_sp_qdisc_red_destroy(mlxsw_sp_port, p->handle,
+						  mlxsw_sp_qdisc, tclass_num);
+	case TC_RED_XSTATS:
+		return mlxsw_sp_qdisc_get_red_xstats(mlxsw_sp_port, p->handle,
+						     mlxsw_sp_qdisc, tclass_num,
+						     p->xstats);
+	case TC_RED_STATS:
+		return mlxsw_sp_qdisc_get_red_stats(mlxsw_sp_port, p->handle,
+						    mlxsw_sp_qdisc, tclass_num,
+						    &p->stats);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 5189022a1c8c..632c7b229054 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -46,6 +46,8 @@
 #include <linux/if_bridge.h>
 #include <linux/socket.h>
 #include <linux/route.h>
+#include <linux/gcd.h>
+#include <linux/random.h>
 #include <net/netevent.h>
 #include <net/neighbour.h>
 #include <net/arp.h>
@@ -65,6 +67,8 @@
 #include "spectrum_cnt.h"
 #include "spectrum_dpipe.h"
 #include "spectrum_ipip.h"
+#include "spectrum_mr.h"
+#include "spectrum_mr_tcam.h"
 #include "spectrum_router.h"
 
 struct mlxsw_sp_vr;
@@ -78,6 +82,7 @@ struct mlxsw_sp_router {
 	struct rhashtable neigh_ht;
 	struct rhashtable nexthop_group_ht;
 	struct rhashtable nexthop_ht;
+	struct list_head nexthop_list;
 	struct {
 		struct mlxsw_sp_lpm_tree *trees;
 		unsigned int tree_count;
@@ -92,6 +97,7 @@ struct mlxsw_sp_router {
 	struct list_head ipip_list;
 	bool aborted;
 	struct notifier_block fib_nb;
+	struct notifier_block netevent_nb;
 	const struct mlxsw_sp_rif_ops **rif_ops_arr;
 	const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
 };
@@ -458,6 +464,7 @@ struct mlxsw_sp_vr {
 	unsigned int rif_count;
 	struct mlxsw_sp_fib *fib4;
 	struct mlxsw_sp_fib *fib6;
+	struct mlxsw_sp_mr_table *mr4_table;
 };
 
 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
@@ -652,7 +659,7 @@ static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
 
 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
 {
-	return !!vr->fib4 || !!vr->fib6;
+	return !!vr->fib4 || !!vr->fib6 || !!vr->mr4_table;
 }
 
 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
@@ -692,8 +699,8 @@ static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
 
 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
 {
-	/* For our purpose, squash main and local table into one */
-	if (tb_id == RT_TABLE_LOCAL)
+	/* For our purpose, squash main, default and local tables into one */
+	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
 		tb_id = RT_TABLE_MAIN;
 	return tb_id;
 }
@@ -727,14 +734,17 @@ static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
 }
 
 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
-					      u32 tb_id)
+					      u32 tb_id,
+					      struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp_vr *vr;
 	int err;
 
 	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
-	if (!vr)
+	if (!vr) {
+		NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported virtual routers");
 		return ERR_PTR(-EBUSY);
+	}
 	vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4);
 	if (IS_ERR(vr->fib4))
 		return ERR_CAST(vr->fib4);
@@ -743,9 +753,18 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
 		err = PTR_ERR(vr->fib6);
 		goto err_fib6_create;
 	}
+	vr->mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
+						 MLXSW_SP_L3_PROTO_IPV4);
+	if (IS_ERR(vr->mr4_table)) {
+		err = PTR_ERR(vr->mr4_table);
+		goto err_mr_table_create;
+	}
 	vr->tb_id = tb_id;
 	return vr;
 
+err_mr_table_create:
+	mlxsw_sp_fib_destroy(vr->fib6);
+	vr->fib6 = NULL;
 err_fib6_create:
 	mlxsw_sp_fib_destroy(vr->fib4);
 	vr->fib4 = NULL;
@@ -754,27 +773,31 @@ err_fib6_create:
 
 static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr)
 {
+	mlxsw_sp_mr_table_destroy(vr->mr4_table);
+	vr->mr4_table = NULL;
 	mlxsw_sp_fib_destroy(vr->fib6);
 	vr->fib6 = NULL;
 	mlxsw_sp_fib_destroy(vr->fib4);
 	vr->fib4 = NULL;
 }
 
-static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id)
+static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
+					   struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp_vr *vr;
 
 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
 	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
 	if (!vr)
-		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id);
+		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
 	return vr;
 }
 
 static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr)
 {
 	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
-	    list_empty(&vr->fib6->node_list))
+	    list_empty(&vr->fib6->node_list) &&
+	    mlxsw_sp_mr_table_empty(vr->mr4_table))
 		mlxsw_sp_vr_destroy(vr);
 }
 
@@ -920,7 +943,7 @@ __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
 	return __dev_get_by_index(net, tun->parms.link);
 }
 
-static u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
+u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
 {
 	struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
 
@@ -932,12 +955,14 @@ static u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
 
 static struct mlxsw_sp_rif *
 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
-		    const struct mlxsw_sp_rif_params *params);
+		    const struct mlxsw_sp_rif_params *params,
+		    struct netlink_ext_ack *extack);
 
 static struct mlxsw_sp_rif_ipip_lb *
 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
 				enum mlxsw_sp_ipip_type ipipt,
-				struct net_device *ol_dev)
+				struct net_device *ol_dev,
+				struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp_rif_params_ipip_lb lb_params;
 	const struct mlxsw_sp_ipip_ops *ipip_ops;
@@ -950,7 +975,7 @@ mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
 		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
 	};
 
-	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common);
+	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
 	if (IS_ERR(rif))
 		return ERR_CAST(rif);
 	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
@@ -969,7 +994,7 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
 		return ERR_PTR(-ENOMEM);
 
 	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
-							    ol_dev);
+							    ol_dev, NULL);
 	if (IS_ERR(ipip_entry->ol_lb)) {
 		ret = ERR_CAST(ipip_entry->ol_lb);
 		goto err_ol_ipip_lb_create;
@@ -977,6 +1002,7 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
 
 	ipip_entry->ipipt = ipipt;
 	ipip_entry->ol_dev = ol_dev;
+	ipip_entry->parms = mlxsw_sp_ipip_netdev_parms(ol_dev);
 
 	return ipip_entry;
 
@@ -986,72 +1012,12 @@ err_ol_ipip_lb_create:
 }
 
 static void
-mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp_ipip_entry *ipip_entry)
+mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
 {
-	WARN_ON(ipip_entry->ref_count > 0);
 	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
 	kfree(ipip_entry);
 }
 
-static __be32
-mlxsw_sp_ipip_netdev_saddr4(const struct net_device *ol_dev)
-{
-	struct ip_tunnel *tun = netdev_priv(ol_dev);
-
-	return tun->parms.iph.saddr;
-}
-
-union mlxsw_sp_l3addr
-mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
-			   const struct net_device *ol_dev)
-{
-	switch (proto) {
-	case MLXSW_SP_L3_PROTO_IPV4:
-		return (union mlxsw_sp_l3addr) {
-			.addr4 = mlxsw_sp_ipip_netdev_saddr4(ol_dev),
-		};
-	case MLXSW_SP_L3_PROTO_IPV6:
-		break;
-	};
-
-	WARN_ON(1);
-	return (union mlxsw_sp_l3addr) {
-		.addr4 = 0,
-	};
-}
-
-__be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev)
-{
-	struct ip_tunnel *tun = netdev_priv(ol_dev);
-
-	return tun->parms.iph.daddr;
-}
-
-union mlxsw_sp_l3addr
-mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
-			   const struct net_device *ol_dev)
-{
-	switch (proto) {
-	case MLXSW_SP_L3_PROTO_IPV4:
-		return (union mlxsw_sp_l3addr) {
-			.addr4 = mlxsw_sp_ipip_netdev_daddr4(ol_dev),
-		};
-	case MLXSW_SP_L3_PROTO_IPV6:
-		break;
-	};
-
-	WARN_ON(1);
-	return (union mlxsw_sp_l3addr) {
-		.addr4 = 0,
-	};
-}
-
-static bool mlxsw_sp_l3addr_eq(const union mlxsw_sp_l3addr *addr1,
-			       const union mlxsw_sp_l3addr *addr2)
-{
-	return !memcmp(addr1, addr2, sizeof(*addr1));
-}
-
 static bool
 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
 				  const enum mlxsw_sp_l3proto ul_proto,
@@ -1184,60 +1150,28 @@ mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
 }
 
 static struct mlxsw_sp_ipip_entry *
-mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp,
-			enum mlxsw_sp_ipip_type ipipt,
-			struct net_device *ol_dev)
+mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
+			   enum mlxsw_sp_ipip_type ipipt,
+			   struct net_device *ol_dev)
 {
-	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
-	struct mlxsw_sp_router *router = mlxsw_sp->router;
-	struct mlxsw_sp_fib_entry *decap_fib_entry;
 	struct mlxsw_sp_ipip_entry *ipip_entry;
-	enum mlxsw_sp_l3proto ul_proto;
-	union mlxsw_sp_l3addr saddr;
-
-	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
-			    ipip_list_node) {
-		if (ipip_entry->ol_dev == ol_dev)
-			goto inc_ref_count;
-
-		/* The configuration where several tunnels have the same local
-		 * address in the same underlay table needs special treatment in
-		 * the HW. That is currently not implemented in the driver.
-		 */
-		ul_proto = router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
-		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
-		if (mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
-						      ul_tb_id, ipip_entry))
-			return ERR_PTR(-EEXIST);
-	}
 
 	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
 	if (IS_ERR(ipip_entry))
 		return ipip_entry;
 
-	decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
-	if (decap_fib_entry)
-		mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
-						  decap_fib_entry);
-
 	list_add_tail(&ipip_entry->ipip_list_node,
 		      &mlxsw_sp->router->ipip_list);
 
-inc_ref_count:
-	++ipip_entry->ref_count;
 	return ipip_entry;
 }
 
 static void
-mlxsw_sp_ipip_entry_put(struct mlxsw_sp *mlxsw_sp,
-			struct mlxsw_sp_ipip_entry *ipip_entry)
+mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
+			    struct mlxsw_sp_ipip_entry *ipip_entry)
 {
-	if (--ipip_entry->ref_count == 0) {
-		list_del(&ipip_entry->ipip_list_node);
-		if (ipip_entry->decap_fib_entry)
-			mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
-		mlxsw_sp_ipip_entry_destroy(ipip_entry);
-	}
+	list_del(&ipip_entry->ipip_list_node);
+	mlxsw_sp_ipip_entry_dealloc(ipip_entry);
 }
 
 static bool
@@ -1279,6 +1213,455 @@ mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
 	return NULL;
 }
 
+static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
+				      const struct net_device *dev,
+				      enum mlxsw_sp_ipip_type *p_type)
+{
+	struct mlxsw_sp_router *router = mlxsw_sp->router;
+	const struct mlxsw_sp_ipip_ops *ipip_ops;
+	enum mlxsw_sp_ipip_type ipipt;
+
+	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
+		ipip_ops = router->ipip_ops_arr[ipipt];
+		if (dev->type == ipip_ops->dev_type) {
+			if (p_type)
+				*p_type = ipipt;
+			return true;
+		}
+	}
+	return false;
+}
+
+bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
+				const struct net_device *dev)
+{
+	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
+}
+
+static struct mlxsw_sp_ipip_entry *
+mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
+				   const struct net_device *ol_dev)
+{
+	struct mlxsw_sp_ipip_entry *ipip_entry;
+
+	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
+			    ipip_list_node)
+		if (ipip_entry->ol_dev == ol_dev)
+			return ipip_entry;
+
+	return NULL;
+}
+
+static struct mlxsw_sp_ipip_entry *
+mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
+				   const struct net_device *ul_dev,
+				   struct mlxsw_sp_ipip_entry *start)
+{
+	struct mlxsw_sp_ipip_entry *ipip_entry;
+
+	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
+					ipip_list_node);
+	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
+				     ipip_list_node) {
+		struct net_device *ipip_ul_dev =
+			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
+
+		if (ipip_ul_dev == ul_dev)
+			return ipip_entry;
+	}
+
+	return NULL;
+}
+
+bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
+				const struct net_device *dev)
+{
+	return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
+}
+
+static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
+						const struct net_device *ol_dev,
+						enum mlxsw_sp_ipip_type ipipt)
+{
+	const struct mlxsw_sp_ipip_ops *ops
+		= mlxsw_sp->router->ipip_ops_arr[ipipt];
+
+	/* For deciding whether decap should be offloaded, we don't care about
+	 * overlay protocol, so ask whether either one is supported.
+	 */
+	return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
+	       ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
+}
+
+static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
+						struct net_device *ol_dev)
+{
+	struct mlxsw_sp_ipip_entry *ipip_entry;
+	enum mlxsw_sp_l3proto ul_proto;
+	enum mlxsw_sp_ipip_type ipipt;
+	union mlxsw_sp_l3addr saddr;
+	u32 ul_tb_id;
+
+	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
+	if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
+		ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
+		ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
+		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
+		if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
+							  saddr, ul_tb_id,
+							  NULL)) {
+			ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
+								ol_dev);
+			if (IS_ERR(ipip_entry))
+				return PTR_ERR(ipip_entry);
+		}
+	}
+
+	return 0;
+}
+
+static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
+						   struct net_device *ol_dev)
+{
+	struct mlxsw_sp_ipip_entry *ipip_entry;
+
+	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
+	if (ipip_entry)
+		mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
+}
+
+static void
+mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
+				struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+	struct mlxsw_sp_fib_entry *decap_fib_entry;
+
+	decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
+	if (decap_fib_entry)
+		mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
+						  decap_fib_entry);
+}
+
+static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
+						struct net_device *ol_dev)
+{
+	struct mlxsw_sp_ipip_entry *ipip_entry;
+
+	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
+	if (ipip_entry)
+		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
+}
+
+static void
+mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+	if (ipip_entry->decap_fib_entry)
+		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
+}
+
+static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
+						  struct net_device *ol_dev)
+{
+	struct mlxsw_sp_ipip_entry *ipip_entry;
+
+	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
+	if (ipip_entry)
+		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
+}
+
+static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
+					struct mlxsw_sp_rif *rif);
+static int
+mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_ipip_entry *ipip_entry,
+				 bool keep_encap,
+				 struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
+	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
+
+	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
+						     ipip_entry->ipipt,
+						     ipip_entry->ol_dev,
+						     extack);
+	if (IS_ERR(new_lb_rif))
+		return PTR_ERR(new_lb_rif);
+	ipip_entry->ol_lb = new_lb_rif;
+
+	if (keep_encap) {
+		list_splice_init(&old_lb_rif->common.nexthop_list,
+				 &new_lb_rif->common.nexthop_list);
+		mlxsw_sp_nexthop_rif_update(mlxsw_sp, &new_lb_rif->common);
+	}
+
+	mlxsw_sp_rif_destroy(&old_lb_rif->common);
+
+	return 0;
+}
+
+/**
+ * Update the offload related to an IPIP entry. This always updates decap, and
+ * in addition to that it also:
+ * @recreate_loopback: recreates the associated loopback RIF
+ * @keep_encap: updates next hops that use the tunnel netdevice. This is only
+ *              relevant when recreate_loopback is true.
+ * @update_nexthops: updates next hops, keeping the current loopback RIF. This
+ *                   is only relevant when recreate_loopback is false.
+ */
+int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
+					struct mlxsw_sp_ipip_entry *ipip_entry,
+					bool recreate_loopback,
+					bool keep_encap,
+					bool update_nexthops,
+					struct netlink_ext_ack *extack)
+{
+	int err;
+
+	/* RIFs can't be edited, so to update loopback, we need to destroy and
+	 * recreate it. That creates a window of opportunity where RALUE and
+	 * RATR registers end up referencing a RIF that's already gone. RATRs
+	 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
+	 * of RALUE, demote the decap route back.
+	 */
+	if (ipip_entry->decap_fib_entry)
+		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
+
+	if (recreate_loopback) {
+		err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
+						       keep_encap, extack);
+		if (err)
+			return err;
+	} else if (update_nexthops) {
+		mlxsw_sp_nexthop_rif_update(mlxsw_sp,
+					    &ipip_entry->ol_lb->common);
+	}
+
+	if (ipip_entry->ol_dev->flags & IFF_UP)
+		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
+
+	return 0;
+}
+
+static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
+						struct net_device *ol_dev,
+						struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp_ipip_entry *ipip_entry =
+		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
+
+	if (!ipip_entry)
+		return 0;
+	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
+						   true, false, false, extack);
+}
+
+static int
+mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
+				     struct mlxsw_sp_ipip_entry *ipip_entry,
+				     struct net_device *ul_dev,
+				     struct netlink_ext_ack *extack)
+{
+	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
+						   true, true, false, extack);
+}
+
+static int
+mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
+				    struct mlxsw_sp_ipip_entry *ipip_entry,
+				    struct net_device *ul_dev)
+{
+	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
+						   false, false, true, NULL);
+}
+
+static int
+mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
+				      struct mlxsw_sp_ipip_entry *ipip_entry,
+				      struct net_device *ul_dev)
+{
+	/* A down underlay device causes encapsulated packets to not be
+	 * forwarded, but decap still works. So refresh next hops without
+	 * touching anything else.
+	 */
+	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
+						   false, false, true, NULL);
+}
+
+static int
+mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
+					struct net_device *ol_dev,
+					struct netlink_ext_ack *extack)
+{
+	const struct mlxsw_sp_ipip_ops *ipip_ops;
+	struct mlxsw_sp_ipip_entry *ipip_entry;
+	int err;
+
+	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
+	if (!ipip_entry)
+		/* A change might make a tunnel eligible for offloading, but
+		 * that is currently not implemented. What falls to slow path
+		 * stays there.
+		 */
+		return 0;
+
+	/* A change might make a tunnel not eligible for offloading. */
+	if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
+						 ipip_entry->ipipt)) {
+		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
+		return 0;
+	}
+
+	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
+	err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
+	return err;
+}
+
+void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
+				       struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+	struct net_device *ol_dev = ipip_entry->ol_dev;
+
+	if (ol_dev->flags & IFF_UP)
+		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
+	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
+}
+
+/* The configuration where several tunnels have the same local address in the
+ * same underlay table needs special treatment in the HW. That is currently not
+ * implemented in the driver. This function finds and demotes the first tunnel
+ * with a given source address, except the one passed in in the argument
+ * `except'.
+ */
+bool
+mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
+				     enum mlxsw_sp_l3proto ul_proto,
+				     union mlxsw_sp_l3addr saddr,
+				     u32 ul_tb_id,
+				     const struct mlxsw_sp_ipip_entry *except)
+{
+	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
+
+	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
+				 ipip_list_node) {
+		if (ipip_entry != except &&
+		    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
+						      ul_tb_id, ipip_entry)) {
+			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
+						     struct net_device *ul_dev)
+{
+	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
+
+	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
+				 ipip_list_node) {
+		struct net_device *ipip_ul_dev =
+			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
+
+		if (ipip_ul_dev == ul_dev)
+			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
+	}
+}
+
+int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
+				     struct net_device *ol_dev,
+				     unsigned long event,
+				     struct netdev_notifier_info *info)
+{
+	struct netdev_notifier_changeupper_info *chup;
+	struct netlink_ext_ack *extack;
+
+	switch (event) {
+	case NETDEV_REGISTER:
+		return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
+	case NETDEV_UNREGISTER:
+		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
+		return 0;
+	case NETDEV_UP:
+		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
+		return 0;
+	case NETDEV_DOWN:
+		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
+		return 0;
+	case NETDEV_CHANGEUPPER:
+		chup = container_of(info, typeof(*chup), info);
+		extack = info->extack;
+		if (netif_is_l3_master(chup->upper_dev))
+			return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
+								    ol_dev,
+								    extack);
+		return 0;
+	case NETDEV_CHANGE:
+		extack = info->extack;
+		return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
+							       ol_dev, extack);
+	}
+	return 0;
+}
+
+static int
+__mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
+				   struct mlxsw_sp_ipip_entry *ipip_entry,
+				   struct net_device *ul_dev,
+				   unsigned long event,
+				   struct netdev_notifier_info *info)
+{
+	struct netdev_notifier_changeupper_info *chup;
+	struct netlink_ext_ack *extack;
+
+	switch (event) {
+	case NETDEV_CHANGEUPPER:
+		chup = container_of(info, typeof(*chup), info);
+		extack = info->extack;
+		if (netif_is_l3_master(chup->upper_dev))
+			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
+								    ipip_entry,
+								    ul_dev,
+								    extack);
+		break;
+
+	case NETDEV_UP:
+		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
+							   ul_dev);
+	case NETDEV_DOWN:
+		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
+							     ipip_entry,
+							     ul_dev);
+	}
+	return 0;
+}
+
+int
+mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
+				 struct net_device *ul_dev,
+				 unsigned long event,
+				 struct netdev_notifier_info *info)
+{
+	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
+	int err;
+
+	while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
+								ul_dev,
+								ipip_entry))) {
+		err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
+							 ul_dev, event, info);
+		if (err) {
+			mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
+								 ul_dev);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
 struct mlxsw_sp_neigh_key {
 	struct neighbour *n;
 };
@@ -1316,7 +1699,7 @@ mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
 						typeof(*neigh_entry),
 						rif_list_node);
 	}
-	if (neigh_entry->rif_list_node.next == &rif->neigh_list)
+	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
 		return NULL;
 	return list_next_entry(neigh_entry, rif_list_node);
 }
@@ -1664,7 +2047,7 @@ __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
 		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
 				      rauhtd_pl);
 		if (err) {
-			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour talbe\n");
+			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
 			break;
 		}
 		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
@@ -1857,7 +2240,7 @@ mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
 }
 
-struct mlxsw_sp_neigh_event_work {
+struct mlxsw_sp_netevent_work {
 	struct work_struct work;
 	struct mlxsw_sp *mlxsw_sp;
 	struct neighbour *n;
@@ -1865,11 +2248,11 @@ struct mlxsw_sp_neigh_event_work {
 
 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
 {
-	struct mlxsw_sp_neigh_event_work *neigh_work =
-		container_of(work, struct mlxsw_sp_neigh_event_work, work);
-	struct mlxsw_sp *mlxsw_sp = neigh_work->mlxsw_sp;
+	struct mlxsw_sp_netevent_work *net_work =
+		container_of(work, struct mlxsw_sp_netevent_work, work);
+	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
 	struct mlxsw_sp_neigh_entry *neigh_entry;
-	struct neighbour *n = neigh_work->n;
+	struct neighbour *n = net_work->n;
 	unsigned char ha[ETH_ALEN];
 	bool entry_connected;
 	u8 nud_state, dead;
@@ -1905,18 +2288,32 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
 out:
 	rtnl_unlock();
 	neigh_release(n);
-	kfree(neigh_work);
+	kfree(net_work);
 }
 
-int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
-				   unsigned long event, void *ptr)
+static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
+
+static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
+{
+	struct mlxsw_sp_netevent_work *net_work =
+		container_of(work, struct mlxsw_sp_netevent_work, work);
+	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
+
+	mlxsw_sp_mp_hash_init(mlxsw_sp);
+	kfree(net_work);
+}
+
+static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
+					  unsigned long event, void *ptr)
 {
-	struct mlxsw_sp_neigh_event_work *neigh_work;
+	struct mlxsw_sp_netevent_work *net_work;
 	struct mlxsw_sp_port *mlxsw_sp_port;
+	struct mlxsw_sp_router *router;
 	struct mlxsw_sp *mlxsw_sp;
 	unsigned long interval;
 	struct neigh_parms *p;
 	struct neighbour *n;
+	struct net *net;
 
 	switch (event) {
 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
@@ -1950,24 +2347,39 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
 		if (!mlxsw_sp_port)
 			return NOTIFY_DONE;
 
-		neigh_work = kzalloc(sizeof(*neigh_work), GFP_ATOMIC);
-		if (!neigh_work) {
+		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
+		if (!net_work) {
 			mlxsw_sp_port_dev_put(mlxsw_sp_port);
 			return NOTIFY_BAD;
 		}
 
-		INIT_WORK(&neigh_work->work, mlxsw_sp_router_neigh_event_work);
-		neigh_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-		neigh_work->n = n;
+		INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
+		net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+		net_work->n = n;
 
 		/* Take a reference to ensure the neighbour won't be
 		 * destructed until we drop the reference in delayed
 		 * work.
 		 */
 		neigh_clone(n);
-		mlxsw_core_schedule_work(&neigh_work->work);
+		mlxsw_core_schedule_work(&net_work->work);
 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
 		break;
+	case NETEVENT_MULTIPATH_HASH_UPDATE:
+		net = ptr;
+
+		if (!net_eq(net, &init_net))
+			return NOTIFY_DONE;
+
+		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
+		if (!net_work)
+			return NOTIFY_BAD;
+
+		router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
+		INIT_WORK(&net_work->work, mlxsw_sp_router_mp_hash_event_work);
+		net_work->mlxsw_sp = router->mlxsw_sp;
+		mlxsw_core_schedule_work(&net_work->work);
+		break;
 	}
 
 	return NOTIFY_DONE;
@@ -2004,16 +2416,25 @@ static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
 	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
 }
 
+static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp,
+				    const struct mlxsw_sp_rif *rif)
+{
+	char rauht_pl[MLXSW_REG_RAUHT_LEN];
+
+	mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL,
+			     rif->rif_index, rif->addr);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
+}
+
 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
 					 struct mlxsw_sp_rif *rif)
 {
 	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
 
+	mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif);
 	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
-				 rif_list_node) {
-		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
+				 rif_list_node)
 		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
-	}
 }
 
 enum mlxsw_sp_nexthop_type {
@@ -2028,6 +2449,7 @@ struct mlxsw_sp_nexthop_key {
 struct mlxsw_sp_nexthop {
 	struct list_head neigh_list_node; /* member of neigh entry list */
 	struct list_head rif_list_node;
+	struct list_head router_list_node;
 	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
 						* this belongs to
 						*/
@@ -2035,6 +2457,9 @@ struct mlxsw_sp_nexthop {
 	struct mlxsw_sp_nexthop_key key;
 	unsigned char gw_addr[sizeof(struct in6_addr)];
 	int ifindex;
+	int nh_weight;
+	int norm_nh_weight;
+	int num_adj_entries;
 	struct mlxsw_sp_rif *rif;
 	u8 should_offload:1, /* set indicates this neigh is connected and
 			      * should be put to KVD linear area of this group.
@@ -2050,6 +2475,8 @@ struct mlxsw_sp_nexthop {
 		struct mlxsw_sp_neigh_entry *neigh_entry;
 		struct mlxsw_sp_ipip_entry *ipip_entry;
 	};
+	unsigned int counter_index;
+	bool counter_valid;
 };
 
 struct mlxsw_sp_nexthop_group {
@@ -2062,10 +2489,118 @@ struct mlxsw_sp_nexthop_group {
 	u32 adj_index;
 	u16 ecmp_size;
 	u16 count;
+	int sum_norm_weight;
 	struct mlxsw_sp_nexthop nexthops[0];
 #define nh_rif	nexthops[0].rif
 };
 
+void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+				    struct mlxsw_sp_nexthop *nh)
+{
+	struct devlink *devlink;
+
+	devlink = priv_to_devlink(mlxsw_sp->core);
+	if (!devlink_dpipe_table_counter_enabled(devlink,
+						 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
+		return;
+
+	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
+		return;
+
+	nh->counter_valid = true;
+}
+
+void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
+				   struct mlxsw_sp_nexthop *nh)
+{
+	if (!nh->counter_valid)
+		return;
+	mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
+	nh->counter_valid = false;
+}
+
+int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
+{
+	if (!nh->counter_valid)
+		return -EINVAL;
+
+	return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
+					 p_counter, NULL);
+}
+
+struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
+					       struct mlxsw_sp_nexthop *nh)
+{
+	if (!nh) {
+		if (list_empty(&router->nexthop_list))
+			return NULL;
+		else
+			return list_first_entry(&router->nexthop_list,
+						typeof(*nh), router_list_node);
+	}
+	if (list_is_last(&nh->router_list_node, &router->nexthop_list))
+		return NULL;
+	return list_next_entry(nh, router_list_node);
+}
+
+bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
+{
+	return nh->offloaded;
+}
+
+unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
+{
+	if (!nh->offloaded)
+		return NULL;
+	return nh->neigh_entry->ha;
+}
+
+int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
+			     u32 *p_adj_size, u32 *p_adj_hash_index)
+{
+	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
+	u32 adj_hash_index = 0;
+	int i;
+
+	if (!nh->offloaded || !nh_grp->adj_index_valid)
+		return -EINVAL;
+
+	*p_adj_index = nh_grp->adj_index;
+	*p_adj_size = nh_grp->ecmp_size;
+
+	for (i = 0; i < nh_grp->count; i++) {
+		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
+
+		if (nh_iter == nh)
+			break;
+		if (nh_iter->offloaded)
+			adj_hash_index += nh_iter->num_adj_entries;
+	}
+
+	*p_adj_hash_index = adj_hash_index;
+	return 0;
+}
+
+struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
+{
+	return nh->rif;
+}
+
+bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
+{
+	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
+	int i;
+
+	for (i = 0; i < nh_grp->count; i++) {
+		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
+
+		if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
+			return true;
+	}
+	return false;
+}
+
 static struct fib_info *
 mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
 {
@@ -2323,8 +2858,8 @@ static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
 	return 0;
 }
 
-static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
-				       struct mlxsw_sp_nexthop *nh)
+static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+				     struct mlxsw_sp_nexthop *nh)
 {
 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
 	char ratr_pl[MLXSW_REG_RATR_LEN];
@@ -2333,12 +2868,33 @@ static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
 			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
 			    adj_index, neigh_entry->rif);
 	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
+	if (nh->counter_valid)
+		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
+	else
+		mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
+
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
 }
 
-static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
-					u32 adj_index,
-					struct mlxsw_sp_nexthop *nh)
+int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+			    struct mlxsw_sp_nexthop *nh)
+{
+	int i;
+
+	for (i = 0; i < nh->num_adj_entries; i++) {
+		int err;
+
+		err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
+					  u32 adj_index,
+					  struct mlxsw_sp_nexthop *nh)
 {
 	const struct mlxsw_sp_ipip_ops *ipip_ops;
 
@@ -2346,6 +2902,24 @@ static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
 	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
 }
 
+static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
+					u32 adj_index,
+					struct mlxsw_sp_nexthop *nh)
+{
+	int i;
+
+	for (i = 0; i < nh->num_adj_entries; i++) {
+		int err;
+
+		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
+						     nh);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int
 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
 			      struct mlxsw_sp_nexthop_group *nh_grp,
@@ -2367,7 +2941,7 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
 		if (nh->update || reallocate) {
 			switch (nh->type) {
 			case MLXSW_SP_NEXTHOP_TYPE_ETH:
-				err = mlxsw_sp_nexthop_mac_update
+				err = mlxsw_sp_nexthop_update
 					    (mlxsw_sp, adj_index, nh);
 				break;
 			case MLXSW_SP_NEXTHOP_TYPE_IPIP:
@@ -2380,7 +2954,7 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
 			nh->update = 0;
 			nh->offloaded = 1;
 		}
-		adj_index++;
+		adj_index += nh->num_adj_entries;
 	}
 	return 0;
 }
@@ -2425,17 +2999,118 @@ mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
 	}
 }
 
+static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
+{
+	/* Valid sizes for an adjacency group are:
+	 * 1-64, 512, 1024, 2048 and 4096.
+	 */
+	if (*p_adj_grp_size <= 64)
+		return;
+	else if (*p_adj_grp_size <= 512)
+		*p_adj_grp_size = 512;
+	else if (*p_adj_grp_size <= 1024)
+		*p_adj_grp_size = 1024;
+	else if (*p_adj_grp_size <= 2048)
+		*p_adj_grp_size = 2048;
+	else
+		*p_adj_grp_size = 4096;
+}
+
+static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
+					     unsigned int alloc_size)
+{
+	if (alloc_size >= 4096)
+		*p_adj_grp_size = 4096;
+	else if (alloc_size >= 2048)
+		*p_adj_grp_size = 2048;
+	else if (alloc_size >= 1024)
+		*p_adj_grp_size = 1024;
+	else if (alloc_size >= 512)
+		*p_adj_grp_size = 512;
+}
+
+static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
+				     u16 *p_adj_grp_size)
+{
+	unsigned int alloc_size;
+	int err;
+
+	/* Round up the requested group size to the next size supported
+	 * by the device and make sure the request can be satisfied.
+	 */
+	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
+	err = mlxsw_sp_kvdl_alloc_size_query(mlxsw_sp, *p_adj_grp_size,
+					     &alloc_size);
+	if (err)
+		return err;
+	/* It is possible the allocation results in more allocated
+	 * entries than requested. Try to use as much of them as
+	 * possible.
+	 */
+	mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
+
+	return 0;
+}
+
+static void
+mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
+{
+	int i, g = 0, sum_norm_weight = 0;
+	struct mlxsw_sp_nexthop *nh;
+
+	for (i = 0; i < nh_grp->count; i++) {
+		nh = &nh_grp->nexthops[i];
+
+		if (!nh->should_offload)
+			continue;
+		if (g > 0)
+			g = gcd(nh->nh_weight, g);
+		else
+			g = nh->nh_weight;
+	}
+
+	for (i = 0; i < nh_grp->count; i++) {
+		nh = &nh_grp->nexthops[i];
+
+		if (!nh->should_offload)
+			continue;
+		nh->norm_nh_weight = nh->nh_weight / g;
+		sum_norm_weight += nh->norm_nh_weight;
+	}
+
+	nh_grp->sum_norm_weight = sum_norm_weight;
+}
+
+static void
+mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
+{
+	int total = nh_grp->sum_norm_weight;
+	u16 ecmp_size = nh_grp->ecmp_size;
+	int i, weight = 0, lower_bound = 0;
+
+	for (i = 0; i < nh_grp->count; i++) {
+		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
+		int upper_bound;
+
+		if (!nh->should_offload)
+			continue;
+		weight += nh->norm_nh_weight;
+		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
+		nh->num_adj_entries = upper_bound - lower_bound;
+		lower_bound = upper_bound;
+	}
+}
+
 static void
 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
 			       struct mlxsw_sp_nexthop_group *nh_grp)
 {
+	u16 ecmp_size, old_ecmp_size;
 	struct mlxsw_sp_nexthop *nh;
 	bool offload_change = false;
 	u32 adj_index;
-	u16 ecmp_size = 0;
 	bool old_adj_index_valid;
 	u32 old_adj_index;
-	u16 old_ecmp_size;
 	int i;
 	int err;
 
@@ -2452,8 +3127,6 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
 			if (nh->should_offload)
 				nh->update = 1;
 		}
-		if (nh->should_offload)
-			ecmp_size++;
 	}
 	if (!offload_change) {
 		/* Nothing was added or removed, so no need to reallocate. Just
@@ -2466,12 +3139,19 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
 		}
 		return;
 	}
-	if (!ecmp_size)
+	mlxsw_sp_nexthop_group_normalize(nh_grp);
+	if (!nh_grp->sum_norm_weight)
 		/* No neigh of this group is connected so we just set
 		 * the trap and let everthing flow through kernel.
 		 */
 		goto set_trap;
 
+	ecmp_size = nh_grp->sum_norm_weight;
+	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
+	if (err)
+		/* No valid allocation size available. */
+		goto set_trap;
+
 	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
 	if (err) {
 		/* We ran out of KVD linear space, just set the
@@ -2486,6 +3166,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
 	nh_grp->adj_index_valid = 1;
 	nh_grp->adj_index = adj_index;
 	nh_grp->ecmp_size = ecmp_size;
+	mlxsw_sp_nexthop_group_rebalance(nh_grp);
 	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
 	if (err) {
 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
@@ -2655,38 +3336,28 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
 	neigh_release(n);
 }
 
-static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
-				      const struct net_device *dev,
-				      enum mlxsw_sp_ipip_type *p_type)
+static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
 {
-	struct mlxsw_sp_router *router = mlxsw_sp->router;
-	const struct mlxsw_sp_ipip_ops *ipip_ops;
-	enum mlxsw_sp_ipip_type ipipt;
+	struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
 
-	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
-		ipip_ops = router->ipip_ops_arr[ipipt];
-		if (dev->type == ipip_ops->dev_type) {
-			if (p_type)
-				*p_type = ipipt;
-			return true;
-		}
-	}
-	return false;
+	return ul_dev ? (ul_dev->flags & IFF_UP) : true;
 }
 
 static int mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
-				      enum mlxsw_sp_ipip_type ipipt,
 				      struct mlxsw_sp_nexthop *nh,
 				      struct net_device *ol_dev)
 {
+	bool removing;
+
 	if (!nh->nh_grp->gateway || nh->ipip_entry)
 		return 0;
 
-	nh->ipip_entry = mlxsw_sp_ipip_entry_get(mlxsw_sp, ipipt, ol_dev);
-	if (IS_ERR(nh->ipip_entry))
-		return PTR_ERR(nh->ipip_entry);
+	nh->ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
+	if (!nh->ipip_entry)
+		return -ENOENT;
 
-	__mlxsw_sp_nexthop_neigh_update(nh, false);
+	removing = !mlxsw_sp_ipip_netdev_ul_up(ol_dev);
+	__mlxsw_sp_nexthop_neigh_update(nh, removing);
 	return 0;
 }
 
@@ -2699,7 +3370,6 @@ static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
 		return;
 
 	__mlxsw_sp_nexthop_neigh_update(nh, true);
-	mlxsw_sp_ipip_entry_put(mlxsw_sp, ipip_entry);
 	nh->ipip_entry = NULL;
 }
 
@@ -2743,7 +3413,7 @@ static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
 	    router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev,
 						     MLXSW_SP_L3_PROTO_IPV4)) {
 		nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
-		err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev);
+		err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev);
 		if (err)
 			return err;
 		mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common);
@@ -2784,11 +3454,19 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
 
 	nh->nh_grp = nh_grp;
 	nh->key.fib_nh = fib_nh;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+	nh->nh_weight = fib_nh->nh_weight;
+#else
+	nh->nh_weight = 1;
+#endif
 	memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
 	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
 	if (err)
 		return err;
 
+	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
+	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
+
 	if (!dev)
 		return 0;
 
@@ -2812,6 +3490,8 @@ static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
 				   struct mlxsw_sp_nexthop *nh)
 {
 	mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
+	list_del(&nh->router_list_node);
+	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
 }
 
@@ -2841,6 +3521,30 @@ static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
 }
 
+static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
+					struct mlxsw_sp_rif *rif)
+{
+	struct mlxsw_sp_nexthop *nh;
+	bool removing;
+
+	list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
+		switch (nh->type) {
+		case MLXSW_SP_NEXTHOP_TYPE_ETH:
+			removing = false;
+			break;
+		case MLXSW_SP_NEXTHOP_TYPE_IPIP:
+			removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
+			break;
+		default:
+			WARN_ON(1);
+			continue;
+		}
+
+		__mlxsw_sp_nexthop_neigh_update(nh, removing);
+		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
+	}
+}
+
 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
 					   struct mlxsw_sp_rif *rif)
 {
@@ -3121,7 +3825,7 @@ mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
 			return;
 		if (mlxsw_sp_fib_entry_should_offload(fib_entry))
 			mlxsw_sp_fib_entry_offload_set(fib_entry);
-		else if (!mlxsw_sp_fib_entry_should_offload(fib_entry))
+		else
 			mlxsw_sp_fib_entry_offload_unset(fib_entry);
 		return;
 	default:
@@ -3576,7 +4280,7 @@ mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
 	struct mlxsw_sp_vr *vr;
 	int err;
 
-	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id);
+	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
 	if (IS_ERR(vr))
 		return ERR_CAST(vr);
 	fib = mlxsw_sp_vr_fib(vr, proto);
@@ -4000,7 +4704,7 @@ static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
 	    router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev,
 						     MLXSW_SP_L3_PROTO_IPV6)) {
 		nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
-		err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev);
+		err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev);
 		if (err)
 			return err;
 		mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common);
@@ -4038,7 +4742,11 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
 	struct net_device *dev = rt->dst.dev;
 
 	nh->nh_grp = nh_grp;
+	nh->nh_weight = 1;
 	memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
+	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
+
+	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
 
 	if (!dev)
 		return 0;
@@ -4051,6 +4759,8 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
 				   struct mlxsw_sp_nexthop *nh)
 {
 	mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
+	list_del(&nh->router_list_node);
+	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
 }
 
 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
@@ -4601,6 +5311,75 @@ static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
 	return 0;
 }
 
+static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
+				     struct mfc_entry_notifier_info *men_info,
+				     bool replace)
+{
+	struct mlxsw_sp_vr *vr;
+
+	if (mlxsw_sp->router->aborted)
+		return 0;
+
+	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
+	if (IS_ERR(vr))
+		return PTR_ERR(vr);
+
+	return mlxsw_sp_mr_route4_add(vr->mr4_table, men_info->mfc, replace);
+}
+
+static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
+				      struct mfc_entry_notifier_info *men_info)
+{
+	struct mlxsw_sp_vr *vr;
+
+	if (mlxsw_sp->router->aborted)
+		return;
+
+	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
+	if (WARN_ON(!vr))
+		return;
+
+	mlxsw_sp_mr_route4_del(vr->mr4_table, men_info->mfc);
+	mlxsw_sp_vr_put(vr);
+}
+
+static int
+mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
+			      struct vif_entry_notifier_info *ven_info)
+{
+	struct mlxsw_sp_rif *rif;
+	struct mlxsw_sp_vr *vr;
+
+	if (mlxsw_sp->router->aborted)
+		return 0;
+
+	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
+	if (IS_ERR(vr))
+		return PTR_ERR(vr);
+
+	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
+	return mlxsw_sp_mr_vif_add(vr->mr4_table, ven_info->dev,
+				   ven_info->vif_index,
+				   ven_info->vif_flags, rif);
+}
+
+static void
+mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
+			      struct vif_entry_notifier_info *ven_info)
+{
+	struct mlxsw_sp_vr *vr;
+
+	if (mlxsw_sp->router->aborted)
+		return;
+
+	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
+	if (WARN_ON(!vr))
+		return;
+
+	mlxsw_sp_mr_vif_del(vr->mr4_table, ven_info->vif_index);
+	mlxsw_sp_vr_put(vr);
+}
+
 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
 {
 	enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
@@ -4611,6 +5390,10 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
 	if (err)
 		return err;
 
+	/* The multicast router code does not need an abort trap as by default,
+	 * packets that don't match any routes are trapped to the CPU.
+	 */
+
 	proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
 	return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
 						MLXSW_SP_LPM_TREE_MIN + 1);
@@ -4692,6 +5475,8 @@ static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
 
 		if (!mlxsw_sp_vr_is_used(vr))
 			continue;
+
+		mlxsw_sp_mr_table_flush(vr->mr4_table);
 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
 
 		/* If virtual router was only used for IPv4, then it's no
@@ -4724,6 +5509,8 @@ struct mlxsw_sp_fib_event_work {
 		struct fib_entry_notifier_info fen_info;
 		struct fib_rule_notifier_info fr_info;
 		struct fib_nh_notifier_info fnh_info;
+		struct mfc_entry_notifier_info men_info;
+		struct vif_entry_notifier_info ven_info;
 	};
 	struct mlxsw_sp *mlxsw_sp;
 	unsigned long event;
@@ -4734,7 +5521,6 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
 	struct mlxsw_sp_fib_event_work *fib_work =
 		container_of(work, struct mlxsw_sp_fib_event_work, work);
 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
-	struct fib_rule *rule;
 	bool replace, append;
 	int err;
 
@@ -4756,12 +5542,11 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
 		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
 		fib_info_put(fib_work->fen_info.fi);
 		break;
-	case FIB_EVENT_RULE_ADD: /* fall through */
-	case FIB_EVENT_RULE_DEL:
-		rule = fib_work->fr_info.rule;
-		if (!fib4_rule_default(rule) && !rule->l3mdev)
-			mlxsw_sp_router_fib_abort(mlxsw_sp);
-		fib_rule_put(rule);
+	case FIB_EVENT_RULE_ADD:
+		/* if we get here, a rule was added that we do not support.
+		 * just do the fib_abort
+		 */
+		mlxsw_sp_router_fib_abort(mlxsw_sp);
 		break;
 	case FIB_EVENT_NH_ADD: /* fall through */
 	case FIB_EVENT_NH_DEL:
@@ -4779,7 +5564,6 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
 	struct mlxsw_sp_fib_event_work *fib_work =
 		container_of(work, struct mlxsw_sp_fib_event_work, work);
 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
-	struct fib_rule *rule;
 	bool replace;
 	int err;
 
@@ -4798,12 +5582,58 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
 		mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
 		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
 		break;
-	case FIB_EVENT_RULE_ADD: /* fall through */
-	case FIB_EVENT_RULE_DEL:
-		rule = fib_work->fr_info.rule;
-		if (!fib6_rule_default(rule) && !rule->l3mdev)
+	case FIB_EVENT_RULE_ADD:
+		/* if we get here, a rule was added that we do not support.
+		 * just do the fib_abort
+		 */
+		mlxsw_sp_router_fib_abort(mlxsw_sp);
+		break;
+	}
+	rtnl_unlock();
+	kfree(fib_work);
+}
+
+static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
+{
+	struct mlxsw_sp_fib_event_work *fib_work =
+		container_of(work, struct mlxsw_sp_fib_event_work, work);
+	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
+	bool replace;
+	int err;
+
+	rtnl_lock();
+	switch (fib_work->event) {
+	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+	case FIB_EVENT_ENTRY_ADD:
+		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
+
+		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
+						replace);
+		if (err)
+			mlxsw_sp_router_fib_abort(mlxsw_sp);
+		ipmr_cache_put(fib_work->men_info.mfc);
+		break;
+	case FIB_EVENT_ENTRY_DEL:
+		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
+		ipmr_cache_put(fib_work->men_info.mfc);
+		break;
+	case FIB_EVENT_VIF_ADD:
+		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
+						    &fib_work->ven_info);
+		if (err)
 			mlxsw_sp_router_fib_abort(mlxsw_sp);
-		fib_rule_put(rule);
+		dev_put(fib_work->ven_info.dev);
+		break;
+	case FIB_EVENT_VIF_DEL:
+		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
+					      &fib_work->ven_info);
+		dev_put(fib_work->ven_info.dev);
+		break;
+	case FIB_EVENT_RULE_ADD:
+		/* if we get here, a rule was added that we do not support.
+		 * just do the fib_abort
+		 */
+		mlxsw_sp_router_fib_abort(mlxsw_sp);
 		break;
 	}
 	rtnl_unlock();
@@ -4813,25 +5643,27 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
 				       struct fib_notifier_info *info)
 {
+	struct fib_entry_notifier_info *fen_info;
+	struct fib_nh_notifier_info *fnh_info;
+
 	switch (fib_work->event) {
 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
 	case FIB_EVENT_ENTRY_ADD: /* fall through */
 	case FIB_EVENT_ENTRY_DEL:
-		memcpy(&fib_work->fen_info, info, sizeof(fib_work->fen_info));
-		/* Take referece on fib_info to prevent it from being
+		fen_info = container_of(info, struct fib_entry_notifier_info,
+					info);
+		fib_work->fen_info = *fen_info;
+		/* Take reference on fib_info to prevent it from being
 		 * freed while work is queued. Release it afterwards.
 		 */
 		fib_info_hold(fib_work->fen_info.fi);
 		break;
-	case FIB_EVENT_RULE_ADD: /* fall through */
-	case FIB_EVENT_RULE_DEL:
-		memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info));
-		fib_rule_get(fib_work->fr_info.rule);
-		break;
 	case FIB_EVENT_NH_ADD: /* fall through */
 	case FIB_EVENT_NH_DEL:
-		memcpy(&fib_work->fnh_info, info, sizeof(fib_work->fnh_info));
+		fnh_info = container_of(info, struct fib_nh_notifier_info,
+					info);
+		fib_work->fnh_info = *fnh_info;
 		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
 		break;
 	}
@@ -4840,21 +5672,79 @@ static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
 static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
 				       struct fib_notifier_info *info)
 {
+	struct fib6_entry_notifier_info *fen6_info;
+
 	switch (fib_work->event) {
 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
 	case FIB_EVENT_ENTRY_ADD: /* fall through */
 	case FIB_EVENT_ENTRY_DEL:
-		memcpy(&fib_work->fen6_info, info, sizeof(fib_work->fen6_info));
+		fen6_info = container_of(info, struct fib6_entry_notifier_info,
+					 info);
+		fib_work->fen6_info = *fen6_info;
 		rt6_hold(fib_work->fen6_info.rt);
 		break;
-	case FIB_EVENT_RULE_ADD: /* fall through */
-	case FIB_EVENT_RULE_DEL:
-		memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info));
-		fib_rule_get(fib_work->fr_info.rule);
+	}
+}
+
+static void
+mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
+			    struct fib_notifier_info *info)
+{
+	switch (fib_work->event) {
+	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+	case FIB_EVENT_ENTRY_ADD: /* fall through */
+	case FIB_EVENT_ENTRY_DEL:
+		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
+		ipmr_cache_hold(fib_work->men_info.mfc);
+		break;
+	case FIB_EVENT_VIF_ADD: /* fall through */
+	case FIB_EVENT_VIF_DEL:
+		memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
+		dev_hold(fib_work->ven_info.dev);
 		break;
 	}
 }
 
+static int mlxsw_sp_router_fib_rule_event(unsigned long event,
+					  struct fib_notifier_info *info,
+					  struct mlxsw_sp *mlxsw_sp)
+{
+	struct netlink_ext_ack *extack = info->extack;
+	struct fib_rule_notifier_info *fr_info;
+	struct fib_rule *rule;
+	int err = 0;
+
+	/* nothing to do at the moment */
+	if (event == FIB_EVENT_RULE_DEL)
+		return 0;
+
+	if (mlxsw_sp->router->aborted)
+		return 0;
+
+	fr_info = container_of(info, struct fib_rule_notifier_info, info);
+	rule = fr_info->rule;
+
+	switch (info->family) {
+	case AF_INET:
+		if (!fib4_rule_default(rule) && !rule->l3mdev)
+			err = -1;
+		break;
+	case AF_INET6:
+		if (!fib6_rule_default(rule) && !rule->l3mdev)
+			err = -1;
+		break;
+	case RTNL_FAMILY_IPMR:
+		if (!ipmr_rule_default(rule) && !rule->l3mdev)
+			err = -1;
+		break;
+	}
+
+	if (err < 0)
+		NL_SET_ERR_MSG(extack, "spectrum: FIB rules not supported. Aborting offload");
+
+	return err;
+}
+
 /* Called with rcu_read_lock() */
 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
 				     unsigned long event, void *ptr)
@@ -4862,16 +5752,28 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
 	struct mlxsw_sp_fib_event_work *fib_work;
 	struct fib_notifier_info *info = ptr;
 	struct mlxsw_sp_router *router;
+	int err;
 
 	if (!net_eq(info->net, &init_net) ||
-	    (info->family != AF_INET && info->family != AF_INET6))
+	    (info->family != AF_INET && info->family != AF_INET6 &&
+	     info->family != RTNL_FAMILY_IPMR))
 		return NOTIFY_DONE;
 
+	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
+
+	switch (event) {
+	case FIB_EVENT_RULE_ADD: /* fall through */
+	case FIB_EVENT_RULE_DEL:
+		err = mlxsw_sp_router_fib_rule_event(event, info,
+						     router->mlxsw_sp);
+		if (!err)
+			return NOTIFY_DONE;
+	}
+
 	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
 	if (WARN_ON(!fib_work))
 		return NOTIFY_BAD;
 
-	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
 	fib_work->mlxsw_sp = router->mlxsw_sp;
 	fib_work->event = event;
 
@@ -4884,6 +5786,10 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
 		mlxsw_sp_router_fib6_event(fib_work, info);
 		break;
+	case RTNL_FAMILY_IPMR:
+		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
+		mlxsw_sp_router_fibmr_event(fib_work, info);
+		break;
 	}
 
 	mlxsw_core_schedule_work(&fib_work->work);
@@ -5044,9 +5950,15 @@ int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
 	return rif->dev->ifindex;
 }
 
+const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
+{
+	return rif->dev;
+}
+
 static struct mlxsw_sp_rif *
 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
-		    const struct mlxsw_sp_rif_params *params)
+		    const struct mlxsw_sp_rif_params *params,
+		    struct netlink_ext_ack *extack)
 {
 	u32 tb_id = l3mdev_fib_table(params->dev);
 	const struct mlxsw_sp_rif_ops *ops;
@@ -5060,14 +5972,16 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
 	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
 	ops = mlxsw_sp->router->rif_ops_arr[type];
 
-	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN);
+	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
 	if (IS_ERR(vr))
 		return ERR_CAST(vr);
 	vr->rif_count++;
 
 	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
-	if (err)
+	if (err) {
+		NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported router interfaces");
 		goto err_rif_index_alloc;
+	}
 
 	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
 	if (!rif) {
@@ -5093,11 +6007,17 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
 	if (err)
 		goto err_configure;
 
+	err = mlxsw_sp_mr_rif_add(vr->mr4_table, rif);
+	if (err)
+		goto err_mr_rif_add;
+
 	mlxsw_sp_rif_counters_alloc(rif);
 	mlxsw_sp->router->rifs[rif_index] = rif;
 
 	return rif;
 
+err_mr_rif_add:
+	ops->deconfigure(rif);
 err_configure:
 	if (fid)
 		mlxsw_sp_fid_put(fid);
@@ -5122,6 +6042,7 @@ void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
 
 	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
 	mlxsw_sp_rif_counters_free(rif);
+	mlxsw_sp_mr_rif_del(vr->mr4_table, rif);
 	ops->deconfigure(rif);
 	if (fid)
 		/* Loopback RIFs are not associated with a FID. */
@@ -5147,7 +6068,8 @@ mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
 
 static int
 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
-			       struct net_device *l3_dev)
+			       struct net_device *l3_dev,
+			       struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
@@ -5163,7 +6085,7 @@ mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
 		};
 
 		mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
-		rif = mlxsw_sp_rif_create(mlxsw_sp, &params);
+		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
 		if (IS_ERR(rif))
 			return PTR_ERR(rif);
 	}
@@ -5218,7 +6140,8 @@ mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
 
 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
 					     struct net_device *port_dev,
-					     unsigned long event, u16 vid)
+					     unsigned long event, u16 vid,
+					     struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
@@ -5230,7 +6153,7 @@ static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
 	switch (event) {
 	case NETDEV_UP:
 		return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
-						      l3_dev);
+						      l3_dev, extack);
 	case NETDEV_DOWN:
 		mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
 		break;
@@ -5240,19 +6163,22 @@ static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
 }
 
 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
-					unsigned long event)
+					unsigned long event,
+					struct netlink_ext_ack *extack)
 {
 	if (netif_is_bridge_port(port_dev) ||
 	    netif_is_lag_port(port_dev) ||
 	    netif_is_ovs_port(port_dev))
 		return 0;
 
-	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1);
+	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1,
+						 extack);
 }
 
 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
 					 struct net_device *lag_dev,
-					 unsigned long event, u16 vid)
+					 unsigned long event, u16 vid,
+					 struct netlink_ext_ack *extack)
 {
 	struct net_device *port_dev;
 	struct list_head *iter;
@@ -5262,7 +6188,8 @@ static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
 		if (mlxsw_sp_port_dev_check(port_dev)) {
 			err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
 								port_dev,
-								event, vid);
+								event, vid,
+								extack);
 			if (err)
 				return err;
 		}
@@ -5272,16 +6199,19 @@ static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
 }
 
 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
-				       unsigned long event)
+				       unsigned long event,
+				       struct netlink_ext_ack *extack)
 {
 	if (netif_is_bridge_port(lag_dev))
 		return 0;
 
-	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1);
+	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1,
+					     extack);
 }
 
 static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
-					  unsigned long event)
+					  unsigned long event,
+					  struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
 	struct mlxsw_sp_rif_params params = {
@@ -5291,7 +6221,7 @@ static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
 
 	switch (event) {
 	case NETDEV_UP:
-		rif = mlxsw_sp_rif_create(mlxsw_sp, &params);
+		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
 		if (IS_ERR(rif))
 			return PTR_ERR(rif);
 		break;
@@ -5305,7 +6235,8 @@ static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
 }
 
 static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
-					unsigned long event)
+					unsigned long event,
+					struct netlink_ext_ack *extack)
 {
 	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
 	u16 vid = vlan_dev_vlan_id(vlan_dev);
@@ -5315,27 +6246,28 @@ static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
 
 	if (mlxsw_sp_port_dev_check(real_dev))
 		return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
-							 event, vid);
+							 event, vid, extack);
 	else if (netif_is_lag_master(real_dev))
 		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
-						     vid);
+						     vid, extack);
 	else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
-		return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event);
+		return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack);
 
 	return 0;
 }
 
 static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
-				     unsigned long event)
+				     unsigned long event,
+				     struct netlink_ext_ack *extack)
 {
 	if (mlxsw_sp_port_dev_check(dev))
-		return mlxsw_sp_inetaddr_port_event(dev, event);
+		return mlxsw_sp_inetaddr_port_event(dev, event, extack);
 	else if (netif_is_lag_master(dev))
-		return mlxsw_sp_inetaddr_lag_event(dev, event);
+		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
 	else if (netif_is_bridge_master(dev))
-		return mlxsw_sp_inetaddr_bridge_event(dev, event);
+		return mlxsw_sp_inetaddr_bridge_event(dev, event, extack);
 	else if (is_vlan_dev(dev))
-		return mlxsw_sp_inetaddr_vlan_event(dev, event);
+		return mlxsw_sp_inetaddr_vlan_event(dev, event, extack);
 	else
 		return 0;
 }
@@ -5349,6 +6281,32 @@ int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
 	struct mlxsw_sp_rif *rif;
 	int err = 0;
 
+	/* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
+	if (event == NETDEV_UP)
+		goto out;
+
+	mlxsw_sp = mlxsw_sp_lower_get(dev);
+	if (!mlxsw_sp)
+		goto out;
+
+	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+	if (!mlxsw_sp_rif_should_config(rif, dev, event))
+		goto out;
+
+	err = __mlxsw_sp_inetaddr_event(dev, event, NULL);
+out:
+	return notifier_from_errno(err);
+}
+
+int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
+				  unsigned long event, void *ptr)
+{
+	struct in_validator_info *ivi = (struct in_validator_info *) ptr;
+	struct net_device *dev = ivi->ivi_dev->dev;
+	struct mlxsw_sp *mlxsw_sp;
+	struct mlxsw_sp_rif *rif;
+	int err = 0;
+
 	mlxsw_sp = mlxsw_sp_lower_get(dev);
 	if (!mlxsw_sp)
 		goto out;
@@ -5357,7 +6315,7 @@ int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
 		goto out;
 
-	err = __mlxsw_sp_inetaddr_event(dev, event);
+	err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack);
 out:
 	return notifier_from_errno(err);
 }
@@ -5386,7 +6344,7 @@ static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
 	if (!mlxsw_sp_rif_should_config(rif, dev, event))
 		goto out;
 
-	__mlxsw_sp_inetaddr_event(dev, event);
+	__mlxsw_sp_inetaddr_event(dev, event, NULL);
 out:
 	rtnl_unlock();
 	dev_put(dev);
@@ -5401,6 +6359,10 @@ int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
 	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
 	struct net_device *dev = if6->idev->dev;
 
+	/* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
+	if (event == NETDEV_UP)
+		return NOTIFY_DONE;
+
 	if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
 		return NOTIFY_DONE;
 
@@ -5417,6 +6379,28 @@ int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
 	return NOTIFY_DONE;
 }
 
+int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
+				   unsigned long event, void *ptr)
+{
+	struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
+	struct net_device *dev = i6vi->i6vi_dev->dev;
+	struct mlxsw_sp *mlxsw_sp;
+	struct mlxsw_sp_rif *rif;
+	int err = 0;
+
+	mlxsw_sp = mlxsw_sp_lower_get(dev);
+	if (!mlxsw_sp)
+		goto out;
+
+	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+	if (!mlxsw_sp_rif_should_config(rif, dev, event))
+		goto out;
+
+	err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack);
+out:
+	return notifier_from_errno(err);
+}
+
 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
 			     const char *mac, int mtu)
 {
@@ -5463,6 +6447,17 @@ int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
 	if (err)
 		goto err_rif_fdb_op;
 
+	if (rif->mtu != dev->mtu) {
+		struct mlxsw_sp_vr *vr;
+
+		/* The RIF is relevant only to its mr_table instance, as unlike
+		 * unicast routing, in multicast routing a RIF cannot be shared
+		 * between several multicast routing tables.
+		 */
+		vr = &mlxsw_sp->router->vrs[rif->vr_id];
+		mlxsw_sp_mr_rif_mtu_update(vr->mr4_table, rif, dev->mtu);
+	}
+
 	ether_addr_copy(rif->addr, dev->dev_addr);
 	rif->mtu = dev->mtu;
 
@@ -5478,7 +6473,8 @@ err_rif_edit:
 }
 
 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
-				  struct net_device *l3_dev)
+				  struct net_device *l3_dev,
+				  struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp_rif *rif;
 
@@ -5487,9 +6483,9 @@ static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
 	 */
 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
 	if (rif)
-		__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN);
+		__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack);
 
-	return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP);
+	return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack);
 }
 
 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
@@ -5500,7 +6496,7 @@ static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
 	if (!rif)
 		return;
-	__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN);
+	__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL);
 }
 
 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
@@ -5516,10 +6512,14 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
 	case NETDEV_PRECHANGEUPPER:
 		return 0;
 	case NETDEV_CHANGEUPPER:
-		if (info->linking)
-			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev);
-		else
+		if (info->linking) {
+			struct netlink_ext_ack *extack;
+
+			extack = netdev_notifier_info_to_extack(&info->info);
+			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
+		} else {
 			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
+		}
 		break;
 	}
 
@@ -5625,7 +6625,7 @@ static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
 }
 
-static u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
+u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
 {
 	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
 }
@@ -5826,7 +6826,7 @@ mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
 	struct mlxsw_sp_vr *ul_vr;
 	int err;
 
-	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id);
+	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
 	if (IS_ERR(ul_vr))
 		return PTR_ERR(ul_vr);
 
@@ -5930,6 +6930,64 @@ static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
 	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
 }
 
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
+{
+	mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
+}
+
+static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
+{
+	mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
+}
+
+static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
+{
+	bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;
+
+	mlxsw_sp_mp_hash_header_set(recr2_pl,
+				    MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
+	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
+	mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
+	mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
+	if (only_l3)
+		return;
+	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
+	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
+	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
+	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
+}
+
+static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
+{
+	mlxsw_sp_mp_hash_header_set(recr2_pl,
+				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
+	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
+	mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
+	mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
+	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
+	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
+}
+
+static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
+{
+	char recr2_pl[MLXSW_REG_RECR2_LEN];
+	u32 seed;
+
+	get_random_bytes(&seed, sizeof(seed));
+	mlxsw_reg_recr2_pack(recr2_pl, seed);
+	mlxsw_sp_mp4_hash_init(recr2_pl);
+	mlxsw_sp_mp6_hash_init(recr2_pl);
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
+}
+#else
+static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
+{
+	return 0;
+}
+#endif
+
 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 {
 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
@@ -5990,10 +7048,15 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 	if (err)
 		goto err_nexthop_group_ht_init;
 
+	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
 	err = mlxsw_sp_lpm_init(mlxsw_sp);
 	if (err)
 		goto err_lpm_init;
 
+	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
+	if (err)
+		goto err_mr_init;
+
 	err = mlxsw_sp_vrs_init(mlxsw_sp);
 	if (err)
 		goto err_vrs_init;
@@ -6002,6 +7065,16 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 	if (err)
 		goto err_neigh_init;
 
+	mlxsw_sp->router->netevent_nb.notifier_call =
+		mlxsw_sp_router_netevent_event;
+	err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
+	if (err)
+		goto err_register_netevent_notifier;
+
+	err = mlxsw_sp_mp_hash_init(mlxsw_sp);
+	if (err)
+		goto err_mp_hash_init;
+
 	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
 	err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
 				    mlxsw_sp_router_fib_dump_flush);
@@ -6011,10 +7084,15 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 	return 0;
 
 err_register_fib_notifier:
+err_mp_hash_init:
+	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
+err_register_netevent_notifier:
 	mlxsw_sp_neigh_fini(mlxsw_sp);
 err_neigh_init:
 	mlxsw_sp_vrs_fini(mlxsw_sp);
 err_vrs_init:
+	mlxsw_sp_mr_fini(mlxsw_sp);
+err_mr_init:
 	mlxsw_sp_lpm_fini(mlxsw_sp);
 err_lpm_init:
 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
@@ -6034,8 +7112,10 @@ err_router_init:
 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
 {
 	unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
+	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
 	mlxsw_sp_neigh_fini(mlxsw_sp);
 	mlxsw_sp_vrs_fini(mlxsw_sp);
+	mlxsw_sp_mr_fini(mlxsw_sp);
 	mlxsw_sp_lpm_fini(mlxsw_sp);
 	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
 	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index 345fcc4f38e9..1fb82246ce96 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -62,13 +62,18 @@ enum mlxsw_sp_rif_counter_dir {
 };
 
 struct mlxsw_sp_neigh_entry;
+struct mlxsw_sp_nexthop;
+struct mlxsw_sp_ipip_entry;
 
 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
 					   u16 rif_index);
 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif);
 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif);
 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif);
+u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev);
 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif);
+u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp);
+const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif);
 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
 				   struct mlxsw_sp_rif *rif,
 				   enum mlxsw_sp_rif_counter_dir dir,
@@ -100,12 +105,44 @@ mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
 				    struct mlxsw_sp_neigh_entry *neigh_entry,
 				    bool adding);
 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry);
-union mlxsw_sp_l3addr
-mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
-			   const struct net_device *ol_dev);
-union mlxsw_sp_l3addr
-mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
-			   const struct net_device *ol_dev);
-__be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev);
+int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
+					struct mlxsw_sp_ipip_entry *ipip_entry,
+					bool recreate_loopback,
+					bool keep_encap,
+					bool update_nexthops,
+					struct netlink_ext_ack *extack);
+void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
+				       struct mlxsw_sp_ipip_entry *ipip_entry);
+bool
+mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
+				     enum mlxsw_sp_l3proto ul_proto,
+				     union mlxsw_sp_l3addr saddr,
+				     u32 ul_tb_id,
+				     const struct mlxsw_sp_ipip_entry *except);
+struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
+					       struct mlxsw_sp_nexthop *nh);
+bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh);
+unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh);
+int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
+			     u32 *p_adj_size, u32 *p_adj_hash_index);
+struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh);
+bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh);
+#define mlxsw_sp_nexthop_for_each(nh, router)				\
+	for (nh = mlxsw_sp_nexthop_next(router, NULL); nh;		\
+	     nh = mlxsw_sp_nexthop_next(router, nh))
+int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_nexthop *nh, u64 *p_counter);
+int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+			    struct mlxsw_sp_nexthop *nh);
+void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+				    struct mlxsw_sp_nexthop *nh);
+void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
+				   struct mlxsw_sp_nexthop *nh);
+
+static inline bool mlxsw_sp_l3addr_eq(const union mlxsw_sp_l3addr *addr1,
+				      const union mlxsw_sp_l3addr *addr2)
+{
+	return !memcmp(addr1, addr2, sizeof(*addr1));
+}
 
 #endif /* _MLXSW_ROUTER_H_*/
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index d39ffbfcc436..7b8548e25ae7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -46,8 +46,10 @@
 #include <linux/workqueue.h>
 #include <linux/jiffies.h>
 #include <linux/rtnetlink.h>
+#include <linux/netlink.h>
 #include <net/switchdev.h>
 
+#include "spectrum_router.h"
 #include "spectrum.h"
 #include "core.h"
 #include "reg.h"
@@ -67,7 +69,6 @@ struct mlxsw_sp_bridge {
 	u32 ageing_time;
 	bool vlan_enabled_exists;
 	struct list_head bridges_list;
-	struct list_head mids_list;
 	DECLARE_BITMAP(mids_bitmap, MLXSW_SP_MID_MAX);
 	const struct mlxsw_sp_bridge_ops *bridge_8021q_ops;
 	const struct mlxsw_sp_bridge_ops *bridge_8021d_ops;
@@ -77,8 +78,10 @@ struct mlxsw_sp_bridge_device {
 	struct net_device *dev;
 	struct list_head list;
 	struct list_head ports_list;
+	struct list_head mids_list;
 	u8 vlan_enabled:1,
-	   multicast_enabled:1;
+	   multicast_enabled:1,
+	   mrouter:1;
 	const struct mlxsw_sp_bridge_ops *ops;
 };
 
@@ -107,7 +110,8 @@ struct mlxsw_sp_bridge_vlan {
 struct mlxsw_sp_bridge_ops {
 	int (*port_join)(struct mlxsw_sp_bridge_device *bridge_device,
 			 struct mlxsw_sp_bridge_port *bridge_port,
-			 struct mlxsw_sp_port *mlxsw_sp_port);
+			 struct mlxsw_sp_port *mlxsw_sp_port,
+			 struct netlink_ext_ack *extack);
 	void (*port_leave)(struct mlxsw_sp_bridge_device *bridge_device,
 			   struct mlxsw_sp_bridge_port *bridge_port,
 			   struct mlxsw_sp_port *mlxsw_sp_port);
@@ -121,6 +125,20 @@ mlxsw_sp_bridge_port_fdb_flush(struct mlxsw_sp *mlxsw_sp,
 			       struct mlxsw_sp_bridge_port *bridge_port,
 			       u16 fid_index);
 
+static void
+mlxsw_sp_bridge_port_mdb_flush(struct mlxsw_sp_port *mlxsw_sp_port,
+			       struct mlxsw_sp_bridge_port *bridge_port);
+
+static void
+mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp_port *mlxsw_sp_port,
+				   struct mlxsw_sp_bridge_device
+				   *bridge_device);
+
+static void
+mlxsw_sp_port_mrouter_update_mdb(struct mlxsw_sp_port *mlxsw_sp_port,
+				 struct mlxsw_sp_bridge_port *bridge_port,
+				 bool add);
+
 static struct mlxsw_sp_bridge_device *
 mlxsw_sp_bridge_device_find(const struct mlxsw_sp_bridge *bridge,
 			    const struct net_device *br_dev)
@@ -154,6 +172,7 @@ mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge,
 	bridge_device->dev = br_dev;
 	bridge_device->vlan_enabled = vlan_enabled;
 	bridge_device->multicast_enabled = br_multicast_enabled(br_dev);
+	bridge_device->mrouter = br_multicast_router(br_dev);
 	INIT_LIST_HEAD(&bridge_device->ports_list);
 	if (vlan_enabled) {
 		bridge->vlan_enabled_exists = true;
@@ -161,6 +180,7 @@ mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge,
 	} else {
 		bridge_device->ops = bridge->bridge_8021d_ops;
 	}
+	INIT_LIST_HEAD(&bridge_device->mids_list);
 	list_add(&bridge_device->list, &bridge->bridges_list);
 
 	return bridge_device;
@@ -174,6 +194,7 @@ mlxsw_sp_bridge_device_destroy(struct mlxsw_sp_bridge *bridge,
 	if (bridge_device->vlan_enabled)
 		bridge->vlan_enabled_exists = false;
 	WARN_ON(!list_empty(&bridge_device->ports_list));
+	WARN_ON(!list_empty(&bridge_device->mids_list));
 	kfree(bridge_device);
 }
 
@@ -249,7 +270,8 @@ mlxsw_sp_bridge_port_create(struct mlxsw_sp_bridge_device *bridge_device,
 	bridge_port->dev = brport_dev;
 	bridge_port->bridge_device = bridge_device;
 	bridge_port->stp_state = BR_STATE_DISABLED;
-	bridge_port->flags = BR_LEARNING | BR_FLOOD | BR_LEARNING_SYNC;
+	bridge_port->flags = BR_LEARNING | BR_FLOOD | BR_LEARNING_SYNC |
+			     BR_MCAST_FLOOD;
 	INIT_LIST_HEAD(&bridge_port->vlans_list);
 	list_add(&bridge_port->list, &bridge_device->ports_list);
 	bridge_port->ref_count = 1;
@@ -455,7 +477,8 @@ static int mlxsw_sp_port_attr_get(struct net_device *dev,
 					       &attr->u.brport_flags);
 		break;
 	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
-		attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD;
+		attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD |
+					       BR_MCAST_FLOOD;
 		break;
 	default:
 		return -EOPNOTSUPP;
@@ -640,8 +663,18 @@ static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port,
 	if (err)
 		return err;
 
-	memcpy(&bridge_port->flags, &brport_flags, sizeof(brport_flags));
+	if (bridge_port->bridge_device->multicast_enabled)
+		goto out;
 
+	err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port, bridge_port,
+						   MLXSW_SP_FLOOD_TYPE_MC,
+						   brport_flags &
+						   BR_MCAST_FLOOD);
+	if (err)
+		return err;
+
+out:
+	memcpy(&bridge_port->flags, &brport_flags, sizeof(brport_flags));
 	return 0;
 }
 
@@ -699,10 +732,10 @@ static int mlxsw_sp_port_attr_br_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port,
 	return -EINVAL;
 }
 
-static int mlxsw_sp_port_attr_mc_router_set(struct mlxsw_sp_port *mlxsw_sp_port,
-					    struct switchdev_trans *trans,
-					    struct net_device *orig_dev,
-					    bool is_port_mc_router)
+static int mlxsw_sp_port_attr_mrouter_set(struct mlxsw_sp_port *mlxsw_sp_port,
+					  struct switchdev_trans *trans,
+					  struct net_device *orig_dev,
+					  bool is_port_mrouter)
 {
 	struct mlxsw_sp_bridge_port *bridge_port;
 	int err;
@@ -720,15 +753,26 @@ static int mlxsw_sp_port_attr_mc_router_set(struct mlxsw_sp_port *mlxsw_sp_port,
 
 	err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port, bridge_port,
 						   MLXSW_SP_FLOOD_TYPE_MC,
-						   is_port_mc_router);
+						   is_port_mrouter);
 	if (err)
 		return err;
 
+	mlxsw_sp_port_mrouter_update_mdb(mlxsw_sp_port, bridge_port,
+					 is_port_mrouter);
 out:
-	bridge_port->mrouter = is_port_mc_router;
+	bridge_port->mrouter = is_port_mrouter;
 	return 0;
 }
 
+static bool mlxsw_sp_mc_flood(const struct mlxsw_sp_bridge_port *bridge_port)
+{
+	const struct mlxsw_sp_bridge_device *bridge_device;
+
+	bridge_device = bridge_port->bridge_device;
+	return bridge_device->multicast_enabled ? bridge_port->mrouter :
+					bridge_port->flags & BR_MCAST_FLOOD;
+}
+
 static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port,
 					 struct switchdev_trans *trans,
 					 struct net_device *orig_dev,
@@ -749,9 +793,15 @@ static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port,
 	if (!bridge_device)
 		return 0;
 
+	if (bridge_device->multicast_enabled != !mc_disabled) {
+		bridge_device->multicast_enabled = !mc_disabled;
+		mlxsw_sp_bridge_mdb_mc_enable_sync(mlxsw_sp_port,
+						   bridge_device);
+	}
+
 	list_for_each_entry(bridge_port, &bridge_device->ports_list, list) {
 		enum mlxsw_sp_flood_type packet_type = MLXSW_SP_FLOOD_TYPE_MC;
-		bool member = mc_disabled ? true : bridge_port->mrouter;
+		bool member = mlxsw_sp_mc_flood(bridge_port);
 
 		err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port,
 							   bridge_port,
@@ -765,6 +815,60 @@ static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port,
 	return 0;
 }
 
+static int mlxsw_sp_smid_router_port_set(struct mlxsw_sp *mlxsw_sp,
+					 u16 mid_idx, bool add)
+{
+	char *smid_pl;
+	int err;
+
+	smid_pl = kmalloc(MLXSW_REG_SMID_LEN, GFP_KERNEL);
+	if (!smid_pl)
+		return -ENOMEM;
+
+	mlxsw_reg_smid_pack(smid_pl, mid_idx,
+			    mlxsw_sp_router_port(mlxsw_sp), add);
+	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid), smid_pl);
+	kfree(smid_pl);
+	return err;
+}
+
+static void
+mlxsw_sp_bridge_mrouter_update_mdb(struct mlxsw_sp *mlxsw_sp,
+				   struct mlxsw_sp_bridge_device *bridge_device,
+				   bool add)
+{
+	struct mlxsw_sp_mid *mid;
+
+	list_for_each_entry(mid, &bridge_device->mids_list, list)
+		mlxsw_sp_smid_router_port_set(mlxsw_sp, mid->mid, add);
+}
+
+static int
+mlxsw_sp_port_attr_br_mrouter_set(struct mlxsw_sp_port *mlxsw_sp_port,
+				  struct switchdev_trans *trans,
+				  struct net_device *orig_dev,
+				  bool is_mrouter)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	struct mlxsw_sp_bridge_device *bridge_device;
+
+	if (switchdev_trans_ph_prepare(trans))
+		return 0;
+
+	/* It's possible we failed to enslave the port, yet this
+	 * operation is executed due to it being deferred.
+	 */
+	bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, orig_dev);
+	if (!bridge_device)
+		return 0;
+
+	if (bridge_device->mrouter != is_mrouter)
+		mlxsw_sp_bridge_mrouter_update_mdb(mlxsw_sp, bridge_device,
+						   is_mrouter);
+	bridge_device->mrouter = is_mrouter;
+	return 0;
+}
+
 static int mlxsw_sp_port_attr_set(struct net_device *dev,
 				  const struct switchdev_attr *attr,
 				  struct switchdev_trans *trans)
@@ -793,15 +897,20 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev,
 						     attr->u.vlan_filtering);
 		break;
 	case SWITCHDEV_ATTR_ID_PORT_MROUTER:
-		err = mlxsw_sp_port_attr_mc_router_set(mlxsw_sp_port, trans,
-						       attr->orig_dev,
-						       attr->u.mrouter);
+		err = mlxsw_sp_port_attr_mrouter_set(mlxsw_sp_port, trans,
+						     attr->orig_dev,
+						     attr->u.mrouter);
 		break;
 	case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED:
 		err = mlxsw_sp_port_mc_disabled_set(mlxsw_sp_port, trans,
 						    attr->orig_dev,
 						    attr->u.mc_disabled);
 		break;
+	case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER:
+		err = mlxsw_sp_port_attr_br_mrouter_set(mlxsw_sp_port, trans,
+							attr->orig_dev,
+							attr->u.mrouter);
+		break;
 	default:
 		err = -EOPNOTSUPP;
 		break;
@@ -810,14 +919,6 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev,
 	return err;
 }
 
-static bool mlxsw_sp_mc_flood(const struct mlxsw_sp_bridge_port *bridge_port)
-{
-	const struct mlxsw_sp_bridge_device *bridge_device;
-
-	bridge_device = bridge_port->bridge_device;
-	return !bridge_device->multicast_enabled ? true : bridge_port->mrouter;
-}
-
 static int
 mlxsw_sp_port_vlan_fid_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
 			    struct mlxsw_sp_bridge_port *bridge_port)
@@ -955,24 +1056,28 @@ mlxsw_sp_port_vlan_bridge_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
 	struct mlxsw_sp_bridge_vlan *bridge_vlan;
 	struct mlxsw_sp_bridge_port *bridge_port;
 	u16 vid = mlxsw_sp_port_vlan->vid;
-	bool last;
+	bool last_port, last_vlan;
 
 	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_8021Q &&
 		    mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_8021D))
 		return;
 
 	bridge_port = mlxsw_sp_port_vlan->bridge_port;
+	last_vlan = list_is_singular(&bridge_port->vlans_list);
 	bridge_vlan = mlxsw_sp_bridge_vlan_find(bridge_port, vid);
-	last = list_is_singular(&bridge_vlan->port_vlan_list);
+	last_port = list_is_singular(&bridge_vlan->port_vlan_list);
 
 	list_del(&mlxsw_sp_port_vlan->bridge_vlan_node);
 	mlxsw_sp_bridge_vlan_put(bridge_vlan);
 	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_DISABLED);
 	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
-	if (last)
+	if (last_port)
 		mlxsw_sp_bridge_port_fdb_flush(mlxsw_sp_port->mlxsw_sp,
 					       bridge_port,
 					       mlxsw_sp_fid_index(fid));
+	if (last_vlan)
+		mlxsw_sp_bridge_port_mdb_flush(mlxsw_sp_port, bridge_port);
+
 	mlxsw_sp_port_vlan_fid_leave(mlxsw_sp_port_vlan);
 
 	mlxsw_sp_bridge_port_put(mlxsw_sp_port->mlxsw_sp->bridge, bridge_port);
@@ -1182,7 +1287,7 @@ mlxsw_sp_port_fdb_set(struct mlxsw_sp_port *mlxsw_sp_port,
 }
 
 static int mlxsw_sp_port_mdb_op(struct mlxsw_sp *mlxsw_sp, const char *addr,
-				u16 fid, u16 mid, bool adding)
+				u16 fid, u16 mid_idx, bool adding)
 {
 	char *sfd_pl;
 	int err;
@@ -1193,16 +1298,16 @@ static int mlxsw_sp_port_mdb_op(struct mlxsw_sp *mlxsw_sp, const char *addr,
 
 	mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0);
 	mlxsw_reg_sfd_mc_pack(sfd_pl, 0, addr, fid,
-			      MLXSW_REG_SFD_REC_ACTION_NOP, mid);
+			      MLXSW_REG_SFD_REC_ACTION_NOP, mid_idx);
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
 	kfree(sfd_pl);
 	return err;
 }
 
-static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mid,
-				  bool add, bool clear_all_ports)
+static int mlxsw_sp_port_smid_full_entry(struct mlxsw_sp *mlxsw_sp, u16 mid_idx,
+					 long *ports_bitmap,
+					 bool set_router_port)
 {
-	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	char *smid_pl;
 	int err, i;
 
@@ -1210,66 +1315,208 @@ static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mid,
 	if (!smid_pl)
 		return -ENOMEM;
 
-	mlxsw_reg_smid_pack(smid_pl, mid, mlxsw_sp_port->local_port, add);
-	if (clear_all_ports) {
-		for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++)
-			if (mlxsw_sp->ports[i])
-				mlxsw_reg_smid_port_mask_set(smid_pl, i, 1);
+	mlxsw_reg_smid_pack(smid_pl, mid_idx, 0, false);
+	for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) {
+		if (mlxsw_sp->ports[i])
+			mlxsw_reg_smid_port_mask_set(smid_pl, i, 1);
 	}
+
+	mlxsw_reg_smid_port_mask_set(smid_pl,
+				     mlxsw_sp_router_port(mlxsw_sp), 1);
+
+	for_each_set_bit(i, ports_bitmap, mlxsw_core_max_ports(mlxsw_sp->core))
+		mlxsw_reg_smid_port_set(smid_pl, i, 1);
+
+	mlxsw_reg_smid_port_set(smid_pl, mlxsw_sp_router_port(mlxsw_sp),
+				set_router_port);
+
+	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid), smid_pl);
+	kfree(smid_pl);
+	return err;
+}
+
+static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port,
+				  u16 mid_idx, bool add)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	char *smid_pl;
+	int err;
+
+	smid_pl = kmalloc(MLXSW_REG_SMID_LEN, GFP_KERNEL);
+	if (!smid_pl)
+		return -ENOMEM;
+
+	mlxsw_reg_smid_pack(smid_pl, mid_idx, mlxsw_sp_port->local_port, add);
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid), smid_pl);
 	kfree(smid_pl);
 	return err;
 }
 
-static struct mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp *mlxsw_sp,
-					      const unsigned char *addr,
-					      u16 fid)
+static struct
+mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp_bridge_device *bridge_device,
+				const unsigned char *addr,
+				u16 fid)
 {
 	struct mlxsw_sp_mid *mid;
 
-	list_for_each_entry(mid, &mlxsw_sp->bridge->mids_list, list) {
+	list_for_each_entry(mid, &bridge_device->mids_list, list) {
 		if (ether_addr_equal(mid->addr, addr) && mid->fid == fid)
 			return mid;
 	}
 	return NULL;
 }
 
-static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp,
-						const unsigned char *addr,
-						u16 fid)
+static void
+mlxsw_sp_bridge_port_get_ports_bitmap(struct mlxsw_sp *mlxsw_sp,
+				      struct mlxsw_sp_bridge_port *bridge_port,
+				      unsigned long *ports_bitmap)
+{
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	u64 max_lag_members, i;
+	int lag_id;
+
+	if (!bridge_port->lagged) {
+		set_bit(bridge_port->system_port, ports_bitmap);
+	} else {
+		max_lag_members = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+						     MAX_LAG_MEMBERS);
+		lag_id = bridge_port->lag_id;
+		for (i = 0; i < max_lag_members; i++) {
+			mlxsw_sp_port = mlxsw_sp_port_lagged_get(mlxsw_sp,
+								 lag_id, i);
+			if (mlxsw_sp_port)
+				set_bit(mlxsw_sp_port->local_port,
+					ports_bitmap);
+		}
+	}
+}
+
+static void
+mlxsw_sp_mc_get_mrouters_bitmap(unsigned long *flood_bitmap,
+				struct mlxsw_sp_bridge_device *bridge_device,
+				struct mlxsw_sp *mlxsw_sp)
 {
-	struct mlxsw_sp_mid *mid;
+	struct mlxsw_sp_bridge_port *bridge_port;
+
+	list_for_each_entry(bridge_port, &bridge_device->ports_list, list) {
+		if (bridge_port->mrouter) {
+			mlxsw_sp_bridge_port_get_ports_bitmap(mlxsw_sp,
+							      bridge_port,
+							      flood_bitmap);
+		}
+	}
+}
+
+static bool
+mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp,
+			    struct mlxsw_sp_mid *mid,
+			    struct mlxsw_sp_bridge_device *bridge_device)
+{
+	long *flood_bitmap;
+	int num_of_ports;
+	int alloc_size;
 	u16 mid_idx;
+	int err;
 
 	mid_idx = find_first_zero_bit(mlxsw_sp->bridge->mids_bitmap,
 				      MLXSW_SP_MID_MAX);
 	if (mid_idx == MLXSW_SP_MID_MAX)
-		return NULL;
+		return false;
+
+	num_of_ports = mlxsw_core_max_ports(mlxsw_sp->core);
+	alloc_size = sizeof(long) * BITS_TO_LONGS(num_of_ports);
+	flood_bitmap = kzalloc(alloc_size, GFP_KERNEL);
+	if (!flood_bitmap)
+		return false;
+
+	bitmap_copy(flood_bitmap,  mid->ports_in_mid, num_of_ports);
+	mlxsw_sp_mc_get_mrouters_bitmap(flood_bitmap, bridge_device, mlxsw_sp);
+
+	mid->mid = mid_idx;
+	err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx, flood_bitmap,
+					    bridge_device->mrouter);
+	kfree(flood_bitmap);
+	if (err)
+		return false;
+
+	err = mlxsw_sp_port_mdb_op(mlxsw_sp, mid->addr, mid->fid, mid_idx,
+				   true);
+	if (err)
+		return false;
+
+	set_bit(mid_idx, mlxsw_sp->bridge->mids_bitmap);
+	mid->in_hw = true;
+	return true;
+}
+
+static int mlxsw_sp_mc_remove_mdb_entry(struct mlxsw_sp *mlxsw_sp,
+					struct mlxsw_sp_mid *mid)
+{
+	if (!mid->in_hw)
+		return 0;
+
+	clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap);
+	mid->in_hw = false;
+	return mlxsw_sp_port_mdb_op(mlxsw_sp, mid->addr, mid->fid, mid->mid,
+				    false);
+}
+
+static struct
+mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_bridge_device *bridge_device,
+				  const unsigned char *addr,
+				  u16 fid)
+{
+	struct mlxsw_sp_mid *mid;
+	size_t alloc_size;
 
 	mid = kzalloc(sizeof(*mid), GFP_KERNEL);
 	if (!mid)
 		return NULL;
 
-	set_bit(mid_idx, mlxsw_sp->bridge->mids_bitmap);
+	alloc_size = sizeof(unsigned long) *
+		     BITS_TO_LONGS(mlxsw_core_max_ports(mlxsw_sp->core));
+
+	mid->ports_in_mid = kzalloc(alloc_size, GFP_KERNEL);
+	if (!mid->ports_in_mid)
+		goto err_ports_in_mid_alloc;
+
 	ether_addr_copy(mid->addr, addr);
 	mid->fid = fid;
-	mid->mid = mid_idx;
-	mid->ref_count = 0;
-	list_add_tail(&mid->list, &mlxsw_sp->bridge->mids_list);
+	mid->in_hw = false;
+
+	if (!bridge_device->multicast_enabled)
+		goto out;
 
+	if (!mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid, bridge_device))
+		goto err_write_mdb_entry;
+
+out:
+	list_add_tail(&mid->list, &bridge_device->mids_list);
 	return mid;
+
+err_write_mdb_entry:
+	kfree(mid->ports_in_mid);
+err_ports_in_mid_alloc:
+	kfree(mid);
+	return NULL;
 }
 
-static int __mlxsw_sp_mc_dec_ref(struct mlxsw_sp *mlxsw_sp,
-				 struct mlxsw_sp_mid *mid)
+static int mlxsw_sp_port_remove_from_mid(struct mlxsw_sp_port *mlxsw_sp_port,
+					 struct mlxsw_sp_mid *mid)
 {
-	if (--mid->ref_count == 0) {
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	int err = 0;
+
+	clear_bit(mlxsw_sp_port->local_port, mid->ports_in_mid);
+	if (bitmap_empty(mid->ports_in_mid,
+			 mlxsw_core_max_ports(mlxsw_sp->core))) {
+		err = mlxsw_sp_mc_remove_mdb_entry(mlxsw_sp, mid);
 		list_del(&mid->list);
-		clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap);
+		kfree(mid->ports_in_mid);
 		kfree(mid);
-		return 1;
 	}
-	return 0;
+	return err;
 }
 
 static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port,
@@ -1302,39 +1549,72 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port,
 
 	fid_index = mlxsw_sp_fid_index(mlxsw_sp_port_vlan->fid);
 
-	mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, fid_index);
+	mid = __mlxsw_sp_mc_get(bridge_device, mdb->addr, fid_index);
 	if (!mid) {
-		mid = __mlxsw_sp_mc_alloc(mlxsw_sp, mdb->addr, fid_index);
+		mid = __mlxsw_sp_mc_alloc(mlxsw_sp, bridge_device, mdb->addr,
+					  fid_index);
 		if (!mid) {
 			netdev_err(dev, "Unable to allocate MC group\n");
 			return -ENOMEM;
 		}
 	}
-	mid->ref_count++;
+	set_bit(mlxsw_sp_port->local_port, mid->ports_in_mid);
+
+	if (!bridge_device->multicast_enabled)
+		return 0;
 
-	err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, true,
-				     mid->ref_count == 1);
+	if (bridge_port->mrouter)
+		return 0;
+
+	err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, true);
 	if (err) {
 		netdev_err(dev, "Unable to set SMID\n");
 		goto err_out;
 	}
 
-	if (mid->ref_count == 1) {
-		err = mlxsw_sp_port_mdb_op(mlxsw_sp, mdb->addr, fid_index,
-					   mid->mid, true);
-		if (err) {
-			netdev_err(dev, "Unable to set MC SFD\n");
-			goto err_out;
-		}
-	}
-
 	return 0;
 
 err_out:
-	__mlxsw_sp_mc_dec_ref(mlxsw_sp, mid);
+	mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid);
 	return err;
 }
 
+static void
+mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp_port *mlxsw_sp_port,
+				   struct mlxsw_sp_bridge_device
+				   *bridge_device)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	struct mlxsw_sp_mid *mid;
+	bool mc_enabled;
+
+	mc_enabled = bridge_device->multicast_enabled;
+
+	list_for_each_entry(mid, &bridge_device->mids_list, list) {
+		if (mc_enabled)
+			mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid,
+						    bridge_device);
+		else
+			mlxsw_sp_mc_remove_mdb_entry(mlxsw_sp, mid);
+	}
+}
+
+static void
+mlxsw_sp_port_mrouter_update_mdb(struct mlxsw_sp_port *mlxsw_sp_port,
+				 struct mlxsw_sp_bridge_port *bridge_port,
+				 bool add)
+{
+	struct mlxsw_sp_bridge_device *bridge_device;
+	struct mlxsw_sp_mid *mid;
+
+	bridge_device = bridge_port->bridge_device;
+
+	list_for_each_entry(mid, &bridge_device->mids_list, list) {
+		if (!test_bit(mlxsw_sp_port->local_port, mid->ports_in_mid))
+			mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, add);
+	}
+}
+
 static int mlxsw_sp_port_obj_add(struct net_device *dev,
 				 const struct switchdev_obj *obj,
 				 struct switchdev_trans *trans)
@@ -1399,6 +1679,30 @@ static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
 	return 0;
 }
 
+static int
+__mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port,
+			struct mlxsw_sp_bridge_port *bridge_port,
+			struct mlxsw_sp_mid *mid)
+{
+	struct net_device *dev = mlxsw_sp_port->dev;
+	int err;
+
+	if (bridge_port->bridge_device->multicast_enabled) {
+		if (bridge_port->bridge_device->multicast_enabled) {
+			err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid,
+						     false);
+			if (err)
+				netdev_err(dev, "Unable to remove port from SMID\n");
+		}
+	}
+
+	err = mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid);
+	if (err)
+		netdev_err(dev, "Unable to remove MC SFD\n");
+
+	return err;
+}
+
 static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port,
 				 const struct switchdev_obj_port_mdb *mdb)
 {
@@ -1410,8 +1714,6 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port,
 	struct mlxsw_sp_bridge_port *bridge_port;
 	struct mlxsw_sp_mid *mid;
 	u16 fid_index;
-	u16 mid_idx;
-	int err = 0;
 
 	bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev);
 	if (!bridge_port)
@@ -1426,25 +1728,33 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port,
 
 	fid_index = mlxsw_sp_fid_index(mlxsw_sp_port_vlan->fid);
 
-	mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, fid_index);
+	mid = __mlxsw_sp_mc_get(bridge_device, mdb->addr, fid_index);
 	if (!mid) {
 		netdev_err(dev, "Unable to remove port from MC DB\n");
 		return -EINVAL;
 	}
 
-	err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false, false);
-	if (err)
-		netdev_err(dev, "Unable to remove port from SMID\n");
+	return __mlxsw_sp_port_mdb_del(mlxsw_sp_port, bridge_port, mid);
+}
 
-	mid_idx = mid->mid;
-	if (__mlxsw_sp_mc_dec_ref(mlxsw_sp, mid)) {
-		err = mlxsw_sp_port_mdb_op(mlxsw_sp, mdb->addr, fid_index,
-					   mid_idx, false);
-		if (err)
-			netdev_err(dev, "Unable to remove MC SFD\n");
-	}
+static void
+mlxsw_sp_bridge_port_mdb_flush(struct mlxsw_sp_port *mlxsw_sp_port,
+			       struct mlxsw_sp_bridge_port *bridge_port)
+{
+	struct mlxsw_sp_bridge_device *bridge_device;
+	struct mlxsw_sp_mid *mid, *tmp;
 
-	return err;
+	bridge_device = bridge_port->bridge_device;
+
+	list_for_each_entry_safe(mid, tmp, &bridge_device->mids_list, list) {
+		if (test_bit(mlxsw_sp_port->local_port, mid->ports_in_mid)) {
+			__mlxsw_sp_port_mdb_del(mlxsw_sp_port, bridge_port,
+						mid);
+		} else if (bridge_device->multicast_enabled &&
+			   bridge_port->mrouter) {
+			mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false);
+		}
+	}
 }
 
 static int mlxsw_sp_port_obj_del(struct net_device *dev,
@@ -1497,12 +1807,15 @@ static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = {
 static int
 mlxsw_sp_bridge_8021q_port_join(struct mlxsw_sp_bridge_device *bridge_device,
 				struct mlxsw_sp_bridge_port *bridge_port,
-				struct mlxsw_sp_port *mlxsw_sp_port)
+				struct mlxsw_sp_port *mlxsw_sp_port,
+				struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
 
-	if (is_vlan_dev(bridge_port->dev))
+	if (is_vlan_dev(bridge_port->dev)) {
+		NL_SET_ERR_MSG(extack, "spectrum: Can not enslave a VLAN device to a VLAN-aware bridge");
 		return -EINVAL;
+	}
 
 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, 1);
 	if (WARN_ON(!mlxsw_sp_port_vlan))
@@ -1559,13 +1872,16 @@ mlxsw_sp_port_is_br_member(const struct mlxsw_sp_port *mlxsw_sp_port,
 static int
 mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device,
 				struct mlxsw_sp_bridge_port *bridge_port,
-				struct mlxsw_sp_port *mlxsw_sp_port)
+				struct mlxsw_sp_port *mlxsw_sp_port,
+				struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
 	u16 vid;
 
-	if (!is_vlan_dev(bridge_port->dev))
+	if (!is_vlan_dev(bridge_port->dev)) {
+		NL_SET_ERR_MSG(extack, "spectrum: Only VLAN devices can be enslaved to a VLAN-unaware bridge");
 		return -EINVAL;
+	}
 	vid = vlan_dev_vlan_id(bridge_port->dev);
 
 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
@@ -1573,7 +1889,7 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device,
 		return -EINVAL;
 
 	if (mlxsw_sp_port_is_br_member(mlxsw_sp_port, bridge_device->dev)) {
-		netdev_err(mlxsw_sp_port->dev, "Can't bridge VLAN uppers of the same port\n");
+		NL_SET_ERR_MSG(extack, "spectrum: Can not bridge VLAN uppers of the same port");
 		return -EINVAL;
 	}
 
@@ -1616,7 +1932,8 @@ static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021d_ops = {
 
 int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port,
 			      struct net_device *brport_dev,
-			      struct net_device *br_dev)
+			      struct net_device *br_dev,
+			      struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	struct mlxsw_sp_bridge_device *bridge_device;
@@ -1629,7 +1946,7 @@ int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port,
 	bridge_device = bridge_port->bridge_device;
 
 	err = bridge_device->ops->port_join(bridge_device, bridge_port,
-					    mlxsw_sp_port);
+					    mlxsw_sp_port, extack);
 	if (err)
 		goto err_port_join;
 
@@ -1981,17 +2298,6 @@ static void mlxsw_sp_fdb_fini(struct mlxsw_sp *mlxsw_sp)
 
 }
 
-static void mlxsw_sp_mids_fini(struct mlxsw_sp *mlxsw_sp)
-{
-	struct mlxsw_sp_mid *mid, *tmp;
-
-	list_for_each_entry_safe(mid, tmp, &mlxsw_sp->bridge->mids_list, list) {
-		list_del(&mid->list);
-		clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap);
-		kfree(mid);
-	}
-}
-
 int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp)
 {
 	struct mlxsw_sp_bridge *bridge;
@@ -2003,7 +2309,6 @@ int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp)
 	bridge->mlxsw_sp = mlxsw_sp;
 
 	INIT_LIST_HEAD(&mlxsw_sp->bridge->bridges_list);
-	INIT_LIST_HEAD(&mlxsw_sp->bridge->mids_list);
 
 	bridge->bridge_8021q_ops = &mlxsw_sp_bridge_8021q_ops;
 	bridge->bridge_8021d_ops = &mlxsw_sp_bridge_8021d_ops;
@@ -2014,7 +2319,6 @@ int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp)
 void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp)
 {
 	mlxsw_sp_fdb_fini(mlxsw_sp);
-	mlxsw_sp_mids_fini(mlxsw_sp);
 	WARN_ON(!list_empty(&mlxsw_sp->bridge->bridges_list));
 	kfree(mlxsw_sp->bridge);
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index f396a1fef633..ec6cef8267ae 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -62,6 +62,8 @@ enum {
 	MLXSW_TRAP_ID_TTLERROR = 0x53,
 	MLXSW_TRAP_ID_LBERROR = 0x54,
 	MLXSW_TRAP_ID_IPV4_OSPF = 0x55,
+	MLXSW_TRAP_ID_IPV4_PIM = 0x58,
+	MLXSW_TRAP_ID_RPF = 0x5C,
 	MLXSW_TRAP_ID_IP2ME = 0x5F,
 	MLXSW_TRAP_ID_IPV6_UNSPECIFIED_ADDRESS = 0x60,
 	MLXSW_TRAP_ID_IPV6_LINK_LOCAL_DEST = 0x61,
@@ -89,6 +91,10 @@ enum {
 	MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6,
 	MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7,
 	MLXSW_TRAP_ID_ACL0 = 0x1C0,
+	/* Multicast trap used for routes with trap action */
+	MLXSW_TRAP_ID_ACL1 = 0x1C1,
+	/* Multicast trap used for routes with trap-and-forward action */
+	MLXSW_TRAP_ID_ACL2 = 0x1C2,
 
 	MLXSW_TRAP_ID_MAX = 0x1FF
 };
diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c
index e798fbe08600..52207508744c 100644
--- a/drivers/net/ethernet/micrel/ksz884x.c
+++ b/drivers/net/ethernet/micrel/ksz884x.c
@@ -4338,11 +4338,11 @@ static void ksz_stop_timer(struct ksz_timer_info *info)
 }
 
 static void ksz_init_timer(struct ksz_timer_info *info, int period,
-	void (*function)(unsigned long), void *data)
+	void (*function)(struct timer_list *))
 {
 	info->max = 0;
 	info->period = period;
-	setup_timer(&info->timer, function, (unsigned long)data);
+	timer_setup(&info->timer, function, 0);
 }
 
 static void ksz_update_timer(struct ksz_timer_info *info)
@@ -6689,9 +6689,9 @@ static void mib_read_work(struct work_struct *work)
 	}
 }
 
-static void mib_monitor(unsigned long ptr)
+static void mib_monitor(struct timer_list *t)
 {
-	struct dev_info *hw_priv = (struct dev_info *) ptr;
+	struct dev_info *hw_priv = from_timer(hw_priv, t, mib_timer_info.timer);
 
 	mib_read_work(&hw_priv->mib_read);
 
@@ -6716,10 +6716,10 @@ static void mib_monitor(unsigned long ptr)
  *
  * This routine is run in a kernel timer to monitor the network device.
  */
-static void dev_monitor(unsigned long ptr)
+static void dev_monitor(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *) ptr;
-	struct dev_priv *priv = netdev_priv(dev);
+	struct dev_priv *priv = from_timer(priv, t, monitor_timer_info.timer);
+	struct net_device *dev = priv->mii_if.dev;
 	struct dev_info *hw_priv = priv->adapter;
 	struct ksz_hw *hw = &hw_priv->hw;
 	struct ksz_port *port = &priv->port;
@@ -6789,7 +6789,7 @@ static int __init netdev_init(struct net_device *dev)
 
 	/* 500 ms timeout */
 	ksz_init_timer(&priv->monitor_timer_info, 500 * HZ / 1000,
-		dev_monitor, dev);
+		dev_monitor);
 
 	/* 500 ms timeout */
 	dev->watchdog_timeo = HZ / 2;
@@ -7065,7 +7065,7 @@ static int pcidev_init(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	/* 500 ms timeout */
 	ksz_init_timer(&hw_priv->mib_timer_info, 500 * HZ / 1000,
-		mib_monitor, hw_priv);
+		mib_monitor);
 
 	for (i = 0; i < hw->dev_count; i++) {
 		dev = alloc_etherdev(sizeof(struct dev_priv));
diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c
index 18af2a23a933..b9a1a9f999ea 100644
--- a/drivers/net/ethernet/natsemi/natsemi.c
+++ b/drivers/net/ethernet/natsemi/natsemi.c
@@ -610,7 +610,7 @@ static int netdev_open(struct net_device *dev);
 static void do_cable_magic(struct net_device *dev);
 static void undo_cable_magic(struct net_device *dev);
 static void check_link(struct net_device *dev);
-static void netdev_timer(unsigned long data);
+static void netdev_timer(struct timer_list *t);
 static void dump_ring(struct net_device *dev);
 static void ns_tx_timeout(struct net_device *dev);
 static int alloc_ring(struct net_device *dev);
@@ -1571,10 +1571,8 @@ static int netdev_open(struct net_device *dev)
 			dev->name, (int)readl(ioaddr + ChipCmd));
 
 	/* Set the timer to check for link beat. */
-	init_timer(&np->timer);
+	timer_setup(&np->timer, netdev_timer, 0);
 	np->timer.expires = round_jiffies(jiffies + NATSEMI_TIMER_FREQ);
-	np->timer.data = (unsigned long)dev;
-	np->timer.function = netdev_timer; /* timer handler */
 	add_timer(&np->timer);
 
 	return 0;
@@ -1789,10 +1787,10 @@ static void init_registers(struct net_device *dev)
  *    this check via dspcfg_workaround sysfs option.
  * 3) check of death of the RX path due to OOM
  */
-static void netdev_timer(unsigned long data)
+static void netdev_timer(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *)data;
-	struct netdev_private *np = netdev_priv(dev);
+	struct netdev_private *np = from_timer(np, t, timer);
+	struct net_device *dev = np->dev;
 	void __iomem * ioaddr = ns_ioaddr(dev);
 	int next_tick = NATSEMI_TIMER_FREQ;
 	const int irq = np->pci_dev->irq;
diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c
index 729095db3e08..958fced4dacf 100644
--- a/drivers/net/ethernet/natsemi/ns83820.c
+++ b/drivers/net/ethernet/natsemi/ns83820.c
@@ -1600,10 +1600,10 @@ static void ns83820_tx_timeout(struct net_device *ndev)
 	spin_unlock_irqrestore(&dev->tx_lock, flags);
 }
 
-static void ns83820_tx_watch(unsigned long data)
+static void ns83820_tx_watch(struct timer_list *t)
 {
-	struct net_device *ndev = (void *)data;
-	struct ns83820 *dev = PRIV(ndev);
+	struct ns83820 *dev = from_timer(dev, t, tx_watchdog);
+	struct net_device *ndev = dev->ndev;
 
 #if defined(DEBUG)
 	printk("ns83820_tx_watch: %u %u %d\n",
@@ -1652,9 +1652,7 @@ static int ns83820_open(struct net_device *ndev)
 	writel(0, dev->base + TXDP_HI);
 	writel(desc, dev->base + TXDP);
 
-	init_timer(&dev->tx_watchdog);
-	dev->tx_watchdog.data = (unsigned long)ndev;
-	dev->tx_watchdog.function = ns83820_tx_watch;
+	timer_setup(&dev->tx_watchdog, ns83820_tx_watch, 0);
 	mod_timer(&dev->tx_watchdog, jiffies + 2*HZ);
 
 	netif_start_queue(ndev);	/* FIXME: wait for phy to come up */
diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
index 462eda926b1c..b8983e73265a 100644
--- a/drivers/net/ethernet/neterion/s2io.c
+++ b/drivers/net/ethernet/neterion/s2io.c
@@ -337,12 +337,6 @@ static const char ethtool_driver_stats_keys[][ETH_GSTRING_LEN] = {
 #define S2IO_TEST_LEN	ARRAY_SIZE(s2io_gstrings)
 #define S2IO_STRINGS_LEN	(S2IO_TEST_LEN * ETH_GSTRING_LEN)
 
-#define S2IO_TIMER_CONF(timer, handle, arg, exp)	\
-	init_timer(&timer);				\
-	timer.function = handle;			\
-	timer.data = (unsigned long)arg;		\
-	mod_timer(&timer, (jiffies + exp))		\
-
 /* copy mac addr to def_mac_addr array */
 static void do_s2io_copy_mac_addr(struct s2io_nic *sp, int offset, u64 mac_addr)
 {
@@ -4193,9 +4187,9 @@ pci_map_failed:
 }
 
 static void
-s2io_alarm_handle(unsigned long data)
+s2io_alarm_handle(struct timer_list *t)
 {
-	struct s2io_nic *sp = (struct s2io_nic *)data;
+	struct s2io_nic *sp = from_timer(sp, t, alarm_timer);
 	struct net_device *dev = sp->dev;
 
 	s2io_handle_errors(dev);
@@ -7186,7 +7180,8 @@ static int s2io_card_up(struct s2io_nic *sp)
 		return -ENODEV;
 	}
 
-	S2IO_TIMER_CONF(sp->alarm_timer, s2io_alarm_handle, sp, (HZ/2));
+	timer_setup(&sp->alarm_timer, s2io_alarm_handle, 0);
+	mod_timer(&sp->alarm_timer, jiffies + HZ / 2);
 
 	set_bit(__S2IO_STATE_CARD_UP, &sp->state);
 
diff --git a/drivers/net/ethernet/neterion/s2io.h b/drivers/net/ethernet/neterion/s2io.h
index 6c5997dc8afc..1a24a7218794 100644
--- a/drivers/net/ethernet/neterion/s2io.h
+++ b/drivers/net/ethernet/neterion/s2io.h
@@ -1094,7 +1094,7 @@ static int s2io_poll_msix(struct napi_struct *napi, int budget);
 static int s2io_poll_inta(struct napi_struct *napi, int budget);
 static void s2io_init_pci(struct s2io_nic * sp);
 static int do_s2io_prog_unicast(struct net_device *dev, u8 *addr);
-static void s2io_alarm_handle(unsigned long data);
+static void s2io_alarm_handle(struct timer_list *t);
 static irqreturn_t
 s2io_msix_ring_handle(int irq, void *dev_id);
 static irqreturn_t
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c
index 5dd5f61e1114..fe7e0e1dd01d 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c
@@ -1122,7 +1122,6 @@ static void vxge_set_multicast(struct net_device *dev)
 	struct netdev_hw_addr *ha;
 	struct vxgedev *vdev;
 	int i, mcast_cnt = 0;
-	struct __vxge_hw_device *hldev;
 	struct vxge_vpath *vpath;
 	enum vxge_hw_status status = VXGE_HW_OK;
 	struct macInfo mac_info;
@@ -1136,7 +1135,6 @@ static void vxge_set_multicast(struct net_device *dev)
 		"%s:%d", __func__, __LINE__);
 
 	vdev = netdev_priv(dev);
-	hldev = vdev->devh;
 
 	if (unlikely(!is_vxge_card_up(vdev)))
 		return;
@@ -1283,7 +1281,6 @@ static int vxge_set_mac_addr(struct net_device *dev, void *p)
 {
 	struct sockaddr *addr = p;
 	struct vxgedev *vdev;
-	struct __vxge_hw_device *hldev;
 	enum vxge_hw_status status = VXGE_HW_OK;
 	struct macInfo mac_info_new, mac_info_old;
 	int vpath_idx = 0;
@@ -1291,7 +1288,6 @@ static int vxge_set_mac_addr(struct net_device *dev, void *p)
 	vxge_debug_entryexit(VXGE_TRACE, "%s:%d", __func__, __LINE__);
 
 	vdev = netdev_priv(dev);
-	hldev = vdev->devh;
 
 	if (!is_valid_ether_addr(addr->sa_data))
 		return -EINVAL;
@@ -2177,7 +2173,6 @@ static void adaptive_coalesce_rx_interrupts(struct vxge_ring *ring)
  */
 static irqreturn_t vxge_isr_napi(int irq, void *dev_id)
 {
-	struct net_device *dev;
 	struct __vxge_hw_device *hldev;
 	u64 reason;
 	enum vxge_hw_status status;
@@ -2185,7 +2180,6 @@ static irqreturn_t vxge_isr_napi(int irq, void *dev_id)
 
 	vxge_debug_intr(VXGE_TRACE, "%s:%d", __func__, __LINE__);
 
-	dev = vdev->ndev;
 	hldev = pci_get_drvdata(vdev->pdev);
 
 	if (pci_channel_offline(vdev->pdev))
@@ -2597,9 +2591,9 @@ INTA_MODE:
 	return VXGE_HW_OK;
 }
 
-static void vxge_poll_vp_reset(unsigned long data)
+static void vxge_poll_vp_reset(struct timer_list *t)
 {
-	struct vxgedev *vdev = (struct vxgedev *)data;
+	struct vxgedev *vdev = from_timer(vdev, t, vp_reset_timer);
 	int i, j = 0;
 
 	for (i = 0; i < vdev->no_of_vpath; i++) {
@@ -2616,9 +2610,9 @@ static void vxge_poll_vp_reset(unsigned long data)
 	mod_timer(&vdev->vp_reset_timer, jiffies + HZ / 2);
 }
 
-static void vxge_poll_vp_lockup(unsigned long data)
+static void vxge_poll_vp_lockup(struct timer_list *t)
 {
-	struct vxgedev *vdev = (struct vxgedev *)data;
+	struct vxgedev *vdev = from_timer(vdev, t, vp_lockup_timer);
 	enum vxge_hw_status status = VXGE_HW_OK;
 	struct vxge_vpath *vpath;
 	struct vxge_ring *ring;
@@ -2713,14 +2707,13 @@ static int vxge_open(struct net_device *dev)
 	struct vxge_vpath *vpath;
 	int ret = 0;
 	int i;
-	u64 val64, function_mode;
+	u64 val64;
 
 	vxge_debug_entryexit(VXGE_TRACE,
 		"%s: %s:%d", dev->name, __func__, __LINE__);
 
 	vdev = netdev_priv(dev);
 	hldev = pci_get_drvdata(vdev->pdev);
-	function_mode = vdev->config.device_hw_info.function_mode;
 
 	/* make sure you have link off by default every time Nic is
 	 * initialized */
@@ -2858,12 +2851,12 @@ static int vxge_open(struct net_device *dev)
 		vdev->config.rx_pause_enable);
 
 	if (vdev->vp_reset_timer.function == NULL)
-		vxge_os_timer(&vdev->vp_reset_timer, vxge_poll_vp_reset, vdev,
+		vxge_os_timer(&vdev->vp_reset_timer, vxge_poll_vp_reset,
 			      HZ / 2);
 
 	/* There is no need to check for RxD leak and RxD lookup on Titan1A */
 	if (vdev->titan1 && vdev->vp_lockup_timer.function == NULL)
-		vxge_os_timer(&vdev->vp_lockup_timer, vxge_poll_vp_lockup, vdev,
+		vxge_os_timer(&vdev->vp_lockup_timer, vxge_poll_vp_lockup,
 			      HZ / 2);
 
 	set_bit(__VXGE_STATE_CARD_UP, &vdev->state);
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.h b/drivers/net/ethernet/neterion/vxge/vxge-main.h
index 3a79d93b8445..59a57ff5e96a 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.h
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.h
@@ -417,12 +417,10 @@ struct vxge_tx_priv {
 	module_param(p, int, 0)
 
 static inline
-void vxge_os_timer(struct timer_list *timer, void (*func)(unsigned long data),
-		   struct vxgedev *vdev, unsigned long timeout)
+void vxge_os_timer(struct timer_list *timer, void (*func)(struct timer_list *),
+		   unsigned long timeout)
 {
-	init_timer(timer);
-	timer->function = func;
-	timer->data = (unsigned long)vdev;
+	timer_setup(timer, func, 0);
 	mod_timer(timer, jiffies + timeout);
 }
 
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-traffic.c b/drivers/net/ethernet/neterion/vxge/vxge-traffic.c
index 5f630a24e491..0c3b5dea2858 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-traffic.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-traffic.c
@@ -1209,9 +1209,6 @@ void vxge_hw_ring_rxd_pre_post(struct __vxge_hw_ring *ring, void *rxdh)
 void vxge_hw_ring_rxd_post_post(struct __vxge_hw_ring *ring, void *rxdh)
 {
 	struct vxge_hw_ring_rxd_1 *rxdp = (struct vxge_hw_ring_rxd_1 *)rxdh;
-	struct __vxge_hw_channel *channel;
-
-	channel = &ring->channel;
 
 	rxdp->control_0	= VXGE_HW_RING_RXD_LIST_OWN_ADAPTER;
 
@@ -1359,11 +1356,8 @@ exit:
 enum vxge_hw_status vxge_hw_ring_handle_tcode(
 	struct __vxge_hw_ring *ring, void *rxdh, u8 t_code)
 {
-	struct __vxge_hw_channel *channel;
 	enum vxge_hw_status status = VXGE_HW_OK;
 
-	channel = &ring->channel;
-
 	/* If the t_code is not supported and if the
 	 * t_code is other than 0x5 (unparseable packet
 	 * such as unknown UPV6 header), Drop it !!!
@@ -1399,10 +1393,6 @@ exit:
 static void __vxge_hw_non_offload_db_post(struct __vxge_hw_fifo *fifo,
 	u64 txdl_ptr, u32 num_txds, u32 no_snoop)
 {
-	struct __vxge_hw_channel *channel;
-
-	channel = &fifo->channel;
-
 	writeq(VXGE_HW_NODBW_TYPE(VXGE_HW_NODBW_TYPE_NODBW) |
 		VXGE_HW_NODBW_LAST_TXD_NUMBER(num_txds) |
 		VXGE_HW_NODBW_GET_NO_SNOOP(no_snoop),
@@ -1506,9 +1496,6 @@ void vxge_hw_fifo_txdl_buffer_set(struct __vxge_hw_fifo *fifo,
 {
 	struct __vxge_hw_fifo_txdl_priv *txdl_priv;
 	struct vxge_hw_fifo_txd *txdp, *txdp_last;
-	struct __vxge_hw_channel *channel;
-
-	channel = &fifo->channel;
 
 	txdl_priv = __vxge_hw_fifo_txdl_priv(fifo, txdlh);
 	txdp = (struct vxge_hw_fifo_txd *)txdlh  +  txdl_priv->frags;
@@ -1554,9 +1541,6 @@ void vxge_hw_fifo_txdl_post(struct __vxge_hw_fifo *fifo, void *txdlh)
 	struct __vxge_hw_fifo_txdl_priv *txdl_priv;
 	struct vxge_hw_fifo_txd *txdp_last;
 	struct vxge_hw_fifo_txd *txdp_first;
-	struct __vxge_hw_channel *channel;
-
-	channel = &fifo->channel;
 
 	txdl_priv = __vxge_hw_fifo_txdl_priv(fifo, txdlh);
 	txdp_first = txdlh;
@@ -1672,10 +1656,7 @@ enum vxge_hw_status vxge_hw_fifo_handle_tcode(struct __vxge_hw_fifo *fifo,
 					      void *txdlh,
 					      enum vxge_hw_fifo_tcode t_code)
 {
-	struct __vxge_hw_channel *channel;
-
 	enum vxge_hw_status status = VXGE_HW_OK;
-	channel = &fifo->channel;
 
 	if (((t_code & 0x7) < 0) || ((t_code & 0x7) > 0x4)) {
 		status = VXGE_HW_ERR_INVALID_TCODE;
diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile
index 3cafa3d15082..24c4408b5734 100644
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -15,6 +15,7 @@ nfp-objs := \
 	    nfpcore/nfp_resource.o \
 	    nfpcore/nfp_rtsym.o \
 	    nfpcore/nfp_target.o \
+	    nfp_asm.o \
 	    nfp_app.o \
 	    nfp_app_nic.o \
 	    nfp_devlink.o \
@@ -27,8 +28,6 @@ nfp-objs := \
 	    nfp_net_sriov.o \
 	    nfp_netvf_main.o \
 	    nfp_port.o \
-	    bpf/main.o \
-	    bpf/offload.o \
 	    nic/main.o
 
 ifeq ($(CONFIG_NFP_APP_FLOWER),y)
@@ -38,11 +37,14 @@ nfp-objs += \
 	    flower/main.o \
 	    flower/match.o \
 	    flower/metadata.o \
-	    flower/offload.o
+	    flower/offload.o \
+	    flower/tunnel_conf.o
 endif
 
 ifeq ($(CONFIG_BPF_SYSCALL),y)
 nfp-objs += \
+	    bpf/main.o \
+	    bpf/offload.o \
 	    bpf/verifier.o \
 	    bpf/jit.o
 endif
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 239dfbe8a0a1..995e95410b11 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -77,17 +77,6 @@ nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	return meta->l.prev != &nfp_prog->insns;
 }
 
-static void nfp_prog_free(struct nfp_prog *nfp_prog)
-{
-	struct nfp_insn_meta *meta, *tmp;
-
-	list_for_each_entry_safe(meta, tmp, &nfp_prog->insns, l) {
-		list_del(&meta->l);
-		kfree(meta);
-	}
-	kfree(nfp_prog);
-}
-
 static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn)
 {
 	if (nfp_prog->__prog_alloc_len == nfp_prog->prog_len) {
@@ -110,150 +99,7 @@ nfp_prog_offset_to_index(struct nfp_prog *nfp_prog, unsigned int offset)
 	return offset - nfp_prog->start_off;
 }
 
-/* --- SW reg --- */
-struct nfp_insn_ur_regs {
-	enum alu_dst_ab dst_ab;
-	u16 dst;
-	u16 areg, breg;
-	bool swap;
-	bool wr_both;
-};
-
-struct nfp_insn_re_regs {
-	enum alu_dst_ab dst_ab;
-	u8 dst;
-	u8 areg, breg;
-	bool swap;
-	bool wr_both;
-	bool i8;
-};
-
-static u16 nfp_swreg_to_unreg(u32 swreg, bool is_dst)
-{
-	u16 val = FIELD_GET(NN_REG_VAL, swreg);
-
-	switch (FIELD_GET(NN_REG_TYPE, swreg)) {
-	case NN_REG_GPR_A:
-	case NN_REG_GPR_B:
-	case NN_REG_GPR_BOTH:
-		return val;
-	case NN_REG_NNR:
-		return UR_REG_NN | val;
-	case NN_REG_XFER:
-		return UR_REG_XFR | val;
-	case NN_REG_IMM:
-		if (val & ~0xff) {
-			pr_err("immediate too large\n");
-			return 0;
-		}
-		return UR_REG_IMM_encode(val);
-	case NN_REG_NONE:
-		return is_dst ? UR_REG_NO_DST : REG_NONE;
-	default:
-		pr_err("unrecognized reg encoding %08x\n", swreg);
-		return 0;
-	}
-}
-
-static int
-swreg_to_unrestricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_ur_regs *reg)
-{
-	memset(reg, 0, sizeof(*reg));
-
-	/* Decode destination */
-	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM)
-		return -EFAULT;
-
-	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B)
-		reg->dst_ab = ALU_DST_B;
-	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH)
-		reg->wr_both = true;
-	reg->dst = nfp_swreg_to_unreg(dst, true);
-
-	/* Decode source operands */
-	if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg))
-		return -EFAULT;
-
-	if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B ||
-	    FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) {
-		reg->areg = nfp_swreg_to_unreg(rreg, false);
-		reg->breg = nfp_swreg_to_unreg(lreg, false);
-		reg->swap = true;
-	} else {
-		reg->areg = nfp_swreg_to_unreg(lreg, false);
-		reg->breg = nfp_swreg_to_unreg(rreg, false);
-	}
-
-	return 0;
-}
-
-static u16 nfp_swreg_to_rereg(u32 swreg, bool is_dst, bool has_imm8, bool *i8)
-{
-	u16 val = FIELD_GET(NN_REG_VAL, swreg);
-
-	switch (FIELD_GET(NN_REG_TYPE, swreg)) {
-	case NN_REG_GPR_A:
-	case NN_REG_GPR_B:
-	case NN_REG_GPR_BOTH:
-		return val;
-	case NN_REG_XFER:
-		return RE_REG_XFR | val;
-	case NN_REG_IMM:
-		if (val & ~(0x7f | has_imm8 << 7)) {
-			pr_err("immediate too large\n");
-			return 0;
-		}
-		*i8 = val & 0x80;
-		return RE_REG_IMM_encode(val & 0x7f);
-	case NN_REG_NONE:
-		return is_dst ? RE_REG_NO_DST : REG_NONE;
-	default:
-		pr_err("unrecognized reg encoding\n");
-		return 0;
-	}
-}
-
-static int
-swreg_to_restricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_re_regs *reg,
-		    bool has_imm8)
-{
-	memset(reg, 0, sizeof(*reg));
-
-	/* Decode destination */
-	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM)
-		return -EFAULT;
-
-	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B)
-		reg->dst_ab = ALU_DST_B;
-	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH)
-		reg->wr_both = true;
-	reg->dst = nfp_swreg_to_rereg(dst, true, false, NULL);
-
-	/* Decode source operands */
-	if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg))
-		return -EFAULT;
-
-	if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B ||
-	    FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) {
-		reg->areg = nfp_swreg_to_rereg(rreg, false, has_imm8, &reg->i8);
-		reg->breg = nfp_swreg_to_rereg(lreg, false, has_imm8, &reg->i8);
-		reg->swap = true;
-	} else {
-		reg->areg = nfp_swreg_to_rereg(lreg, false, has_imm8, &reg->i8);
-		reg->breg = nfp_swreg_to_rereg(rreg, false, has_imm8, &reg->i8);
-	}
-
-	return 0;
-}
-
 /* --- Emitters --- */
-static const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = {
-	[CMD_TGT_WRITE8] =		{ 0x00, 0x42 },
-	[CMD_TGT_READ8] =		{ 0x01, 0x43 },
-	[CMD_TGT_READ_LE] =		{ 0x01, 0x40 },
-	[CMD_TGT_READ_SWAP_LE] =	{ 0x03, 0x40 },
-};
-
 static void
 __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
 	   u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync)
@@ -281,7 +127,7 @@ __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
 
 static void
 emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
-	 u8 mode, u8 xfer, u32 lreg, u32 rreg, u8 size, bool sync)
+	 u8 mode, u8 xfer, swreg lreg, swreg rreg, u8 size, bool sync)
 {
 	struct nfp_insn_re_regs reg;
 	int err;
@@ -296,6 +142,11 @@ emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
 		nfp_prog->error = -EFAULT;
 		return;
 	}
+	if (reg.dst_lmextn || reg.src_lmextn) {
+		pr_err("cmd can't use LMextn\n");
+		nfp_prog->error = -EFAULT;
+		return;
+	}
 
 	__emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync);
 }
@@ -340,49 +191,10 @@ emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
 }
 
 static void
-__emit_br_byte(struct nfp_prog *nfp_prog, u8 areg, u8 breg, bool imm8,
-	       u8 byte, bool equal, u16 addr, u8 defer)
-{
-	u16 addr_lo, addr_hi;
-	u64 insn;
-
-	addr_lo = addr & (OP_BB_ADDR_LO >> __bf_shf(OP_BB_ADDR_LO));
-	addr_hi = addr != addr_lo;
-
-	insn = OP_BBYTE_BASE |
-		FIELD_PREP(OP_BB_A_SRC, areg) |
-		FIELD_PREP(OP_BB_BYTE, byte) |
-		FIELD_PREP(OP_BB_B_SRC, breg) |
-		FIELD_PREP(OP_BB_I8, imm8) |
-		FIELD_PREP(OP_BB_EQ, equal) |
-		FIELD_PREP(OP_BB_DEFBR, defer) |
-		FIELD_PREP(OP_BB_ADDR_LO, addr_lo) |
-		FIELD_PREP(OP_BB_ADDR_HI, addr_hi);
-
-	nfp_prog_push(nfp_prog, insn);
-}
-
-static void
-emit_br_byte_neq(struct nfp_prog *nfp_prog,
-		 u32 dst, u8 imm, u8 byte, u16 addr, u8 defer)
-{
-	struct nfp_insn_re_regs reg;
-	int err;
-
-	err = swreg_to_restricted(reg_none(), dst, reg_imm(imm), &reg, true);
-	if (err) {
-		nfp_prog->error = err;
-		return;
-	}
-
-	__emit_br_byte(nfp_prog, reg.areg, reg.breg, reg.i8, byte, false, addr,
-		       defer);
-}
-
-static void
 __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
 	     enum immed_width width, bool invert,
-	     enum immed_shift shift, bool wr_both)
+	     enum immed_shift shift, bool wr_both,
+	     bool dst_lmextn, bool src_lmextn)
 {
 	u64 insn;
 
@@ -393,19 +205,21 @@ __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
 		FIELD_PREP(OP_IMMED_WIDTH, width) |
 		FIELD_PREP(OP_IMMED_INV, invert) |
 		FIELD_PREP(OP_IMMED_SHIFT, shift) |
-		FIELD_PREP(OP_IMMED_WR_AB, wr_both);
+		FIELD_PREP(OP_IMMED_WR_AB, wr_both) |
+		FIELD_PREP(OP_IMMED_SRC_LMEXTN, src_lmextn) |
+		FIELD_PREP(OP_IMMED_DST_LMEXTN, dst_lmextn);
 
 	nfp_prog_push(nfp_prog, insn);
 }
 
 static void
-emit_immed(struct nfp_prog *nfp_prog, u32 dst, u16 imm,
+emit_immed(struct nfp_prog *nfp_prog, swreg dst, u16 imm,
 	   enum immed_width width, bool invert, enum immed_shift shift)
 {
 	struct nfp_insn_ur_regs reg;
 	int err;
 
-	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) {
+	if (swreg_type(dst) == NN_REG_IMM) {
 		nfp_prog->error = -EFAULT;
 		return;
 	}
@@ -417,13 +231,15 @@ emit_immed(struct nfp_prog *nfp_prog, u32 dst, u16 imm,
 	}
 
 	__emit_immed(nfp_prog, reg.areg, reg.breg, imm >> 8, width,
-		     invert, shift, reg.wr_both);
+		     invert, shift, reg.wr_both,
+		     reg.dst_lmextn, reg.src_lmextn);
 }
 
 static void
 __emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
 	   enum shf_sc sc, u8 shift,
-	   u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both)
+	   u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both,
+	   bool dst_lmextn, bool src_lmextn)
 {
 	u64 insn;
 
@@ -445,14 +261,16 @@ __emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
 		FIELD_PREP(OP_SHF_SHIFT, shift) |
 		FIELD_PREP(OP_SHF_OP, op) |
 		FIELD_PREP(OP_SHF_DST_AB, dst_ab) |
-		FIELD_PREP(OP_SHF_WR_AB, wr_both);
+		FIELD_PREP(OP_SHF_WR_AB, wr_both) |
+		FIELD_PREP(OP_SHF_SRC_LMEXTN, src_lmextn) |
+		FIELD_PREP(OP_SHF_DST_LMEXTN, dst_lmextn);
 
 	nfp_prog_push(nfp_prog, insn);
 }
 
 static void
-emit_shf(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum shf_op op, u32 rreg,
-	 enum shf_sc sc, u8 shift)
+emit_shf(struct nfp_prog *nfp_prog, swreg dst,
+	 swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc, u8 shift)
 {
 	struct nfp_insn_re_regs reg;
 	int err;
@@ -464,12 +282,14 @@ emit_shf(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum shf_op op, u32 rreg,
 	}
 
 	__emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift,
-		   reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both);
+		   reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both,
+		   reg.dst_lmextn, reg.src_lmextn);
 }
 
 static void
 __emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
-	   u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both)
+	   u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both,
+	   bool dst_lmextn, bool src_lmextn)
 {
 	u64 insn;
 
@@ -480,13 +300,16 @@ __emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
 		FIELD_PREP(OP_ALU_SW, swap) |
 		FIELD_PREP(OP_ALU_OP, op) |
 		FIELD_PREP(OP_ALU_DST_AB, dst_ab) |
-		FIELD_PREP(OP_ALU_WR_AB, wr_both);
+		FIELD_PREP(OP_ALU_WR_AB, wr_both) |
+		FIELD_PREP(OP_ALU_SRC_LMEXTN, src_lmextn) |
+		FIELD_PREP(OP_ALU_DST_LMEXTN, dst_lmextn);
 
 	nfp_prog_push(nfp_prog, insn);
 }
 
 static void
-emit_alu(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum alu_op op, u32 rreg)
+emit_alu(struct nfp_prog *nfp_prog, swreg dst,
+	 swreg lreg, enum alu_op op, swreg rreg)
 {
 	struct nfp_insn_ur_regs reg;
 	int err;
@@ -498,13 +321,15 @@ emit_alu(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum alu_op op, u32 rreg)
 	}
 
 	__emit_alu(nfp_prog, reg.dst, reg.dst_ab,
-		   reg.areg, op, reg.breg, reg.swap, reg.wr_both);
+		   reg.areg, op, reg.breg, reg.swap, reg.wr_both,
+		   reg.dst_lmextn, reg.src_lmextn);
 }
 
 static void
 __emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc,
 		u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8,
-		bool zero, bool swap, bool wr_both)
+		bool zero, bool swap, bool wr_both,
+		bool dst_lmextn, bool src_lmextn)
 {
 	u64 insn;
 
@@ -517,33 +342,84 @@ __emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc,
 		FIELD_PREP(OP_LDF_ZF, zero) |
 		FIELD_PREP(OP_LDF_BMASK, bmask) |
 		FIELD_PREP(OP_LDF_SHF, shift) |
-		FIELD_PREP(OP_LDF_WR_AB, wr_both);
+		FIELD_PREP(OP_LDF_WR_AB, wr_both) |
+		FIELD_PREP(OP_LDF_SRC_LMEXTN, src_lmextn) |
+		FIELD_PREP(OP_LDF_DST_LMEXTN, dst_lmextn);
 
 	nfp_prog_push(nfp_prog, insn);
 }
 
 static void
-emit_ld_field_any(struct nfp_prog *nfp_prog, enum shf_sc sc, u8 shift,
-		  u32 dst, u8 bmask, u32 src, bool zero)
+emit_ld_field_any(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
+		  enum shf_sc sc, u8 shift, bool zero)
 {
 	struct nfp_insn_re_regs reg;
 	int err;
 
-	err = swreg_to_restricted(reg_none(), dst, src, &reg, true);
+	/* Note: ld_field is special as it uses one of the src regs as dst */
+	err = swreg_to_restricted(dst, dst, src, &reg, true);
 	if (err) {
 		nfp_prog->error = err;
 		return;
 	}
 
 	__emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift,
-			reg.i8, zero, reg.swap, reg.wr_both);
+			reg.i8, zero, reg.swap, reg.wr_both,
+			reg.dst_lmextn, reg.src_lmextn);
 }
 
 static void
-emit_ld_field(struct nfp_prog *nfp_prog, u32 dst, u8 bmask, u32 src,
+emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
 	      enum shf_sc sc, u8 shift)
 {
-	emit_ld_field_any(nfp_prog, sc, shift, dst, bmask, src, false);
+	emit_ld_field_any(nfp_prog, dst, bmask, src, sc, shift, false);
+}
+
+static void
+__emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr,
+	    bool dst_lmextn, bool src_lmextn)
+{
+	u64 insn;
+
+	insn = OP_LCSR_BASE |
+		FIELD_PREP(OP_LCSR_A_SRC, areg) |
+		FIELD_PREP(OP_LCSR_B_SRC, breg) |
+		FIELD_PREP(OP_LCSR_WRITE, wr) |
+		FIELD_PREP(OP_LCSR_ADDR, addr) |
+		FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) |
+		FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr)
+{
+	struct nfp_insn_ur_regs reg;
+	int err;
+
+	/* This instruction takes immeds instead of reg_none() for the ignored
+	 * operand, but we can't encode 2 immeds in one instr with our normal
+	 * swreg infra so if param is an immed, we encode as reg_none() and
+	 * copy the immed to both operands.
+	 */
+	if (swreg_type(src) == NN_REG_IMM) {
+		err = swreg_to_unrestricted(reg_none(), src, reg_none(), &reg);
+		reg.breg = reg.areg;
+	} else {
+		err = swreg_to_unrestricted(reg_none(), src, reg_imm(0), &reg);
+	}
+	if (err) {
+		nfp_prog->error = err;
+		return;
+	}
+
+	__emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr / 4,
+		    false, reg.src_lmextn);
+}
+
+static void emit_nop(struct nfp_prog *nfp_prog)
+{
+	__emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0);
 }
 
 /* --- Wrappers --- */
@@ -565,7 +441,7 @@ static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift)
 	return true;
 }
 
-static void wrp_immed(struct nfp_prog *nfp_prog, u32 dst, u32 imm)
+static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm)
 {
 	enum immed_shift shift;
 	u16 val;
@@ -586,7 +462,7 @@ static void wrp_immed(struct nfp_prog *nfp_prog, u32 dst, u32 imm)
  * If the @imm is small enough encode it directly in operand and return
  * otherwise load @imm to a spare register and return its encoding.
  */
-static u32 ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg)
+static swreg ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
 {
 	if (FIELD_FIT(UR_REG_IMM_MAX, imm))
 		return reg_imm(imm);
@@ -599,7 +475,7 @@ static u32 ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg)
  * If the @imm is small enough encode it directly in operand and return
  * otherwise load @imm to a spare register and return its encoding.
  */
-static u32 re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg)
+static swreg re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
 {
 	if (FIELD_FIT(RE_REG_IMM_MAX, imm))
 		return reg_imm(imm);
@@ -608,6 +484,12 @@ static u32 re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg)
 	return tmp_reg;
 }
 
+static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count)
+{
+	while (count--)
+		emit_nop(nfp_prog);
+}
+
 static void
 wrp_br_special(struct nfp_prog *nfp_prog, enum br_mask mask,
 	       enum br_special special)
@@ -618,78 +500,374 @@ wrp_br_special(struct nfp_prog *nfp_prog, enum br_mask mask,
 		FIELD_PREP(OP_BR_SPECIAL, special);
 }
 
+static void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src)
+{
+	emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src);
+}
+
 static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src)
 {
-	emit_alu(nfp_prog, reg_both(dst), reg_none(), ALU_OP_NONE, reg_b(src));
+	wrp_mov(nfp_prog, reg_both(dst), reg_b(src));
 }
 
 static int
-construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset,
-		      u16 src, bool src_valid, u8 size)
+data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
 {
 	unsigned int i;
 	u16 shift, sz;
-	u32 tmp_reg;
 
 	/* We load the value from the address indicated in @offset and then
 	 * shift out the data we don't need.  Note: this is big endian!
 	 */
-	sz = size < 4 ? 4 : size;
+	sz = max(size, 4);
 	shift = size < 4 ? 4 - size : 0;
 
-	if (src_valid) {
-		/* Calculate the true offset (src_reg + imm) */
-		tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
-		emit_alu(nfp_prog, imm_both(nfp_prog),
-			 reg_a(src), ALU_OP_ADD, tmp_reg);
-		/* Check packet length (size guaranteed to fit b/c it's u8) */
-		emit_alu(nfp_prog, imm_a(nfp_prog),
-			 imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size));
-		emit_alu(nfp_prog, reg_none(),
-			 NFP_BPF_ABI_LEN, ALU_OP_SUB, imm_a(nfp_prog));
-		wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT);
-		/* Load data */
-		emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
-			 pkt_reg(nfp_prog), imm_b(nfp_prog), sz - 1, true);
-	} else {
-		/* Check packet length */
-		tmp_reg = ur_load_imm_any(nfp_prog, offset + size,
-					  imm_a(nfp_prog));
-		emit_alu(nfp_prog, reg_none(),
-			 NFP_BPF_ABI_LEN, ALU_OP_SUB, tmp_reg);
-		wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT);
-		/* Load data */
-		tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
-		emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
-			 pkt_reg(nfp_prog), tmp_reg, sz - 1, true);
-	}
+	emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
+		 pptr_reg(nfp_prog), offset, sz - 1, true);
 
 	i = 0;
 	if (shift)
-		emit_shf(nfp_prog, reg_both(0), reg_none(), SHF_OP_NONE,
+		emit_shf(nfp_prog, reg_both(dst_gpr), reg_none(), SHF_OP_NONE,
 			 reg_xfer(0), SHF_SC_R_SHF, shift * 8);
 	else
 		for (; i * 4 < size; i++)
-			emit_alu(nfp_prog, reg_both(i),
-				 reg_none(), ALU_OP_NONE, reg_xfer(i));
+			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
+
+	if (i < 2)
+		wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);
+
+	return 0;
+}
+
+static int
+data_ld_host_order(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
+		   u8 dst_gpr, int size)
+{
+	unsigned int i;
+	u8 mask, sz;
+
+	/* We load the value from the address indicated in @offset and then
+	 * mask out the data we don't need.  Note: this is little endian!
+	 */
+	sz = max(size, 4);
+	mask = size < 4 ? GENMASK(size - 1, 0) : 0;
+
+	emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0,
+		 reg_a(src_gpr), offset, sz / 4 - 1, true);
+
+	i = 0;
+	if (mask)
+		emit_ld_field_any(nfp_prog, reg_both(dst_gpr), mask,
+				  reg_xfer(0), SHF_SC_NONE, 0, true);
+	else
+		for (; i * 4 < size; i++)
+			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
 
 	if (i < 2)
-		wrp_immed(nfp_prog, reg_both(1), 0);
+		wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);
 
 	return 0;
 }
 
+static int
+construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size)
+{
+	swreg tmp_reg;
+
+	/* Calculate the true offset (src_reg + imm) */
+	tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
+	emit_alu(nfp_prog, imm_both(nfp_prog), reg_a(src), ALU_OP_ADD, tmp_reg);
+
+	/* Check packet length (size guaranteed to fit b/c it's u8) */
+	emit_alu(nfp_prog, imm_a(nfp_prog),
+		 imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size));
+	emit_alu(nfp_prog, reg_none(),
+		 plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog));
+	wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT);
+
+	/* Load data */
+	return data_ld(nfp_prog, imm_b(nfp_prog), 0, size);
+}
+
 static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size)
 {
-	return construct_data_ind_ld(nfp_prog, offset, 0, false, size);
+	swreg tmp_reg;
+
+	/* Check packet length */
+	tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog));
+	emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg);
+	wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT);
+
+	/* Load data */
+	tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
+	return data_ld(nfp_prog, tmp_reg, 0, size);
+}
+
+static int
+data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
+		    u8 src_gpr, u8 size)
+{
+	unsigned int i;
+
+	for (i = 0; i * 4 < size; i++)
+		wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i));
+
+	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
+		 reg_a(dst_gpr), offset, size - 1, true);
+
+	return 0;
 }
 
-static int wrp_set_mark(struct nfp_prog *nfp_prog, u8 src)
+static int
+data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
+		   u64 imm, u8 size)
 {
-	emit_alu(nfp_prog, NFP_BPF_ABI_MARK,
-		 reg_none(), ALU_OP_NONE, reg_b(src));
-	emit_alu(nfp_prog, NFP_BPF_ABI_FLAGS,
-		 NFP_BPF_ABI_FLAGS, ALU_OP_OR, reg_imm(NFP_BPF_ABI_FLAG_MARK));
+	wrp_immed(nfp_prog, reg_xfer(0), imm);
+	if (size == 8)
+		wrp_immed(nfp_prog, reg_xfer(1), imm >> 32);
+
+	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
+		 reg_a(dst_gpr), offset, size - 1, true);
+
+	return 0;
+}
+
+typedef int
+(*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off,
+	     unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
+	     bool needs_inc);
+
+static int
+wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off,
+	      unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
+	      bool needs_inc)
+{
+	bool should_inc = needs_inc && new_gpr && !last;
+	u32 idx, src_byte;
+	enum shf_sc sc;
+	swreg reg;
+	int shf;
+	u8 mask;
+
+	if (WARN_ON_ONCE(dst_byte + size > 4 || off % 4 + size > 4))
+		return -EOPNOTSUPP;
+
+	idx = off / 4;
+
+	/* Move the entire word */
+	if (size == 4) {
+		wrp_mov(nfp_prog, reg_both(dst),
+			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx));
+		return 0;
+	}
+
+	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
+		return -EOPNOTSUPP;
+
+	src_byte = off % 4;
+
+	mask = (1 << size) - 1;
+	mask <<= dst_byte;
+
+	if (WARN_ON_ONCE(mask > 0xf))
+		return -EOPNOTSUPP;
+
+	shf = abs(src_byte - dst_byte) * 8;
+	if (src_byte == dst_byte) {
+		sc = SHF_SC_NONE;
+	} else if (src_byte < dst_byte) {
+		shf = 32 - shf;
+		sc = SHF_SC_L_SHF;
+	} else {
+		sc = SHF_SC_R_SHF;
+	}
+
+	/* ld_field can address fewer indexes, if offset too large do RMW.
+	 * Because we RMV twice we waste 2 cycles on unaligned 8 byte writes.
+	 */
+	if (idx <= RE_REG_LM_IDX_MAX) {
+		reg = reg_lm(lm3 ? 3 : 0, idx);
+	} else {
+		reg = imm_a(nfp_prog);
+		/* If it's not the first part of the load and we start a new GPR
+		 * that means we are loading a second part of the LMEM word into
+		 * a new GPR.  IOW we've already looked that LMEM word and
+		 * therefore it has been loaded into imm_a().
+		 */
+		if (first || !new_gpr)
+			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
+	}
+
+	emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr);
+
+	if (should_inc)
+		wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));
+
+	return 0;
+}
+
+static int
+wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off,
+	       unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
+	       bool needs_inc)
+{
+	bool should_inc = needs_inc && new_gpr && !last;
+	u32 idx, dst_byte;
+	enum shf_sc sc;
+	swreg reg;
+	int shf;
+	u8 mask;
+
+	if (WARN_ON_ONCE(src_byte + size > 4 || off % 4 + size > 4))
+		return -EOPNOTSUPP;
+
+	idx = off / 4;
+
+	/* Move the entire word */
+	if (size == 4) {
+		wrp_mov(nfp_prog,
+			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx),
+			reg_b(src));
+		return 0;
+	}
+
+	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
+		return -EOPNOTSUPP;
+
+	dst_byte = off % 4;
+
+	mask = (1 << size) - 1;
+	mask <<= dst_byte;
+
+	if (WARN_ON_ONCE(mask > 0xf))
+		return -EOPNOTSUPP;
+
+	shf = abs(src_byte - dst_byte) * 8;
+	if (src_byte == dst_byte) {
+		sc = SHF_SC_NONE;
+	} else if (src_byte < dst_byte) {
+		shf = 32 - shf;
+		sc = SHF_SC_L_SHF;
+	} else {
+		sc = SHF_SC_R_SHF;
+	}
+
+	/* ld_field can address fewer indexes, if offset too large do RMW.
+	 * Because we RMV twice we waste 2 cycles on unaligned 8 byte writes.
+	 */
+	if (idx <= RE_REG_LM_IDX_MAX) {
+		reg = reg_lm(lm3 ? 3 : 0, idx);
+	} else {
+		reg = imm_a(nfp_prog);
+		/* Only first and last LMEM locations are going to need RMW,
+		 * the middle location will be overwritten fully.
+		 */
+		if (first || last)
+			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
+	}
+
+	emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf);
+
+	if (new_gpr || last) {
+		if (idx > RE_REG_LM_IDX_MAX)
+			wrp_mov(nfp_prog, reg_lm(0, idx), reg);
+		if (should_inc)
+			wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));
+	}
+
+	return 0;
+}
+
+static int
+mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	     unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr,
+	     bool clr_gpr, lmem_step step)
+{
+	s32 off = nfp_prog->stack_depth + meta->insn.off + ptr_off;
+	bool first = true, last;
+	bool needs_inc = false;
+	swreg stack_off_reg;
+	u8 prev_gpr = 255;
+	u32 gpr_byte = 0;
+	bool lm3 = true;
+	int ret;
+
+	if (meta->ptr_not_const) {
+		/* Use of the last encountered ptr_off is OK, they all have
+		 * the same alignment.  Depend on low bits of value being
+		 * discarded when written to LMaddr register.
+		 */
+		stack_off_reg = ur_load_imm_any(nfp_prog, meta->insn.off,
+						stack_imm(nfp_prog));
+
+		emit_alu(nfp_prog, imm_b(nfp_prog),
+			 reg_a(ptr_gpr), ALU_OP_ADD, stack_off_reg);
+
+		needs_inc = true;
+	} else if (off + size <= 64) {
+		/* We can reach bottom 64B with LMaddr0 */
+		lm3 = false;
+	} else if (round_down(off, 32) == round_down(off + size - 1, 32)) {
+		/* We have to set up a new pointer.  If we know the offset
+		 * and the entire access falls into a single 32 byte aligned
+		 * window we won't have to increment the LM pointer.
+		 * The 32 byte alignment is imporant because offset is ORed in
+		 * not added when doing *l$indexN[off].
+		 */
+		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 32),
+						stack_imm(nfp_prog));
+		emit_alu(nfp_prog, imm_b(nfp_prog),
+			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);
+
+		off %= 32;
+	} else {
+		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 4),
+						stack_imm(nfp_prog));
+
+		emit_alu(nfp_prog, imm_b(nfp_prog),
+			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);
+
+		needs_inc = true;
+	}
+	if (lm3) {
+		emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3);
+		/* For size < 4 one slot will be filled by zeroing of upper. */
+		wrp_nops(nfp_prog, clr_gpr && size < 8 ? 2 : 3);
+	}
+
+	if (clr_gpr && size < 8)
+		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
+
+	while (size) {
+		u32 slice_end;
+		u8 slice_size;
+
+		slice_size = min(size, 4 - gpr_byte);
+		slice_end = min(off + slice_size, round_up(off + 1, 4));
+		slice_size = slice_end - off;
+
+		last = slice_size == size;
+
+		if (needs_inc)
+			off %= 4;
+
+		ret = step(nfp_prog, gpr, gpr_byte, off, slice_size,
+			   first, gpr != prev_gpr, last, lm3, needs_inc);
+		if (ret)
+			return ret;
+
+		prev_gpr = gpr;
+		first = false;
+
+		gpr_byte += slice_size;
+		if (gpr_byte >= 4) {
+			gpr_byte -= 4;
+			gpr++;
+		}
+
+		size -= slice_size;
+		off += slice_size;
+	}
 
 	return 0;
 }
@@ -697,7 +875,7 @@ static int wrp_set_mark(struct nfp_prog *nfp_prog, u8 src)
 static void
 wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm)
 {
-	u32 tmp_reg;
+	swreg tmp_reg;
 
 	if (alu_op == ALU_OP_AND) {
 		if (!imm)
@@ -714,7 +892,7 @@ wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm)
 	if (alu_op == ALU_OP_XOR) {
 		if (!~imm)
 			emit_alu(nfp_prog, reg_both(dst), reg_none(),
-				 ALU_OP_NEG, reg_b(dst));
+				 ALU_OP_NOT, reg_b(dst));
 		if (!imm || !~imm)
 			return;
 	}
@@ -815,7 +993,7 @@ wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 	const struct bpf_insn *insn = &meta->insn;
 	u64 imm = insn->imm; /* sign extend */
 	u8 reg = insn->dst_reg * 2;
-	u32 tmp_reg;
+	swreg tmp_reg;
 
 	if (insn->off < 0) /* TODO */
 		return -EOPNOTSUPP;
@@ -844,7 +1022,10 @@ wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 	    enum br_mask br_mask, bool swap)
 {
 	const struct bpf_insn *insn = &meta->insn;
-	u8 areg = insn->src_reg * 2, breg = insn->dst_reg * 2;
+	u8 areg, breg;
+
+	areg = insn->dst_reg * 2;
+	breg = insn->src_reg * 2;
 
 	if (insn->off < 0) /* TODO */
 		return -EOPNOTSUPP;
@@ -863,13 +1044,34 @@ wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 	return 0;
 }
 
+static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out)
+{
+	emit_ld_field(nfp_prog, reg_both(gpr_out), 0xf, reg_in,
+		      SHF_SC_R_ROT, 8);
+	emit_ld_field(nfp_prog, reg_both(gpr_out), 0x5, reg_a(gpr_out),
+		      SHF_SC_R_ROT, 16);
+}
+
 /* --- Callbacks --- */
 static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	const struct bpf_insn *insn = &meta->insn;
-
-	wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2);
-	wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->src_reg * 2 + 1);
+	u8 dst = insn->dst_reg * 2;
+	u8 src = insn->src_reg * 2;
+
+	if (insn->src_reg == BPF_REG_10) {
+		swreg stack_depth_reg;
+
+		stack_depth_reg = ur_load_imm_any(nfp_prog,
+						  nfp_prog->stack_depth,
+						  stack_imm(nfp_prog));
+		emit_alu(nfp_prog, reg_both(dst),
+			 stack_reg(nfp_prog), ALU_OP_ADD, stack_depth_reg);
+		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
+	} else {
+		wrp_reg_mov(nfp_prog, dst, src);
+		wrp_reg_mov(nfp_prog, dst + 1, src + 1);
+	}
 
 	return 0;
 }
@@ -964,28 +1166,64 @@ static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	return 0;
 }
 
-static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	const struct bpf_insn *insn = &meta->insn;
 
-	if (insn->imm != 32)
-		return 1; /* TODO */
-
-	wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->dst_reg * 2);
-	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), 0);
+	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), reg_imm(0),
+		 ALU_OP_SUB, reg_b(insn->dst_reg * 2));
+	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), reg_imm(0),
+		 ALU_OP_SUB_C, reg_b(insn->dst_reg * 2 + 1));
 
 	return 0;
 }
 
-static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	const struct bpf_insn *insn = &meta->insn;
+	u8 dst = insn->dst_reg * 2;
+
+	if (insn->imm < 32) {
+		emit_shf(nfp_prog, reg_both(dst + 1),
+			 reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
+			 SHF_SC_R_DSHF, 32 - insn->imm);
+		emit_shf(nfp_prog, reg_both(dst),
+			 reg_none(), SHF_OP_NONE, reg_b(dst),
+			 SHF_SC_L_SHF, insn->imm);
+	} else if (insn->imm == 32) {
+		wrp_reg_mov(nfp_prog, dst + 1, dst);
+		wrp_immed(nfp_prog, reg_both(dst), 0);
+	} else if (insn->imm > 32) {
+		emit_shf(nfp_prog, reg_both(dst + 1),
+			 reg_none(), SHF_OP_NONE, reg_b(dst),
+			 SHF_SC_L_SHF, insn->imm - 32);
+		wrp_immed(nfp_prog, reg_both(dst), 0);
+	}
 
-	if (insn->imm != 32)
-		return 1; /* TODO */
+	return 0;
+}
 
-	wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->dst_reg * 2 + 1);
-	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
+static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u8 dst = insn->dst_reg * 2;
+
+	if (insn->imm < 32) {
+		emit_shf(nfp_prog, reg_both(dst),
+			 reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
+			 SHF_SC_R_DSHF, insn->imm);
+		emit_shf(nfp_prog, reg_both(dst + 1),
+			 reg_none(), SHF_OP_NONE, reg_b(dst + 1),
+			 SHF_SC_R_SHF, insn->imm);
+	} else if (insn->imm == 32) {
+		wrp_reg_mov(nfp_prog, dst, dst + 1);
+		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
+	} else if (insn->imm > 32) {
+		emit_shf(nfp_prog, reg_both(dst),
+			 reg_none(), SHF_OP_NONE, reg_b(dst + 1),
+			 SHF_SC_R_SHF, insn->imm - 32);
+		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
+	}
 
 	return 0;
 }
@@ -1060,6 +1298,16 @@ static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm);
 }
 
+static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	u8 dst = meta->insn.dst_reg * 2;
+
+	emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst));
+	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
+
+	return 0;
+}
+
 static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	const struct bpf_insn *insn = &meta->insn;
@@ -1075,21 +1323,59 @@ static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	return 0;
 }
 
+static int end_reg32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u8 gpr = insn->dst_reg * 2;
+
+	switch (insn->imm) {
+	case 16:
+		emit_ld_field(nfp_prog, reg_both(gpr), 0x9, reg_b(gpr),
+			      SHF_SC_R_ROT, 8);
+		emit_ld_field(nfp_prog, reg_both(gpr), 0xe, reg_a(gpr),
+			      SHF_SC_R_SHF, 16);
+
+		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
+		break;
+	case 32:
+		wrp_end32(nfp_prog, reg_a(gpr), gpr);
+		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
+		break;
+	case 64:
+		wrp_mov(nfp_prog, imm_a(nfp_prog), reg_b(gpr + 1));
+
+		wrp_end32(nfp_prog, reg_a(gpr), gpr + 1);
+		wrp_end32(nfp_prog, imm_a(nfp_prog), gpr);
+		break;
+	}
+
+	return 0;
+}
+
 static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-	wrp_immed(nfp_prog, reg_both(nfp_meta_prev(meta)->insn.dst_reg * 2 + 1),
-		  meta->insn.imm);
+	struct nfp_insn_meta *prev = nfp_meta_prev(meta);
+	u32 imm_lo, imm_hi;
+	u8 dst;
+
+	dst = prev->insn.dst_reg * 2;
+	imm_lo = prev->insn.imm;
+	imm_hi = meta->insn.imm;
+
+	wrp_immed(nfp_prog, reg_both(dst), imm_lo);
+
+	/* mov is always 1 insn, load imm may be two, so try to use mov */
+	if (imm_hi == imm_lo)
+		wrp_mov(nfp_prog, reg_both(dst + 1), reg_a(dst));
+	else
+		wrp_immed(nfp_prog, reg_both(dst + 1), imm_hi);
 
 	return 0;
 }
 
 static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-	const struct bpf_insn *insn = &meta->insn;
-
 	meta->double_cb = imm_ld8_part2;
-	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm);
-
 	return 0;
 }
 
@@ -1111,82 +1397,235 @@ static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
-				     meta->insn.src_reg * 2, true, 1);
+				     meta->insn.src_reg * 2, 1);
 }
 
 static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
-				     meta->insn.src_reg * 2, true, 2);
+				     meta->insn.src_reg * 2, 2);
 }
 
 static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
-				     meta->insn.src_reg * 2, true, 4);
+				     meta->insn.src_reg * 2, 4);
 }
 
-static int mem_ldx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+static int
+mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	      unsigned int size, unsigned int ptr_off)
 {
-	if (meta->insn.off == offsetof(struct sk_buff, len))
-		emit_alu(nfp_prog, reg_both(meta->insn.dst_reg * 2),
-			 reg_none(), ALU_OP_NONE, NFP_BPF_ABI_LEN);
-	else
+	return mem_op_stack(nfp_prog, meta, size, ptr_off,
+			    meta->insn.dst_reg * 2, meta->insn.src_reg * 2,
+			    true, wrp_lmem_load);
+}
+
+static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+		       u8 size)
+{
+	swreg dst = reg_both(meta->insn.dst_reg * 2);
+
+	switch (meta->insn.off) {
+	case offsetof(struct __sk_buff, len):
+		if (size != FIELD_SIZEOF(struct __sk_buff, len))
+			return -EOPNOTSUPP;
+		wrp_mov(nfp_prog, dst, plen_reg(nfp_prog));
+		break;
+	case offsetof(struct __sk_buff, data):
+		if (size != FIELD_SIZEOF(struct __sk_buff, data))
+			return -EOPNOTSUPP;
+		wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
+		break;
+	case offsetof(struct __sk_buff, data_end):
+		if (size != FIELD_SIZEOF(struct __sk_buff, data_end))
+			return -EOPNOTSUPP;
+		emit_alu(nfp_prog, dst,
+			 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
+		break;
+	default:
 		return -EOPNOTSUPP;
+	}
+
+	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
 
 	return 0;
 }
 
-static int mem_ldx4_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+		       u8 size)
 {
-	u32 dst = reg_both(meta->insn.dst_reg * 2);
+	swreg dst = reg_both(meta->insn.dst_reg * 2);
 
-	if (meta->insn.off != offsetof(struct xdp_md, data) &&
-	    meta->insn.off != offsetof(struct xdp_md, data_end))
+	switch (meta->insn.off) {
+	case offsetof(struct xdp_md, data):
+		if (size != FIELD_SIZEOF(struct xdp_md, data))
+			return -EOPNOTSUPP;
+		wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
+		break;
+	case offsetof(struct xdp_md, data_end):
+		if (size != FIELD_SIZEOF(struct xdp_md, data_end))
+			return -EOPNOTSUPP;
+		emit_alu(nfp_prog, dst,
+			 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
+		break;
+	default:
 		return -EOPNOTSUPP;
+	}
 
-	emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, NFP_BPF_ABI_PKT);
+	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
 
-	if (meta->insn.off == offsetof(struct xdp_md, data))
-		return 0;
+	return 0;
+}
 
-	emit_alu(nfp_prog, dst,	dst, ALU_OP_ADD, NFP_BPF_ABI_LEN);
+static int
+mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	     unsigned int size)
+{
+	swreg tmp_reg;
 
-	return 0;
+	tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
+
+	return data_ld_host_order(nfp_prog, meta->insn.src_reg * 2, tmp_reg,
+				  meta->insn.dst_reg * 2, size);
+}
+
+static int
+mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	unsigned int size)
+{
+	if (meta->ptr.type == PTR_TO_CTX) {
+		if (nfp_prog->type == BPF_PROG_TYPE_XDP)
+			return mem_ldx_xdp(nfp_prog, meta, size);
+		else
+			return mem_ldx_skb(nfp_prog, meta, size);
+	}
+
+	if (meta->ptr.type == PTR_TO_PACKET)
+		return mem_ldx_data(nfp_prog, meta, size);
+
+	if (meta->ptr.type == PTR_TO_STACK)
+		return mem_ldx_stack(nfp_prog, meta, size,
+				     meta->ptr.off + meta->ptr.var_off.value);
+
+	return -EOPNOTSUPP;
+}
+
+static int mem_ldx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return mem_ldx(nfp_prog, meta, 1);
+}
+
+static int mem_ldx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return mem_ldx(nfp_prog, meta, 2);
 }
 
 static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-	int ret;
+	return mem_ldx(nfp_prog, meta, 4);
+}
 
-	if (nfp_prog->act == NN_ACT_XDP)
-		ret = mem_ldx4_xdp(nfp_prog, meta);
-	else
-		ret = mem_ldx4_skb(nfp_prog, meta);
+static int mem_ldx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return mem_ldx(nfp_prog, meta, 8);
+}
 
-	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
+static int
+mem_st_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	    unsigned int size)
+{
+	u64 imm = meta->insn.imm; /* sign extend */
+	swreg off_reg;
+
+	off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
 
-	return ret;
+	return data_st_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
+				  imm, size);
 }
 
-static int mem_stx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+static int mem_st(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+		  unsigned int size)
 {
-	if (meta->insn.off == offsetof(struct sk_buff, mark))
-		return wrp_set_mark(nfp_prog, meta->insn.src_reg * 2);
+	if (meta->ptr.type == PTR_TO_PACKET)
+		return mem_st_data(nfp_prog, meta, size);
 
 	return -EOPNOTSUPP;
 }
 
-static int mem_stx4_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+static int mem_st1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return mem_st(nfp_prog, meta, 1);
+}
+
+static int mem_st2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return mem_st(nfp_prog, meta, 2);
+}
+
+static int mem_st4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return mem_st(nfp_prog, meta, 4);
+}
+
+static int mem_st8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
+	return mem_st(nfp_prog, meta, 8);
+}
+
+static int
+mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	     unsigned int size)
+{
+	swreg off_reg;
+
+	off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
+
+	return data_stx_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
+				   meta->insn.src_reg * 2, size);
+}
+
+static int
+mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	      unsigned int size, unsigned int ptr_off)
+{
+	return mem_op_stack(nfp_prog, meta, size, ptr_off,
+			    meta->insn.src_reg * 2, meta->insn.dst_reg * 2,
+			    false, wrp_lmem_store);
+}
+
+static int
+mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	unsigned int size)
+{
+	if (meta->ptr.type == PTR_TO_PACKET)
+		return mem_stx_data(nfp_prog, meta, size);
+
+	if (meta->ptr.type == PTR_TO_STACK)
+		return mem_stx_stack(nfp_prog, meta, size,
+				     meta->ptr.off + meta->ptr.var_off.value);
+
 	return -EOPNOTSUPP;
 }
 
+static int mem_stx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return mem_stx(nfp_prog, meta, 1);
+}
+
+static int mem_stx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return mem_stx(nfp_prog, meta, 2);
+}
+
 static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-	if (nfp_prog->act == NN_ACT_XDP)
-		return mem_stx4_xdp(nfp_prog, meta);
-	return mem_stx4_skb(nfp_prog, meta);
+	return mem_stx(nfp_prog, meta, 4);
+}
+
+static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return mem_stx(nfp_prog, meta, 8);
 }
 
 static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
@@ -1202,8 +1641,10 @@ static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	const struct bpf_insn *insn = &meta->insn;
 	u64 imm = insn->imm; /* sign extend */
-	u32 or1 = reg_a(insn->dst_reg * 2), or2 = reg_b(insn->dst_reg * 2 + 1);
-	u32 tmp_reg;
+	swreg or1, or2, tmp_reg;
+
+	or1 = reg_a(insn->dst_reg * 2);
+	or2 = reg_b(insn->dst_reg * 2 + 1);
 
 	if (insn->off < 0) /* TODO */
 		return -EOPNOTSUPP;
@@ -1230,29 +1671,29 @@ static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 
 static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-	return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false);
+	return wrp_cmp_imm(nfp_prog, meta, BR_BLO, true);
 }
 
 static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-	return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true);
+	return wrp_cmp_imm(nfp_prog, meta, BR_BHS, false);
 }
 
 static int jlt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-	return wrp_cmp_imm(nfp_prog, meta, BR_BHS, false);
+	return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false);
 }
 
 static int jle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-	return wrp_cmp_imm(nfp_prog, meta, BR_BLO, true);
+	return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true);
 }
 
 static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	const struct bpf_insn *insn = &meta->insn;
 	u64 imm = insn->imm; /* sign extend */
-	u32 tmp_reg;
+	swreg tmp_reg;
 
 	if (insn->off < 0) /* TODO */
 		return -EOPNOTSUPP;
@@ -1283,7 +1724,7 @@ static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	const struct bpf_insn *insn = &meta->insn;
 	u64 imm = insn->imm; /* sign extend */
-	u32 tmp_reg;
+	swreg tmp_reg;
 
 	if (insn->off < 0) /* TODO */
 		return -EOPNOTSUPP;
@@ -1292,6 +1733,7 @@ static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 		emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
 			 ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
 		emit_br(nfp_prog, BR_BNE, insn->off, 0);
+		return 0;
 	}
 
 	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
@@ -1327,22 +1769,22 @@ static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 
 static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-	return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false);
+	return wrp_cmp_reg(nfp_prog, meta, BR_BLO, true);
 }
 
 static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-	return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true);
+	return wrp_cmp_reg(nfp_prog, meta, BR_BHS, false);
 }
 
 static int jlt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-	return wrp_cmp_reg(nfp_prog, meta, BR_BHS, false);
+	return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false);
 }
 
 static int jle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-	return wrp_cmp_reg(nfp_prog, meta, BR_BLO, true);
+	return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true);
 }
 
 static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
@@ -1375,6 +1817,7 @@ static const instr_cb_t instr_cb[256] = {
 	[BPF_ALU64 | BPF_ADD | BPF_K] =	add_imm64,
 	[BPF_ALU64 | BPF_SUB | BPF_X] =	sub_reg64,
 	[BPF_ALU64 | BPF_SUB | BPF_K] =	sub_imm64,
+	[BPF_ALU64 | BPF_NEG] =		neg_reg64,
 	[BPF_ALU64 | BPF_LSH | BPF_K] =	shl_imm64,
 	[BPF_ALU64 | BPF_RSH | BPF_K] =	shr_imm64,
 	[BPF_ALU | BPF_MOV | BPF_X] =	mov_reg,
@@ -1389,7 +1832,9 @@ static const instr_cb_t instr_cb[256] = {
 	[BPF_ALU | BPF_ADD | BPF_K] =	add_imm,
 	[BPF_ALU | BPF_SUB | BPF_X] =	sub_reg,
 	[BPF_ALU | BPF_SUB | BPF_K] =	sub_imm,
+	[BPF_ALU | BPF_NEG] =		neg_reg,
 	[BPF_ALU | BPF_LSH | BPF_K] =	shl_imm,
+	[BPF_ALU | BPF_END | BPF_X] =	end_reg32,
 	[BPF_LD | BPF_IMM | BPF_DW] =	imm_ld8,
 	[BPF_LD | BPF_ABS | BPF_B] =	data_ld1,
 	[BPF_LD | BPF_ABS | BPF_H] =	data_ld2,
@@ -1397,8 +1842,18 @@ static const instr_cb_t instr_cb[256] = {
 	[BPF_LD | BPF_IND | BPF_B] =	data_ind_ld1,
 	[BPF_LD | BPF_IND | BPF_H] =	data_ind_ld2,
 	[BPF_LD | BPF_IND | BPF_W] =	data_ind_ld4,
+	[BPF_LDX | BPF_MEM | BPF_B] =	mem_ldx1,
+	[BPF_LDX | BPF_MEM | BPF_H] =	mem_ldx2,
 	[BPF_LDX | BPF_MEM | BPF_W] =	mem_ldx4,
+	[BPF_LDX | BPF_MEM | BPF_DW] =	mem_ldx8,
+	[BPF_STX | BPF_MEM | BPF_B] =	mem_stx1,
+	[BPF_STX | BPF_MEM | BPF_H] =	mem_stx2,
 	[BPF_STX | BPF_MEM | BPF_W] =	mem_stx4,
+	[BPF_STX | BPF_MEM | BPF_DW] =	mem_stx8,
+	[BPF_ST | BPF_MEM | BPF_B] =	mem_st1,
+	[BPF_ST | BPF_MEM | BPF_H] =	mem_st2,
+	[BPF_ST | BPF_MEM | BPF_W] =	mem_st4,
+	[BPF_ST | BPF_MEM | BPF_DW] =	mem_st8,
 	[BPF_JMP | BPF_JA | BPF_K] =	jump,
 	[BPF_JMP | BPF_JEQ | BPF_K] =	jeq_imm,
 	[BPF_JMP | BPF_JGT | BPF_K] =	jgt_imm,
@@ -1510,37 +1965,9 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
 
 static void nfp_intro(struct nfp_prog *nfp_prog)
 {
-	emit_alu(nfp_prog, pkt_reg(nfp_prog),
-		 reg_none(), ALU_OP_NONE, NFP_BPF_ABI_PKT);
-}
-
-static void nfp_outro_tc_legacy(struct nfp_prog *nfp_prog)
-{
-	const u8 act2code[] = {
-		[NN_ACT_TC_DROP]  = 0x22,
-		[NN_ACT_TC_REDIR] = 0x24
-	};
-	/* Target for aborts */
-	nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
-	wrp_immed(nfp_prog, reg_both(0), 0);
-
-	/* Target for normal exits */
-	nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
-	/* Legacy TC mode:
-	 *   0        0x11 -> pass,  count as stat0
-	 *  -1  drop  0x22 -> drop,  count as stat1
-	 *     redir  0x24 -> redir, count as stat1
-	 *  ife mark  0x21 -> pass,  count as stat1
-	 *  ife + tx  0x24 -> redir, count as stat1
-	 */
-	emit_br_byte_neq(nfp_prog, reg_b(0), 0xff, 0, nfp_prog->tgt_done, 2);
-	emit_alu(nfp_prog, reg_a(0),
-		 reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
-	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16);
-
-	emit_br(nfp_prog, BR_UNC, nfp_prog->tgt_done, 1);
-	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(act2code[nfp_prog->act]),
-		      SHF_SC_L_SHF, 16);
+	wrp_immed(nfp_prog, plen_reg(nfp_prog), GENMASK(13, 0));
+	emit_alu(nfp_prog, plen_reg(nfp_prog),
+		 plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog));
 }
 
 static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
@@ -1562,8 +1989,7 @@ static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
 
 	emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
 
-	emit_alu(nfp_prog, reg_a(0),
-		 reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
+	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
 	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16);
 
 	/* Target for normal exits */
@@ -1572,8 +1998,7 @@ static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
 	/* if R0 > 7 jump to abort */
 	emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0));
 	emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
-	emit_alu(nfp_prog, reg_a(0),
-		 reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
+	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
 
 	wrp_immed(nfp_prog, reg_b(2), 0x41221211);
 	wrp_immed(nfp_prog, reg_b(3), 0x41001211);
@@ -1610,8 +2035,7 @@ static void nfp_outro_xdp(struct nfp_prog *nfp_prog)
 
 	emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
 
-	emit_alu(nfp_prog, reg_a(0),
-		 reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
+	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
 	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16);
 
 	/* Target for normal exits */
@@ -1632,24 +2056,21 @@ static void nfp_outro_xdp(struct nfp_prog *nfp_prog)
 
 	emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
 
-	emit_alu(nfp_prog, reg_a(0),
-		 reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
+	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
 	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
 }
 
 static void nfp_outro(struct nfp_prog *nfp_prog)
 {
-	switch (nfp_prog->act) {
-	case NN_ACT_DIRECT:
+	switch (nfp_prog->type) {
+	case BPF_PROG_TYPE_SCHED_CLS:
 		nfp_outro_tc_da(nfp_prog);
 		break;
-	case NN_ACT_TC_DROP:
-	case NN_ACT_TC_REDIR:
-		nfp_outro_tc_legacy(nfp_prog);
-		break;
-	case NN_ACT_XDP:
+	case BPF_PROG_TYPE_XDP:
 		nfp_outro_xdp(nfp_prog);
 		break;
+	default:
+		WARN_ON(1);
 	}
 }
 
@@ -1688,29 +2109,11 @@ static int nfp_translate(struct nfp_prog *nfp_prog)
 	if (nfp_prog->error)
 		return nfp_prog->error;
 
-	return nfp_fixup_branches(nfp_prog);
-}
-
-static int
-nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
-		 unsigned int cnt)
-{
-	unsigned int i;
-
-	for (i = 0; i < cnt; i++) {
-		struct nfp_insn_meta *meta;
-
-		meta = kzalloc(sizeof(*meta), GFP_KERNEL);
-		if (!meta)
-			return -ENOMEM;
-
-		meta->insn = prog[i];
-		meta->n = i;
-
-		list_add_tail(&meta->l, &nfp_prog->insns);
-	}
+	wrp_nops(nfp_prog, NFP_USTORE_PREFETCH_WINDOW);
+	if (nfp_prog->error)
+		return nfp_prog->error;
 
-	return 0;
+	return nfp_fixup_branches(nfp_prog);
 }
 
 /* --- Optimizations --- */
@@ -1737,38 +2140,6 @@ static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog)
 	}
 }
 
-/* Try to rename registers so that program uses only low ones */
-static int nfp_bpf_opt_reg_rename(struct nfp_prog *nfp_prog)
-{
-	bool reg_used[MAX_BPF_REG] = {};
-	u8 tgt_reg[MAX_BPF_REG] = {};
-	struct nfp_insn_meta *meta;
-	unsigned int i, j;
-
-	list_for_each_entry(meta, &nfp_prog->insns, l) {
-		if (meta->skip)
-			continue;
-
-		reg_used[meta->insn.src_reg] = true;
-		reg_used[meta->insn.dst_reg] = true;
-	}
-
-	for (i = 0, j = 0; i < ARRAY_SIZE(tgt_reg); i++) {
-		if (!reg_used[i])
-			continue;
-
-		tgt_reg[i] = j++;
-	}
-	nfp_prog->num_regs = j;
-
-	list_for_each_entry(meta, &nfp_prog->insns, l) {
-		meta->insn.src_reg = tgt_reg[meta->insn.src_reg];
-		meta->insn.dst_reg = tgt_reg[meta->insn.dst_reg];
-	}
-
-	return 0;
-}
-
 /* Remove masking after load since our load guarantees this is not needed */
 static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
 {
@@ -1845,79 +2216,47 @@ static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog)
 
 static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
 {
-	int ret;
-
 	nfp_bpf_opt_reg_init(nfp_prog);
 
-	ret = nfp_bpf_opt_reg_rename(nfp_prog);
-	if (ret)
-		return ret;
-
 	nfp_bpf_opt_ld_mask(nfp_prog);
 	nfp_bpf_opt_ld_shift(nfp_prog);
 
 	return 0;
 }
 
-/**
- * nfp_bpf_jit() - translate BPF code into NFP assembly
- * @filter:	kernel BPF filter struct
- * @prog_mem:	memory to store assembler instructions
- * @act:	action attached to this eBPF program
- * @prog_start:	offset of the first instruction when loaded
- * @prog_done:	where to jump on exit
- * @prog_sz:	size of @prog_mem in instructions
- * @res:	achieved parameters of translation results
- */
-int
-nfp_bpf_jit(struct bpf_prog *filter, void *prog_mem,
-	    enum nfp_bpf_action_type act,
-	    unsigned int prog_start, unsigned int prog_done,
-	    unsigned int prog_sz, struct nfp_bpf_result *res)
+static int nfp_bpf_ustore_calc(struct nfp_prog *nfp_prog, __le64 *ustore)
 {
-	struct nfp_prog *nfp_prog;
-	int ret;
+	int i;
 
-	nfp_prog = kzalloc(sizeof(*nfp_prog), GFP_KERNEL);
-	if (!nfp_prog)
-		return -ENOMEM;
+	for (i = 0; i < nfp_prog->prog_len; i++) {
+		int err;
 
-	INIT_LIST_HEAD(&nfp_prog->insns);
-	nfp_prog->act = act;
-	nfp_prog->start_off = prog_start;
-	nfp_prog->tgt_done = prog_done;
+		err = nfp_ustore_check_valid_no_ecc(nfp_prog->prog[i]);
+		if (err)
+			return err;
 
-	ret = nfp_prog_prepare(nfp_prog, filter->insnsi, filter->len);
-	if (ret)
-		goto out;
+		nfp_prog->prog[i] = nfp_ustore_calc_ecc_insn(nfp_prog->prog[i]);
 
-	ret = nfp_prog_verify(nfp_prog, filter);
-	if (ret)
-		goto out;
+		ustore[i] = cpu_to_le64(nfp_prog->prog[i]);
+	}
 
-	ret = nfp_bpf_optimize(nfp_prog);
-	if (ret)
-		goto out;
+	return 0;
+}
 
-	if (nfp_prog->num_regs <= 7)
-		nfp_prog->regs_per_thread = 16;
-	else
-		nfp_prog->regs_per_thread = 32;
+int nfp_bpf_jit(struct nfp_prog *nfp_prog)
+{
+	int ret;
 
-	nfp_prog->prog = prog_mem;
-	nfp_prog->__prog_alloc_len = prog_sz;
+	ret = nfp_bpf_optimize(nfp_prog);
+	if (ret)
+		return ret;
 
 	ret = nfp_translate(nfp_prog);
 	if (ret) {
 		pr_err("Translation failed with error %d (translated: %u)\n",
 		       ret, nfp_prog->n_translated);
-		ret = -EINVAL;
+		return -EINVAL;
 	}
 
-	res->n_instr = nfp_prog->prog_len;
-	res->dense_mode = nfp_prog->num_regs <= 7;
-out:
-	nfp_prog_free(nfp_prog);
-
-	return ret;
+	return nfp_bpf_ustore_calc(nfp_prog, (__force __le64 *)nfp_prog->prog);
 }
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index be2cf10a2cd7..e379b78e86ef 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -42,9 +42,11 @@
 
 static bool nfp_net_ebpf_capable(struct nfp_net *nn)
 {
+#ifdef __LITTLE_ENDIAN
 	if (nn->cap & NFP_NET_CFG_CTRL_BPF &&
 	    nn_readb(nn, NFP_NET_CFG_BPF_ABI) == NFP_NET_BPF_ABI)
 		return true;
+#endif
 	return false;
 }
 
@@ -52,28 +54,25 @@ static int
 nfp_bpf_xdp_offload(struct nfp_app *app, struct nfp_net *nn,
 		    struct bpf_prog *prog)
 {
-	struct tc_cls_bpf_offload cmd = {
-		.prog = prog,
-	};
+	bool running, xdp_running;
 	int ret;
 
 	if (!nfp_net_ebpf_capable(nn))
 		return -EINVAL;
 
-	if (nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) {
-		if (!nn->dp.bpf_offload_xdp)
-			return prog ? -EBUSY : 0;
-		cmd.command = prog ? TC_CLSBPF_REPLACE : TC_CLSBPF_DESTROY;
-	} else {
-		if (!prog)
-			return 0;
-		cmd.command = TC_CLSBPF_ADD;
-	}
+	running = nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF;
+	xdp_running = running && nn->dp.bpf_offload_xdp;
+
+	if (!prog && !xdp_running)
+		return 0;
+	if (prog && running && !xdp_running)
+		return -EBUSY;
 
-	ret = nfp_net_bpf_offload(nn, &cmd);
+	ret = nfp_net_bpf_offload(nn, prog, running);
 	/* Stop offload if replace not possible */
-	if (ret && cmd.command == TC_CLSBPF_REPLACE)
+	if (ret && prog)
 		nfp_bpf_xdp_offload(app, nn, NULL);
+
 	nn->dp.bpf_offload_xdp = prog && !ret;
 	return ret;
 }
@@ -83,59 +82,78 @@ static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn)
 	return nfp_net_ebpf_capable(nn) ? "BPF" : "";
 }
 
-static int
-nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
-{
-	struct nfp_net_bpf_priv *priv;
-	int ret;
-
-	/* Limit to single port, otherwise it's just a NIC */
-	if (id > 0) {
-		nfp_warn(app->cpp,
-			 "BPF NIC doesn't support more than one port right now\n");
-		nn->port = nfp_port_alloc(app, NFP_PORT_INVALID, nn->dp.netdev);
-		return PTR_ERR_OR_ZERO(nn->port);
-	}
-
-	priv = kmalloc(sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
-
-	nn->app_priv = priv;
-	spin_lock_init(&priv->rx_filter_lock);
-	setup_timer(&priv->rx_filter_stats_timer,
-		    nfp_net_filter_stats_timer, (unsigned long)nn);
-
-	ret = nfp_app_nic_vnic_alloc(app, nn, id);
-	if (ret)
-		kfree(priv);
-
-	return ret;
-}
-
 static void nfp_bpf_vnic_free(struct nfp_app *app, struct nfp_net *nn)
 {
 	if (nn->dp.bpf_offload_xdp)
 		nfp_bpf_xdp_offload(app, nn, NULL);
-	kfree(nn->app_priv);
 }
 
-static int nfp_bpf_setup_tc(struct nfp_app *app, struct net_device *netdev,
-			    enum tc_setup_type type, void *type_data)
+static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
+				     void *type_data, void *cb_priv)
 {
 	struct tc_cls_bpf_offload *cls_bpf = type_data;
-	struct nfp_net *nn = netdev_priv(netdev);
+	struct nfp_net *nn = cb_priv;
 
-	if (type != TC_SETUP_CLSBPF || !nfp_net_ebpf_capable(nn) ||
-	    !is_classid_clsact_ingress(cls_bpf->common.classid) ||
+	if (type != TC_SETUP_CLSBPF ||
+	    !tc_can_offload(nn->dp.netdev) ||
+	    !nfp_net_ebpf_capable(nn) ||
 	    cls_bpf->common.protocol != htons(ETH_P_ALL) ||
 	    cls_bpf->common.chain_index)
 		return -EOPNOTSUPP;
-
 	if (nn->dp.bpf_offload_xdp)
 		return -EBUSY;
 
-	return nfp_net_bpf_offload(nn, cls_bpf);
+	/* Only support TC direct action */
+	if (!cls_bpf->exts_integrated ||
+	    tcf_exts_has_actions(cls_bpf->exts)) {
+		nn_err(nn, "only direct action with no legacy actions supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	switch (cls_bpf->command) {
+	case TC_CLSBPF_REPLACE:
+		return nfp_net_bpf_offload(nn, cls_bpf->prog, true);
+	case TC_CLSBPF_ADD:
+		return nfp_net_bpf_offload(nn, cls_bpf->prog, false);
+	case TC_CLSBPF_DESTROY:
+		return nfp_net_bpf_offload(nn, NULL, true);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int nfp_bpf_setup_tc_block(struct net_device *netdev,
+				  struct tc_block_offload *f)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+
+	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+		return -EOPNOTSUPP;
+
+	switch (f->command) {
+	case TC_BLOCK_BIND:
+		return tcf_block_cb_register(f->block,
+					     nfp_bpf_setup_tc_block_cb,
+					     nn, nn);
+	case TC_BLOCK_UNBIND:
+		tcf_block_cb_unregister(f->block,
+					nfp_bpf_setup_tc_block_cb,
+					nn);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int nfp_bpf_setup_tc(struct nfp_app *app, struct net_device *netdev,
+			    enum tc_setup_type type, void *type_data)
+{
+	switch (type) {
+	case TC_SETUP_BLOCK:
+		return nfp_bpf_setup_tc_block(netdev, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
 static bool nfp_bpf_tc_busy(struct nfp_app *app, struct nfp_net *nn)
@@ -149,10 +167,14 @@ const struct nfp_app_type app_bpf = {
 
 	.extra_cap	= nfp_bpf_extra_cap,
 
-	.vnic_alloc	= nfp_bpf_vnic_alloc,
+	.vnic_alloc	= nfp_app_nic_vnic_alloc,
 	.vnic_free	= nfp_bpf_vnic_free,
 
 	.setup_tc	= nfp_bpf_setup_tc,
 	.tc_busy	= nfp_bpf_tc_busy,
 	.xdp_offload	= nfp_bpf_xdp_offload,
+
+	.bpf_verifier_prep	= nfp_bpf_verifier_prep,
+	.bpf_translate		= nfp_bpf_translate,
+	.bpf_destroy		= nfp_bpf_destroy,
 };
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 4051e943f363..082a15f6dfb5 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -36,10 +36,11 @@
 
 #include <linux/bitfield.h>
 #include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
 #include <linux/list.h>
 #include <linux/types.h>
 
-#include "../nfp_net.h"
+#include "../nfp_asm.h"
 
 /* For branch fixup logic use up-most byte of branch instruction as scratch
  * area.  Remember to clear this before sending instructions to HW!
@@ -53,51 +54,29 @@ enum br_special {
 };
 
 enum static_regs {
-	STATIC_REG_PKT		= 1,
-#define REG_PKT_BANK	ALU_DST_A
-	STATIC_REG_IMM		= 2, /* Bank AB */
+	STATIC_REG_IMM		= 21, /* Bank AB */
+	STATIC_REG_STACK	= 22, /* Bank A */
+	STATIC_REG_PKT_LEN	= 22, /* Bank B */
 };
 
-enum nfp_bpf_action_type {
-	NN_ACT_TC_DROP,
-	NN_ACT_TC_REDIR,
-	NN_ACT_DIRECT,
-	NN_ACT_XDP,
+enum pkt_vec {
+	PKT_VEC_PKT_LEN		= 0,
+	PKT_VEC_PKT_PTR		= 2,
 };
 
-/* Software register representation, hardware encoding in asm.h */
-#define NN_REG_TYPE	GENMASK(31, 24)
-#define NN_REG_VAL	GENMASK(7, 0)
-
-enum nfp_bpf_reg_type {
-	NN_REG_GPR_A =	BIT(0),
-	NN_REG_GPR_B =	BIT(1),
-	NN_REG_NNR =	BIT(2),
-	NN_REG_XFER =	BIT(3),
-	NN_REG_IMM =	BIT(4),
-	NN_REG_NONE =	BIT(5),
-};
-
-#define NN_REG_GPR_BOTH	(NN_REG_GPR_A | NN_REG_GPR_B)
-
-#define reg_both(x)	((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_BOTH))
-#define reg_a(x)	((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_A))
-#define reg_b(x)	((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_B))
-#define reg_nnr(x)	((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_NNR))
-#define reg_xfer(x)	((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_XFER))
-#define reg_imm(x)	((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_IMM))
-#define reg_none()	(FIELD_PREP(NN_REG_TYPE, NN_REG_NONE))
+#define pv_len(np)	reg_lm(1, PKT_VEC_PKT_LEN)
+#define pv_ctm_ptr(np)	reg_lm(1, PKT_VEC_PKT_PTR)
 
-#define pkt_reg(np)	reg_a((np)->regs_per_thread - STATIC_REG_PKT)
-#define imm_a(np)	reg_a((np)->regs_per_thread - STATIC_REG_IMM)
-#define imm_b(np)	reg_b((np)->regs_per_thread - STATIC_REG_IMM)
-#define imm_both(np)	reg_both((np)->regs_per_thread - STATIC_REG_IMM)
+#define stack_reg(np)	reg_a(STATIC_REG_STACK)
+#define stack_imm(np)	imm_b(np)
+#define plen_reg(np)	reg_b(STATIC_REG_PKT_LEN)
+#define pptr_reg(np)	pv_ctm_ptr(np)
+#define imm_a(np)	reg_a(STATIC_REG_IMM)
+#define imm_b(np)	reg_b(STATIC_REG_IMM)
+#define imm_both(np)	reg_both(STATIC_REG_IMM)
 
-#define NFP_BPF_ABI_FLAGS	reg_nnr(0)
+#define NFP_BPF_ABI_FLAGS	reg_imm(0)
 #define   NFP_BPF_ABI_FLAG_MARK	1
-#define NFP_BPF_ABI_MARK	reg_nnr(1)
-#define NFP_BPF_ABI_PKT		reg_nnr(2)
-#define NFP_BPF_ABI_LEN		reg_nnr(3)
 
 struct nfp_prog;
 struct nfp_insn_meta;
@@ -113,6 +92,8 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
 /**
  * struct nfp_insn_meta - BPF instruction wrapper
  * @insn: BPF instruction
+ * @ptr: pointer type for memory operations
+ * @ptr_not_const: pointer is not always constant
  * @off: index of first generated machine instruction (in nfp_prog.prog)
  * @n: eBPF instruction number
  * @skip: skip this instruction (optimized out)
@@ -121,6 +102,8 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
  */
 struct nfp_insn_meta {
 	struct bpf_insn insn;
+	struct bpf_reg_state ptr;
+	bool ptr_not_const;
 	unsigned int off;
 	unsigned short n;
 	bool skip;
@@ -156,15 +139,15 @@ static inline u8 mbpf_mode(const struct nfp_insn_meta *meta)
  * @prog: machine code
  * @prog_len: number of valid instructions in @prog array
  * @__prog_alloc_len: alloc size of @prog array
- * @act: BPF program/action type (TC DA, TC with action, XDP etc.)
- * @num_regs: number of registers used by this program
- * @regs_per_thread: number of basic registers allocated per thread
+ * @verifier_meta: temporary storage for verifier's insn meta
+ * @type: BPF program type
  * @start_off: address of the first instruction in the memory
  * @tgt_out: jump target for normal exit
  * @tgt_abort: jump target for abort (e.g. access outside of packet buffer)
  * @tgt_done: jump target to get the next packet
  * @n_translated: number of successfully translated instructions (for errors)
  * @error: error code if something went wrong
+ * @stack_depth: max stack depth from the verifier
  * @insns: list of BPF instruction wrappers (struct nfp_insn_meta)
  */
 struct nfp_prog {
@@ -172,10 +155,9 @@ struct nfp_prog {
 	unsigned int prog_len;
 	unsigned int __prog_alloc_len;
 
-	enum nfp_bpf_action_type act;
+	struct nfp_insn_meta *verifier_meta;
 
-	unsigned int num_regs;
-	unsigned int regs_per_thread;
+	enum bpf_prog_type type;
 
 	unsigned int start_off;
 	unsigned int tgt_out;
@@ -185,40 +167,26 @@ struct nfp_prog {
 	unsigned int n_translated;
 	int error;
 
-	struct list_head insns;
-};
+	unsigned int stack_depth;
 
-struct nfp_bpf_result {
-	unsigned int n_instr;
-	bool dense_mode;
+	struct list_head insns;
 };
 
-int
-nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act,
-	    unsigned int prog_start, unsigned int prog_done,
-	    unsigned int prog_sz, struct nfp_bpf_result *res);
+int nfp_bpf_jit(struct nfp_prog *prog);
 
-int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog);
+extern const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops;
 
+struct netdev_bpf;
+struct nfp_app;
 struct nfp_net;
-struct tc_cls_bpf_offload;
-
-/**
- * struct nfp_net_bpf_priv - per-vNIC BPF private data
- * @rx_filter:		Filter offload statistics - dropped packets/bytes
- * @rx_filter_prev:	Filter offload statistics - values from previous update
- * @rx_filter_change:	Jiffies when statistics last changed
- * @rx_filter_stats_timer:  Timer for polling filter offload statistics
- * @rx_filter_lock:	Lock protecting timer state changes (teardown)
- */
-struct nfp_net_bpf_priv {
-	struct nfp_stat_pair rx_filter, rx_filter_prev;
-	unsigned long rx_filter_change;
-	struct timer_list rx_filter_stats_timer;
-	spinlock_t rx_filter_lock;
-};
 
-int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf);
-void nfp_net_filter_stats_timer(unsigned long data);
+int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog,
+			bool old_prog);
 
+int nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn,
+			  struct netdev_bpf *bpf);
+int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn,
+		      struct bpf_prog *prog);
+int nfp_bpf_destroy(struct nfp_app *app, struct nfp_net *nn,
+		    struct bpf_prog *prog);
 #endif
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index a88bb5bc0082..b6cee71f49d3 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -51,112 +51,114 @@
 #include "../nfp_net_ctrl.h"
 #include "../nfp_net.h"
 
-void nfp_net_filter_stats_timer(unsigned long data)
+static int
+nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
+		 unsigned int cnt)
 {
-	struct nfp_net *nn = (void *)data;
-	struct nfp_net_bpf_priv *priv;
-	struct nfp_stat_pair latest;
-
-	priv = nn->app_priv;
-
-	spin_lock_bh(&priv->rx_filter_lock);
+	unsigned int i;
 
-	if (nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF)
-		mod_timer(&priv->rx_filter_stats_timer,
-			  jiffies + NFP_NET_STAT_POLL_IVL);
+	for (i = 0; i < cnt; i++) {
+		struct nfp_insn_meta *meta;
 
-	spin_unlock_bh(&priv->rx_filter_lock);
+		meta = kzalloc(sizeof(*meta), GFP_KERNEL);
+		if (!meta)
+			return -ENOMEM;
 
-	latest.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES);
-	latest.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES);
+		meta->insn = prog[i];
+		meta->n = i;
 
-	if (latest.pkts != priv->rx_filter.pkts)
-		priv->rx_filter_change = jiffies;
+		list_add_tail(&meta->l, &nfp_prog->insns);
+	}
 
-	priv->rx_filter = latest;
+	return 0;
 }
 
-static void nfp_net_bpf_stats_reset(struct nfp_net *nn)
+static void nfp_prog_free(struct nfp_prog *nfp_prog)
 {
-	struct nfp_net_bpf_priv *priv = nn->app_priv;
+	struct nfp_insn_meta *meta, *tmp;
 
-	priv->rx_filter.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES);
-	priv->rx_filter.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES);
-	priv->rx_filter_prev = priv->rx_filter;
-	priv->rx_filter_change = jiffies;
+	list_for_each_entry_safe(meta, tmp, &nfp_prog->insns, l) {
+		list_del(&meta->l);
+		kfree(meta);
+	}
+	kfree(nfp_prog);
 }
 
-static int
-nfp_net_bpf_stats_update(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
+int nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn,
+			  struct netdev_bpf *bpf)
 {
-	struct nfp_net_bpf_priv *priv = nn->app_priv;
-	u64 bytes, pkts;
+	struct bpf_prog *prog = bpf->verifier.prog;
+	struct nfp_prog *nfp_prog;
+	int ret;
 
-	pkts = priv->rx_filter.pkts - priv->rx_filter_prev.pkts;
-	bytes = priv->rx_filter.bytes - priv->rx_filter_prev.bytes;
-	bytes -= pkts * ETH_HLEN;
+	nfp_prog = kzalloc(sizeof(*nfp_prog), GFP_KERNEL);
+	if (!nfp_prog)
+		return -ENOMEM;
+	prog->aux->offload->dev_priv = nfp_prog;
 
-	priv->rx_filter_prev = priv->rx_filter;
+	INIT_LIST_HEAD(&nfp_prog->insns);
+	nfp_prog->type = prog->type;
 
-	tcf_exts_stats_update(cls_bpf->exts,
-			      bytes, pkts, priv->rx_filter_change);
+	ret = nfp_prog_prepare(nfp_prog, prog->insnsi, prog->len);
+	if (ret)
+		goto err_free;
 
-	return 0;
-}
+	nfp_prog->verifier_meta = nfp_prog_first_meta(nfp_prog);
+	bpf->verifier.ops = &nfp_bpf_analyzer_ops;
 
-static int
-nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
-{
-	const struct tc_action *a;
-	LIST_HEAD(actions);
+	return 0;
 
-	if (!cls_bpf->exts)
-		return NN_ACT_XDP;
+err_free:
+	nfp_prog_free(nfp_prog);
 
-	/* TC direct action */
-	if (cls_bpf->exts_integrated) {
-		if (!tcf_exts_has_actions(cls_bpf->exts))
-			return NN_ACT_DIRECT;
+	return ret;
+}
 
+int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn,
+		      struct bpf_prog *prog)
+{
+	struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;
+	unsigned int stack_size;
+	unsigned int max_instr;
+
+	stack_size = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64;
+	if (prog->aux->stack_depth > stack_size) {
+		nn_info(nn, "stack too large: program %dB > FW stack %dB\n",
+			prog->aux->stack_depth, stack_size);
 		return -EOPNOTSUPP;
 	}
 
-	/* TC legacy mode */
-	if (!tcf_exts_has_one_action(cls_bpf->exts))
-		return -EOPNOTSUPP;
+	nfp_prog->stack_depth = prog->aux->stack_depth;
+	nfp_prog->start_off = nn_readw(nn, NFP_NET_CFG_BPF_START);
+	nfp_prog->tgt_done = nn_readw(nn, NFP_NET_CFG_BPF_DONE);
 
-	tcf_exts_to_list(cls_bpf->exts, &actions);
-	list_for_each_entry(a, &actions, list) {
-		if (is_tcf_gact_shot(a))
-			return NN_ACT_TC_DROP;
+	max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN);
+	nfp_prog->__prog_alloc_len = max_instr * sizeof(u64);
 
-		if (is_tcf_mirred_egress_redirect(a) &&
-		    tcf_mirred_ifindex(a) == nn->dp.netdev->ifindex)
-			return NN_ACT_TC_REDIR;
-	}
+	nfp_prog->prog = kmalloc(nfp_prog->__prog_alloc_len, GFP_KERNEL);
+	if (!nfp_prog->prog)
+		return -ENOMEM;
 
-	return -EOPNOTSUPP;
+	return nfp_bpf_jit(nfp_prog);
 }
 
-static int
-nfp_net_bpf_offload_prepare(struct nfp_net *nn,
-			    struct tc_cls_bpf_offload *cls_bpf,
-			    struct nfp_bpf_result *res,
-			    void **code, dma_addr_t *dma_addr, u16 max_instr)
+int nfp_bpf_destroy(struct nfp_app *app, struct nfp_net *nn,
+		    struct bpf_prog *prog)
 {
-	unsigned int code_sz = max_instr * sizeof(u64);
-	enum nfp_bpf_action_type act;
-	u16 start_off, done_off;
-	unsigned int max_mtu;
-	int ret;
+	struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;
 
-	if (!IS_ENABLED(CONFIG_BPF_SYSCALL))
-		return -EOPNOTSUPP;
+	kfree(nfp_prog->prog);
+	nfp_prog_free(nfp_prog);
 
-	ret = nfp_net_bpf_get_act(nn, cls_bpf);
-	if (ret < 0)
-		return ret;
-	act = ret;
+	return 0;
+}
+
+static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog)
+{
+	struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;
+	unsigned int max_mtu;
+	dma_addr_t dma_addr;
+	int err;
 
 	max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32;
 	if (max_mtu < nn->dp.netdev->mtu) {
@@ -164,134 +166,80 @@ nfp_net_bpf_offload_prepare(struct nfp_net *nn,
 		return -EOPNOTSUPP;
 	}
 
-	start_off = nn_readw(nn, NFP_NET_CFG_BPF_START);
-	done_off = nn_readw(nn, NFP_NET_CFG_BPF_DONE);
-
-	*code = dma_zalloc_coherent(nn->dp.dev, code_sz, dma_addr, GFP_KERNEL);
-	if (!*code)
+	dma_addr = dma_map_single(nn->dp.dev, nfp_prog->prog,
+				  nfp_prog->prog_len * sizeof(u64),
+				  DMA_TO_DEVICE);
+	if (dma_mapping_error(nn->dp.dev, dma_addr))
 		return -ENOMEM;
 
-	ret = nfp_bpf_jit(cls_bpf->prog, *code, act, start_off, done_off,
-			  max_instr, res);
-	if (ret)
-		goto out;
+	nn_writew(nn, NFP_NET_CFG_BPF_SIZE, nfp_prog->prog_len);
+	nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, dma_addr);
 
-	return 0;
+	/* Load up the JITed code */
+	err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_BPF);
+	if (err)
+		nn_err(nn, "FW command error while loading BPF: %d\n", err);
 
-out:
-	dma_free_coherent(nn->dp.dev, code_sz, *code, *dma_addr);
-	return ret;
+	dma_unmap_single(nn->dp.dev, dma_addr, nfp_prog->prog_len * sizeof(u64),
+			 DMA_TO_DEVICE);
+
+	return err;
 }
 
-static void
-nfp_net_bpf_load_and_start(struct nfp_net *nn, u32 tc_flags,
-			   void *code, dma_addr_t dma_addr,
-			   unsigned int code_sz, unsigned int n_instr,
-			   bool dense_mode)
+static void nfp_net_bpf_start(struct nfp_net *nn)
 {
-	struct nfp_net_bpf_priv *priv = nn->app_priv;
-	u64 bpf_addr = dma_addr;
 	int err;
 
-	nn->dp.bpf_offload_skip_sw = !!(tc_flags & TCA_CLS_FLAGS_SKIP_SW);
-
-	if (dense_mode)
-		bpf_addr |= NFP_NET_CFG_BPF_CFG_8CTX;
-
-	nn_writew(nn, NFP_NET_CFG_BPF_SIZE, n_instr);
-	nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, bpf_addr);
-
-	/* Load up the JITed code */
-	err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_BPF);
-	if (err)
-		nn_err(nn, "FW command error while loading BPF: %d\n", err);
-
 	/* Enable passing packets through BPF function */
 	nn->dp.ctrl |= NFP_NET_CFG_CTRL_BPF;
 	nn_writel(nn, NFP_NET_CFG_CTRL, nn->dp.ctrl);
 	err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
 	if (err)
 		nn_err(nn, "FW command error while enabling BPF: %d\n", err);
-
-	dma_free_coherent(nn->dp.dev, code_sz, code, dma_addr);
-
-	nfp_net_bpf_stats_reset(nn);
-	mod_timer(&priv->rx_filter_stats_timer,
-		  jiffies + NFP_NET_STAT_POLL_IVL);
 }
 
 static int nfp_net_bpf_stop(struct nfp_net *nn)
 {
-	struct nfp_net_bpf_priv *priv = nn->app_priv;
-
 	if (!(nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF))
 		return 0;
 
-	spin_lock_bh(&priv->rx_filter_lock);
 	nn->dp.ctrl &= ~NFP_NET_CFG_CTRL_BPF;
-	spin_unlock_bh(&priv->rx_filter_lock);
 	nn_writel(nn, NFP_NET_CFG_CTRL, nn->dp.ctrl);
 
-	del_timer_sync(&priv->rx_filter_stats_timer);
-	nn->dp.bpf_offload_skip_sw = 0;
-
 	return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
 }
 
-int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
+int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog,
+			bool old_prog)
 {
-	struct nfp_bpf_result res;
-	dma_addr_t dma_addr;
-	u16 max_instr;
-	void *code;
 	int err;
 
-	max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN);
+	if (prog && !prog->aux->offload)
+		return -EINVAL;
 
-	switch (cls_bpf->command) {
-	case TC_CLSBPF_REPLACE:
-		/* There is nothing stopping us from implementing seamless
-		 * replace but the simple method of loading I adopted in
-		 * the firmware does not handle atomic replace (i.e. we have to
-		 * stop the BPF offload and re-enable it).  Leaking-in a few
-		 * frames which didn't have BPF applied in the hardware should
-		 * be fine if software fallback is available, though.
-		 */
-		if (nn->dp.bpf_offload_skip_sw)
-			return -EBUSY;
-
-		err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code,
-						  &dma_addr, max_instr);
-		if (err)
-			return err;
-
-		nfp_net_bpf_stop(nn);
-		nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code,
-					   dma_addr, max_instr * sizeof(u64),
-					   res.n_instr, res.dense_mode);
-		return 0;
+	if (prog && old_prog) {
+		u8 cap;
 
-	case TC_CLSBPF_ADD:
-		if (nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF)
+		cap = nn_readb(nn, NFP_NET_CFG_BPF_CAP);
+		if (!(cap & NFP_NET_BPF_CAP_RELO)) {
+			nn_err(nn, "FW does not support live reload\n");
 			return -EBUSY;
+		}
+	}
 
-		err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code,
-						  &dma_addr, max_instr);
-		if (err)
-			return err;
-
-		nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code,
-					   dma_addr, max_instr * sizeof(u64),
-					   res.n_instr, res.dense_mode);
-		return 0;
+	/* Something else is loaded, different program type? */
+	if (!old_prog && nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF)
+		return -EBUSY;
 
-	case TC_CLSBPF_DESTROY:
+	if (old_prog && !prog)
 		return nfp_net_bpf_stop(nn);
 
-	case TC_CLSBPF_STATS:
-		return nfp_net_bpf_stats_update(nn, cls_bpf);
+	err = nfp_net_bpf_load(nn, prog);
+	if (err)
+		return err;
+
+	if (!old_prog)
+		nfp_net_bpf_start(nn);
 
-	default:
-		return -EOPNOTSUPP;
-	}
+	return 0;
 }
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 5b783a91b115..8d43491ddd6b 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -40,12 +40,6 @@
 
 #include "main.h"
 
-/* Analyzer/verifier definitions */
-struct nfp_bpf_analyzer_priv {
-	struct nfp_prog *prog;
-	struct nfp_insn_meta *meta;
-};
-
 static struct nfp_insn_meta *
 nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 		  unsigned int insn_idx, unsigned int n_insns)
@@ -76,12 +70,12 @@ nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 
 static int
 nfp_bpf_check_exit(struct nfp_prog *nfp_prog,
-		   const struct bpf_verifier_env *env)
+		   struct bpf_verifier_env *env)
 {
-	const struct bpf_reg_state *reg0 = &env->cur_state.regs[0];
+	const struct bpf_reg_state *reg0 = cur_regs(env) + BPF_REG_0;
 	u64 imm;
 
-	if (nfp_prog->act == NN_ACT_XDP)
+	if (nfp_prog->type == BPF_PROG_TYPE_XDP)
 		return 0;
 
 	if (!(reg0->type == SCALAR_VALUE && tnum_is_const(reg0->var_off))) {
@@ -94,13 +88,8 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog,
 	}
 
 	imm = reg0->var_off.value;
-	if (nfp_prog->act != NN_ACT_DIRECT && imm != 0 && (imm & ~0U) != ~0U) {
-		pr_info("unsupported exit state: %d, imm: %llx\n",
-			reg0->type, imm);
-		return -EINVAL;
-	}
-
-	if (nfp_prog->act == NN_ACT_DIRECT && imm <= TC_ACT_REDIRECT &&
+	if (nfp_prog->type == BPF_PROG_TYPE_SCHED_CLS &&
+	    imm <= TC_ACT_REDIRECT &&
 	    imm != TC_ACT_SHOT && imm != TC_ACT_STOLEN &&
 	    imm != TC_ACT_QUEUED) {
 		pr_info("unsupported exit state: %d, imm: %llx\n",
@@ -112,29 +101,76 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog,
 }
 
 static int
-nfp_bpf_check_ctx_ptr(struct nfp_prog *nfp_prog,
-		      const struct bpf_verifier_env *env, u8 reg)
+nfp_bpf_check_stack_access(struct nfp_prog *nfp_prog,
+			   struct nfp_insn_meta *meta,
+			   const struct bpf_reg_state *reg)
 {
-	if (env->cur_state.regs[reg].type != PTR_TO_CTX)
+	s32 old_off, new_off;
+
+	if (!tnum_is_const(reg->var_off)) {
+		pr_info("variable ptr stack access\n");
 		return -EINVAL;
+	}
 
-	return 0;
+	if (meta->ptr.type == NOT_INIT)
+		return 0;
+
+	old_off = meta->ptr.off + meta->ptr.var_off.value;
+	new_off = reg->off + reg->var_off.value;
+
+	meta->ptr_not_const |= old_off != new_off;
+
+	if (!meta->ptr_not_const)
+		return 0;
+
+	if (old_off % 4 == new_off % 4)
+		return 0;
+
+	pr_info("stack access changed location was:%d is:%d\n",
+		old_off, new_off);
+	return -EINVAL;
 }
 
 static int
-nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
+nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+		  struct bpf_verifier_env *env, u8 reg_no)
 {
-	struct nfp_bpf_analyzer_priv *priv = env->analyzer_priv;
-	struct nfp_insn_meta *meta = priv->meta;
+	const struct bpf_reg_state *reg = cur_regs(env) + reg_no;
+	int err;
 
-	meta = nfp_bpf_goto_meta(priv->prog, meta, insn_idx, env->prog->len);
-	priv->meta = meta;
+	if (reg->type != PTR_TO_CTX &&
+	    reg->type != PTR_TO_STACK &&
+	    reg->type != PTR_TO_PACKET) {
+		pr_info("unsupported ptr type: %d\n", reg->type);
+		return -EINVAL;
+	}
 
-	if (meta->insn.src_reg == BPF_REG_10 ||
-	    meta->insn.dst_reg == BPF_REG_10) {
-		pr_err("stack not yet supported\n");
+	if (reg->type == PTR_TO_STACK) {
+		err = nfp_bpf_check_stack_access(nfp_prog, meta, reg);
+		if (err)
+			return err;
+	}
+
+	if (meta->ptr.type != NOT_INIT && meta->ptr.type != reg->type) {
+		pr_info("ptr type changed for instruction %d -> %d\n",
+			meta->ptr.type, reg->type);
 		return -EINVAL;
 	}
+
+	meta->ptr = *reg;
+
+	return 0;
+}
+
+static int
+nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
+{
+	struct nfp_prog *nfp_prog = env->prog->aux->offload->dev_priv;
+	struct nfp_insn_meta *meta = nfp_prog->verifier_meta;
+
+	meta = nfp_bpf_goto_meta(nfp_prog, meta, insn_idx, env->prog->len);
+	nfp_prog->verifier_meta = meta;
+
 	if (meta->insn.src_reg >= MAX_BPF_REG ||
 	    meta->insn.dst_reg >= MAX_BPF_REG) {
 		pr_err("program uses extended registers - jit hardening?\n");
@@ -142,37 +178,18 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
 	}
 
 	if (meta->insn.code == (BPF_JMP | BPF_EXIT))
-		return nfp_bpf_check_exit(priv->prog, env);
+		return nfp_bpf_check_exit(nfp_prog, env);
 
 	if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM))
-		return nfp_bpf_check_ctx_ptr(priv->prog, env,
-					     meta->insn.src_reg);
+		return nfp_bpf_check_ptr(nfp_prog, meta, env,
+					 meta->insn.src_reg);
 	if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM))
-		return nfp_bpf_check_ctx_ptr(priv->prog, env,
-					     meta->insn.dst_reg);
+		return nfp_bpf_check_ptr(nfp_prog, meta, env,
+					 meta->insn.dst_reg);
 
 	return 0;
 }
 
-static const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops = {
+const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops = {
 	.insn_hook = nfp_verify_insn,
 };
-
-int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog)
-{
-	struct nfp_bpf_analyzer_priv *priv;
-	int ret;
-
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
-
-	priv->prog = nfp_prog;
-	priv->meta = nfp_prog_first_meta(nfp_prog);
-
-	ret = bpf_analyzer(prog, &nfp_bpf_analyzer_ops, priv);
-
-	kfree(priv);
-
-	return ret;
-}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index 8ea9320014ee..c1c595f8bb87 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -36,7 +36,9 @@
 #include <net/switchdev.h>
 #include <net/tc_act/tc_gact.h>
 #include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_pedit.h>
 #include <net/tc_act/tc_vlan.h>
+#include <net/tc_act/tc_tunnel_key.h>
 
 #include "cmsg.h"
 #include "main.h"
@@ -45,13 +47,9 @@
 static void nfp_fl_pop_vlan(struct nfp_fl_pop_vlan *pop_vlan)
 {
 	size_t act_size = sizeof(struct nfp_fl_pop_vlan);
-	u16 tmp_pop_vlan_op;
 
-	tmp_pop_vlan_op =
-		FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) |
-		FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_POP_VLAN);
-
-	pop_vlan->a_op = cpu_to_be16(tmp_pop_vlan_op);
+	pop_vlan->head.jump_id = NFP_FL_ACTION_OPCODE_POP_VLAN;
+	pop_vlan->head.len_lw = act_size >> NFP_FL_LW_SIZ;
 	pop_vlan->reserved = 0;
 }
 
@@ -60,64 +58,373 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan,
 		 const struct tc_action *action)
 {
 	size_t act_size = sizeof(struct nfp_fl_push_vlan);
-	struct tcf_vlan *vlan = to_vlan(action);
 	u16 tmp_push_vlan_tci;
-	u16 tmp_push_vlan_op;
-
-	tmp_push_vlan_op =
-		FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) |
-		FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_PUSH_VLAN);
 
-	push_vlan->a_op = cpu_to_be16(tmp_push_vlan_op);
-	/* Set action push vlan parameters. */
+	push_vlan->head.jump_id = NFP_FL_ACTION_OPCODE_PUSH_VLAN;
+	push_vlan->head.len_lw = act_size >> NFP_FL_LW_SIZ;
 	push_vlan->reserved = 0;
 	push_vlan->vlan_tpid = tcf_vlan_push_proto(action);
 
 	tmp_push_vlan_tci =
-		FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, vlan->tcfv_push_prio) |
-		FIELD_PREP(NFP_FL_PUSH_VLAN_VID, vlan->tcfv_push_vid) |
+		FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, tcf_vlan_push_prio(action)) |
+		FIELD_PREP(NFP_FL_PUSH_VLAN_VID, tcf_vlan_push_vid(action)) |
 		NFP_FL_PUSH_VLAN_CFI;
 	push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci);
 }
 
+static bool nfp_fl_netdev_is_tunnel_type(struct net_device *out_dev,
+					 enum nfp_flower_tun_type tun_type)
+{
+	if (!out_dev->rtnl_link_ops)
+		return false;
+
+	if (!strcmp(out_dev->rtnl_link_ops->kind, "vxlan"))
+		return tun_type == NFP_FL_TUNNEL_VXLAN;
+
+	return false;
+}
+
 static int
 nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action,
 	      struct nfp_fl_payload *nfp_flow, bool last,
-	      struct net_device *in_dev)
+	      struct net_device *in_dev, enum nfp_flower_tun_type tun_type,
+	      int *tun_out_cnt)
 {
 	size_t act_size = sizeof(struct nfp_fl_output);
 	struct net_device *out_dev;
-	u16 tmp_output_op;
+	u16 tmp_flags;
 	int ifindex;
 
-	/* Set action opcode to output action. */
-	tmp_output_op =
-		FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) |
-		FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_OUTPUT);
-
-	output->a_op = cpu_to_be16(tmp_output_op);
-
-	/* Set action output parameters. */
-	output->flags = cpu_to_be16(last ? NFP_FL_OUT_FLAGS_LAST : 0);
+	output->head.jump_id = NFP_FL_ACTION_OPCODE_OUTPUT;
+	output->head.len_lw = act_size >> NFP_FL_LW_SIZ;
 
 	ifindex = tcf_mirred_ifindex(action);
 	out_dev = __dev_get_by_index(dev_net(in_dev), ifindex);
 	if (!out_dev)
 		return -EOPNOTSUPP;
 
-	/* Only offload egress ports are on the same device as the ingress
-	 * port.
+	tmp_flags = last ? NFP_FL_OUT_FLAGS_LAST : 0;
+
+	if (tun_type) {
+		/* Verify the egress netdev matches the tunnel type. */
+		if (!nfp_fl_netdev_is_tunnel_type(out_dev, tun_type))
+			return -EOPNOTSUPP;
+
+		if (*tun_out_cnt)
+			return -EOPNOTSUPP;
+		(*tun_out_cnt)++;
+
+		output->flags = cpu_to_be16(tmp_flags |
+					    NFP_FL_OUT_FLAGS_USE_TUN);
+		output->port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type);
+	} else {
+		/* Set action output parameters. */
+		output->flags = cpu_to_be16(tmp_flags);
+
+		/* Only offload if egress ports are on the same device as the
+		 * ingress port.
+		 */
+		if (!switchdev_port_same_parent_id(in_dev, out_dev))
+			return -EOPNOTSUPP;
+		if (!nfp_netdev_is_nfp_repr(out_dev))
+			return -EOPNOTSUPP;
+
+		output->port = cpu_to_be32(nfp_repr_get_port_id(out_dev));
+		if (!output->port)
+			return -EOPNOTSUPP;
+	}
+	nfp_flow->meta.shortcut = output->port;
+
+	return 0;
+}
+
+static bool nfp_fl_supported_tun_port(const struct tc_action *action)
+{
+	struct ip_tunnel_info *tun = tcf_tunnel_info(action);
+
+	return tun->key.tp_dst == htons(NFP_FL_VXLAN_PORT);
+}
+
+static struct nfp_fl_pre_tunnel *nfp_fl_pre_tunnel(char *act_data, int act_len)
+{
+	size_t act_size = sizeof(struct nfp_fl_pre_tunnel);
+	struct nfp_fl_pre_tunnel *pre_tun_act;
+
+	/* Pre_tunnel action must be first on action list.
+	 * If other actions already exist they need pushed forward.
 	 */
-	if (!switchdev_port_same_parent_id(in_dev, out_dev))
+	if (act_len)
+		memmove(act_data + act_size, act_data, act_len);
+
+	pre_tun_act = (struct nfp_fl_pre_tunnel *)act_data;
+
+	memset(pre_tun_act, 0, act_size);
+
+	pre_tun_act->head.jump_id = NFP_FL_ACTION_OPCODE_PRE_TUNNEL;
+	pre_tun_act->head.len_lw = act_size >> NFP_FL_LW_SIZ;
+
+	return pre_tun_act;
+}
+
+static int
+nfp_fl_set_vxlan(struct nfp_fl_set_vxlan *set_vxlan,
+		 const struct tc_action *action,
+		 struct nfp_fl_pre_tunnel *pre_tun)
+{
+	struct ip_tunnel_info *vxlan = tcf_tunnel_info(action);
+	size_t act_size = sizeof(struct nfp_fl_set_vxlan);
+	u32 tmp_set_vxlan_type_index = 0;
+	/* Currently support one pre-tunnel so index is always 0. */
+	int pretun_idx = 0;
+
+	if (vxlan->options_len) {
+		/* Do not support options e.g. vxlan gpe. */
 		return -EOPNOTSUPP;
-	if (!nfp_netdev_is_nfp_repr(out_dev))
+	}
+
+	set_vxlan->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL;
+	set_vxlan->head.len_lw = act_size >> NFP_FL_LW_SIZ;
+
+	/* Set tunnel type and pre-tunnel index. */
+	tmp_set_vxlan_type_index |=
+		FIELD_PREP(NFP_FL_IPV4_TUNNEL_TYPE, NFP_FL_TUNNEL_VXLAN) |
+		FIELD_PREP(NFP_FL_IPV4_PRE_TUN_INDEX, pretun_idx);
+
+	set_vxlan->tun_type_index = cpu_to_be32(tmp_set_vxlan_type_index);
+
+	set_vxlan->tun_id = vxlan->key.tun_id;
+	set_vxlan->tun_flags = vxlan->key.tun_flags;
+	set_vxlan->ipv4_ttl = vxlan->key.ttl;
+	set_vxlan->ipv4_tos = vxlan->key.tos;
+
+	/* Complete pre_tunnel action. */
+	pre_tun->ipv4_dst = vxlan->key.u.ipv4.dst;
+
+	return 0;
+}
+
+static void nfp_fl_set_helper32(u32 value, u32 mask, u8 *p_exact, u8 *p_mask)
+{
+	u32 oldvalue = get_unaligned((u32 *)p_exact);
+	u32 oldmask = get_unaligned((u32 *)p_mask);
+
+	value &= mask;
+	value |= oldvalue & ~mask;
+
+	put_unaligned(oldmask | mask, (u32 *)p_mask);
+	put_unaligned(value, (u32 *)p_exact);
+}
+
+static int
+nfp_fl_set_eth(const struct tc_action *action, int idx, u32 off,
+	       struct nfp_fl_set_eth *set_eth)
+{
+	u32 exact, mask;
+
+	if (off + 4 > ETH_ALEN * 2)
 		return -EOPNOTSUPP;
 
-	output->port = cpu_to_be32(nfp_repr_get_port_id(out_dev));
-	if (!output->port)
+	mask = ~tcf_pedit_mask(action, idx);
+	exact = tcf_pedit_val(action, idx);
+
+	if (exact & ~mask)
 		return -EOPNOTSUPP;
 
-	nfp_flow->meta.shortcut = output->port;
+	nfp_fl_set_helper32(exact, mask, &set_eth->eth_addr_val[off],
+			    &set_eth->eth_addr_mask[off]);
+
+	set_eth->reserved = cpu_to_be16(0);
+	set_eth->head.jump_id = NFP_FL_ACTION_OPCODE_SET_ETHERNET;
+	set_eth->head.len_lw = sizeof(*set_eth) >> NFP_FL_LW_SIZ;
+
+	return 0;
+}
+
+static int
+nfp_fl_set_ip4(const struct tc_action *action, int idx, u32 off,
+	       struct nfp_fl_set_ip4_addrs *set_ip_addr)
+{
+	__be32 exact, mask;
+
+	/* We are expecting tcf_pedit to return a big endian value */
+	mask = (__force __be32)~tcf_pedit_mask(action, idx);
+	exact = (__force __be32)tcf_pedit_val(action, idx);
+
+	if (exact & ~mask)
+		return -EOPNOTSUPP;
+
+	switch (off) {
+	case offsetof(struct iphdr, daddr):
+		set_ip_addr->ipv4_dst_mask = mask;
+		set_ip_addr->ipv4_dst = exact;
+		break;
+	case offsetof(struct iphdr, saddr):
+		set_ip_addr->ipv4_src_mask = mask;
+		set_ip_addr->ipv4_src = exact;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	set_ip_addr->reserved = cpu_to_be16(0);
+	set_ip_addr->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS;
+	set_ip_addr->head.len_lw = sizeof(*set_ip_addr) >> NFP_FL_LW_SIZ;
+
+	return 0;
+}
+
+static void
+nfp_fl_set_ip6_helper(int opcode_tag, int idx, __be32 exact, __be32 mask,
+		      struct nfp_fl_set_ipv6_addr *ip6)
+{
+	ip6->ipv6[idx % 4].mask = mask;
+	ip6->ipv6[idx % 4].exact = exact;
+
+	ip6->reserved = cpu_to_be16(0);
+	ip6->head.jump_id = opcode_tag;
+	ip6->head.len_lw = sizeof(*ip6) >> NFP_FL_LW_SIZ;
+}
+
+static int
+nfp_fl_set_ip6(const struct tc_action *action, int idx, u32 off,
+	       struct nfp_fl_set_ipv6_addr *ip_dst,
+	       struct nfp_fl_set_ipv6_addr *ip_src)
+{
+	__be32 exact, mask;
+
+	/* We are expecting tcf_pedit to return a big endian value */
+	mask = (__force __be32)~tcf_pedit_mask(action, idx);
+	exact = (__force __be32)tcf_pedit_val(action, idx);
+
+	if (exact & ~mask)
+		return -EOPNOTSUPP;
+
+	if (off < offsetof(struct ipv6hdr, saddr))
+		return -EOPNOTSUPP;
+	else if (off < offsetof(struct ipv6hdr, daddr))
+		nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_SRC, idx,
+				      exact, mask, ip_src);
+	else if (off < offsetof(struct ipv6hdr, daddr) +
+		       sizeof(struct in6_addr))
+		nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_DST, idx,
+				      exact, mask, ip_dst);
+	else
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+static int
+nfp_fl_set_tport(const struct tc_action *action, int idx, u32 off,
+		 struct nfp_fl_set_tport *set_tport, int opcode)
+{
+	u32 exact, mask;
+
+	if (off)
+		return -EOPNOTSUPP;
+
+	mask = ~tcf_pedit_mask(action, idx);
+	exact = tcf_pedit_val(action, idx);
+
+	if (exact & ~mask)
+		return -EOPNOTSUPP;
+
+	nfp_fl_set_helper32(exact, mask, set_tport->tp_port_val,
+			    set_tport->tp_port_mask);
+
+	set_tport->reserved = cpu_to_be16(0);
+	set_tport->head.jump_id = opcode;
+	set_tport->head.len_lw = sizeof(*set_tport) >> NFP_FL_LW_SIZ;
+
+	return 0;
+}
+
+static int
+nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len)
+{
+	struct nfp_fl_set_ipv6_addr set_ip6_dst, set_ip6_src;
+	struct nfp_fl_set_ip4_addrs set_ip_addr;
+	struct nfp_fl_set_tport set_tport;
+	struct nfp_fl_set_eth set_eth;
+	enum pedit_header_type htype;
+	int idx, nkeys, err;
+	size_t act_size;
+	u32 offset, cmd;
+
+	memset(&set_ip6_dst, 0, sizeof(set_ip6_dst));
+	memset(&set_ip6_src, 0, sizeof(set_ip6_src));
+	memset(&set_ip_addr, 0, sizeof(set_ip_addr));
+	memset(&set_tport, 0, sizeof(set_tport));
+	memset(&set_eth, 0, sizeof(set_eth));
+	nkeys = tcf_pedit_nkeys(action);
+
+	for (idx = 0; idx < nkeys; idx++) {
+		cmd = tcf_pedit_cmd(action, idx);
+		htype = tcf_pedit_htype(action, idx);
+		offset = tcf_pedit_offset(action, idx);
+
+		if (cmd != TCA_PEDIT_KEY_EX_CMD_SET)
+			return -EOPNOTSUPP;
+
+		switch (htype) {
+		case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH:
+			err = nfp_fl_set_eth(action, idx, offset, &set_eth);
+			break;
+		case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4:
+			err = nfp_fl_set_ip4(action, idx, offset, &set_ip_addr);
+			break;
+		case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6:
+			err = nfp_fl_set_ip6(action, idx, offset, &set_ip6_dst,
+					     &set_ip6_src);
+			break;
+		case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP:
+			err = nfp_fl_set_tport(action, idx, offset, &set_tport,
+					       NFP_FL_ACTION_OPCODE_SET_TCP);
+			break;
+		case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP:
+			err = nfp_fl_set_tport(action, idx, offset, &set_tport,
+					       NFP_FL_ACTION_OPCODE_SET_UDP);
+			break;
+		default:
+			return -EOPNOTSUPP;
+		}
+		if (err)
+			return err;
+	}
+
+	if (set_eth.head.len_lw) {
+		act_size = sizeof(set_eth);
+		memcpy(nfp_action, &set_eth, act_size);
+		*a_len += act_size;
+	} else if (set_ip_addr.head.len_lw) {
+		act_size = sizeof(set_ip_addr);
+		memcpy(nfp_action, &set_ip_addr, act_size);
+		*a_len += act_size;
+	} else if (set_ip6_dst.head.len_lw && set_ip6_src.head.len_lw) {
+		/* TC compiles set src and dst IPv6 address as a single action,
+		 * the hardware requires this to be 2 separate actions.
+		 */
+		act_size = sizeof(set_ip6_src);
+		memcpy(nfp_action, &set_ip6_src, act_size);
+		*a_len += act_size;
+
+		act_size = sizeof(set_ip6_dst);
+		memcpy(&nfp_action[sizeof(set_ip6_src)], &set_ip6_dst,
+		       act_size);
+		*a_len += act_size;
+	} else if (set_ip6_dst.head.len_lw) {
+		act_size = sizeof(set_ip6_dst);
+		memcpy(nfp_action, &set_ip6_dst, act_size);
+		*a_len += act_size;
+	} else if (set_ip6_src.head.len_lw) {
+		act_size = sizeof(set_ip6_src);
+		memcpy(nfp_action, &set_ip6_src, act_size);
+		*a_len += act_size;
+	} else if (set_tport.head.len_lw) {
+		act_size = sizeof(set_tport);
+		memcpy(nfp_action, &set_tport, act_size);
+		*a_len += act_size;
+	}
 
 	return 0;
 }
@@ -125,8 +432,11 @@ nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action,
 static int
 nfp_flower_loop_action(const struct tc_action *a,
 		       struct nfp_fl_payload *nfp_fl, int *a_len,
-		       struct net_device *netdev)
+		       struct net_device *netdev,
+		       enum nfp_flower_tun_type *tun_type, int *tun_out_cnt)
 {
+	struct nfp_fl_pre_tunnel *pre_tun;
+	struct nfp_fl_set_vxlan *s_vxl;
 	struct nfp_fl_push_vlan *psh_v;
 	struct nfp_fl_pop_vlan *pop_v;
 	struct nfp_fl_output *output;
@@ -139,7 +449,8 @@ nfp_flower_loop_action(const struct tc_action *a,
 			return -EOPNOTSUPP;
 
 		output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len];
-		err = nfp_fl_output(output, a, nfp_fl, true, netdev);
+		err = nfp_fl_output(output, a, nfp_fl, true, netdev, *tun_type,
+				    tun_out_cnt);
 		if (err)
 			return err;
 
@@ -149,7 +460,8 @@ nfp_flower_loop_action(const struct tc_action *a,
 			return -EOPNOTSUPP;
 
 		output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len];
-		err = nfp_fl_output(output, a, nfp_fl, false, netdev);
+		err = nfp_fl_output(output, a, nfp_fl, false, netdev, *tun_type,
+				    tun_out_cnt);
 		if (err)
 			return err;
 
@@ -172,6 +484,32 @@ nfp_flower_loop_action(const struct tc_action *a,
 
 		nfp_fl_push_vlan(psh_v, a);
 		*a_len += sizeof(struct nfp_fl_push_vlan);
+	} else if (is_tcf_tunnel_set(a) && nfp_fl_supported_tun_port(a)) {
+		/* Pre-tunnel action is required for tunnel encap.
+		 * This checks for next hop entries on NFP.
+		 * If none, the packet falls back before applying other actions.
+		 */
+		if (*a_len + sizeof(struct nfp_fl_pre_tunnel) +
+		    sizeof(struct nfp_fl_set_vxlan) > NFP_FL_MAX_A_SIZ)
+			return -EOPNOTSUPP;
+
+		*tun_type = NFP_FL_TUNNEL_VXLAN;
+		pre_tun = nfp_fl_pre_tunnel(nfp_fl->action_data, *a_len);
+		nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL);
+		*a_len += sizeof(struct nfp_fl_pre_tunnel);
+
+		s_vxl = (struct nfp_fl_set_vxlan *)&nfp_fl->action_data[*a_len];
+		err = nfp_fl_set_vxlan(s_vxl, a, pre_tun);
+		if (err)
+			return err;
+
+		*a_len += sizeof(struct nfp_fl_set_vxlan);
+	} else if (is_tcf_tunnel_release(a)) {
+		/* Tunnel decap is handled by default so accept action. */
+		return 0;
+	} else if (is_tcf_pedit(a)) {
+		if (nfp_fl_pedit(a, &nfp_fl->action_data[*a_len], a_len))
+			return -EOPNOTSUPP;
 	} else {
 		/* Currently we do not handle any other actions. */
 		return -EOPNOTSUPP;
@@ -184,18 +522,22 @@ int nfp_flower_compile_action(struct tc_cls_flower_offload *flow,
 			      struct net_device *netdev,
 			      struct nfp_fl_payload *nfp_flow)
 {
-	int act_len, act_cnt, err;
+	int act_len, act_cnt, err, tun_out_cnt;
+	enum nfp_flower_tun_type tun_type;
 	const struct tc_action *a;
 	LIST_HEAD(actions);
 
 	memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ);
 	nfp_flow->meta.act_len = 0;
+	tun_type = NFP_FL_TUNNEL_NONE;
 	act_len = 0;
 	act_cnt = 0;
+	tun_out_cnt = 0;
 
 	tcf_exts_to_list(flow->exts, &actions);
 	list_for_each_entry(a, &actions, list) {
-		err = nfp_flower_loop_action(a, nfp_flow, &act_len, netdev);
+		err = nfp_flower_loop_action(a, nfp_flow, &act_len, netdev,
+					     &tun_type, &tun_out_cnt);
 		if (err)
 			return err;
 		act_cnt++;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
index c3ca05d10fe1..e98bb9cdb6a3 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
@@ -38,17 +38,10 @@
 #include <net/dst_metadata.h>
 
 #include "main.h"
-#include "../nfpcore/nfp_cpp.h"
 #include "../nfp_net.h"
 #include "../nfp_net_repr.h"
 #include "./cmsg.h"
 
-#define nfp_flower_cmsg_warn(app, fmt, args...)				\
-	do {								\
-		if (net_ratelimit())					\
-			nfp_warn((app)->cpp, fmt, ## args);		\
-	} while (0)
-
 static struct nfp_flower_cmsg_hdr *
 nfp_flower_cmsg_get_hdr(struct sk_buff *skb)
 {
@@ -57,14 +50,14 @@ nfp_flower_cmsg_get_hdr(struct sk_buff *skb)
 
 struct sk_buff *
 nfp_flower_cmsg_alloc(struct nfp_app *app, unsigned int size,
-		      enum nfp_flower_cmsg_type_port type)
+		      enum nfp_flower_cmsg_type_port type, gfp_t flag)
 {
 	struct nfp_flower_cmsg_hdr *ch;
 	struct sk_buff *skb;
 
 	size += NFP_FLOWER_CMSG_HLEN;
 
-	skb = nfp_app_ctrl_msg_alloc(app, size, GFP_KERNEL);
+	skb = nfp_app_ctrl_msg_alloc(app, size, flag);
 	if (!skb)
 		return NULL;
 
@@ -85,7 +78,8 @@ nfp_flower_cmsg_mac_repr_start(struct nfp_app *app, unsigned int num_ports)
 	unsigned int size;
 
 	size = sizeof(*msg) + num_ports * sizeof(msg->ports[0]);
-	skb = nfp_flower_cmsg_alloc(app, size, NFP_FLOWER_CMSG_TYPE_MAC_REPR);
+	skb = nfp_flower_cmsg_alloc(app, size, NFP_FLOWER_CMSG_TYPE_MAC_REPR,
+				    GFP_KERNEL);
 	if (!skb)
 		return NULL;
 
@@ -116,7 +110,7 @@ int nfp_flower_cmsg_portmod(struct nfp_repr *repr, bool carrier_ok)
 	struct sk_buff *skb;
 
 	skb = nfp_flower_cmsg_alloc(repr->app, sizeof(*msg),
-				    NFP_FLOWER_CMSG_TYPE_PORT_MOD);
+				    NFP_FLOWER_CMSG_TYPE_PORT_MOD, GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
 
@@ -188,6 +182,15 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb)
 	case NFP_FLOWER_CMSG_TYPE_FLOW_STATS:
 		nfp_flower_rx_flow_stats(app, skb);
 		break;
+	case NFP_FLOWER_CMSG_TYPE_NO_NEIGH:
+		nfp_tunnel_request_route(app, skb);
+		break;
+	case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS:
+		nfp_tunnel_keep_alive(app, skb);
+		break;
+	case NFP_FLOWER_CMSG_TYPE_TUN_NEIGH:
+		/* Acks from the NFP that the route is added - ignore. */
+		break;
 	default:
 		nfp_flower_cmsg_warn(app, "Cannot handle invalid repr control type %u\n",
 				     type);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index a2ec60344236..66070741d55f 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -39,6 +39,7 @@
 #include <linux/types.h>
 
 #include "../nfp_app.h"
+#include "../nfpcore/nfp_cpp.h"
 
 #define NFP_FLOWER_LAYER_META		BIT(0)
 #define NFP_FLOWER_LAYER_PORT		BIT(1)
@@ -56,6 +57,11 @@
 #define NFP_FLOWER_MASK_VLAN_CFI	BIT(12)
 #define NFP_FLOWER_MASK_VLAN_VID	GENMASK(11, 0)
 
+#define NFP_FLOWER_MASK_MPLS_LB		GENMASK(31, 12)
+#define NFP_FLOWER_MASK_MPLS_TC		GENMASK(11, 9)
+#define NFP_FLOWER_MASK_MPLS_BOS	BIT(8)
+#define NFP_FLOWER_MASK_MPLS_Q		BIT(0)
+
 #define NFP_FL_SC_ACT_DROP		0x80000000
 #define NFP_FL_SC_ACT_USER		0x7D000000
 #define NFP_FL_SC_ACT_POPV		0x6A000000
@@ -67,13 +73,18 @@
 #define NFP_FL_LW_SIZ			2
 
 /* Action opcodes */
-#define NFP_FL_ACTION_OPCODE_OUTPUT	0
-#define NFP_FL_ACTION_OPCODE_PUSH_VLAN	1
-#define NFP_FL_ACTION_OPCODE_POP_VLAN	2
-#define NFP_FL_ACTION_OPCODE_NUM	32
-
-#define NFP_FL_ACT_JMP_ID		GENMASK(15, 8)
-#define NFP_FL_ACT_LEN_LW		GENMASK(7, 0)
+#define NFP_FL_ACTION_OPCODE_OUTPUT		0
+#define NFP_FL_ACTION_OPCODE_PUSH_VLAN		1
+#define NFP_FL_ACTION_OPCODE_POP_VLAN		2
+#define NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL	6
+#define NFP_FL_ACTION_OPCODE_SET_ETHERNET	7
+#define NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS	9
+#define NFP_FL_ACTION_OPCODE_SET_IPV6_SRC	11
+#define NFP_FL_ACTION_OPCODE_SET_IPV6_DST	12
+#define NFP_FL_ACTION_OPCODE_SET_UDP		14
+#define NFP_FL_ACTION_OPCODE_SET_TCP		15
+#define NFP_FL_ACTION_OPCODE_PRE_TUNNEL		17
+#define NFP_FL_ACTION_OPCODE_NUM		32
 
 #define NFP_FL_OUT_FLAGS_LAST		BIT(15)
 #define NFP_FL_OUT_FLAGS_USE_TUN	BIT(4)
@@ -83,21 +94,74 @@
 #define NFP_FL_PUSH_VLAN_CFI		BIT(12)
 #define NFP_FL_PUSH_VLAN_VID		GENMASK(11, 0)
 
+/* Tunnel ports */
+#define NFP_FL_PORT_TYPE_TUN		0x50000000
+#define NFP_FL_IPV4_TUNNEL_TYPE		GENMASK(7, 4)
+#define NFP_FL_IPV4_PRE_TUN_INDEX	GENMASK(2, 0)
+
+#define nfp_flower_cmsg_warn(app, fmt, args...)                         \
+	do {                                                            \
+		if (net_ratelimit())                                    \
+			nfp_warn((app)->cpp, fmt, ## args);             \
+	} while (0)
+
+enum nfp_flower_tun_type {
+	NFP_FL_TUNNEL_NONE =	0,
+	NFP_FL_TUNNEL_VXLAN =	2,
+};
+
+struct nfp_fl_act_head {
+	u8 jump_id;
+	u8 len_lw;
+};
+
+struct nfp_fl_set_eth {
+	struct nfp_fl_act_head head;
+	__be16 reserved;
+	u8 eth_addr_mask[ETH_ALEN * 2];
+	u8 eth_addr_val[ETH_ALEN * 2];
+};
+
+struct nfp_fl_set_ip4_addrs {
+	struct nfp_fl_act_head head;
+	__be16 reserved;
+	__be32 ipv4_src_mask;
+	__be32 ipv4_src;
+	__be32 ipv4_dst_mask;
+	__be32 ipv4_dst;
+};
+
+struct nfp_fl_set_ipv6_addr {
+	struct nfp_fl_act_head head;
+	__be16 reserved;
+	struct {
+		__be32 mask;
+		__be32 exact;
+	} ipv6[4];
+};
+
+struct nfp_fl_set_tport {
+	struct nfp_fl_act_head head;
+	__be16 reserved;
+	u8 tp_port_mask[4];
+	u8 tp_port_val[4];
+};
+
 struct nfp_fl_output {
-	__be16 a_op;
+	struct nfp_fl_act_head head;
 	__be16 flags;
 	__be32 port;
 };
 
 struct nfp_fl_push_vlan {
-	__be16 a_op;
+	struct nfp_fl_act_head head;
 	__be16 reserved;
 	__be16 vlan_tpid;
 	__be16 vlan_tci;
 };
 
 struct nfp_fl_pop_vlan {
-	__be16 a_op;
+	struct nfp_fl_act_head head;
 	__be16 reserved;
 };
 
@@ -115,6 +179,25 @@ struct nfp_flower_meta_one {
 	u16 reserved;
 };
 
+struct nfp_fl_pre_tunnel {
+	struct nfp_fl_act_head head;
+	__be16 reserved;
+	__be32 ipv4_dst;
+	/* reserved for use with IPv6 addresses */
+	__be32 extra[3];
+};
+
+struct nfp_fl_set_vxlan {
+	struct nfp_fl_act_head head;
+	__be16 reserved;
+	__be64 tun_id;
+	__be32 tun_type_index;
+	__be16 tun_flags;
+	u8 ipv4_ttl;
+	u8 ipv4_tos;
+	__be32 extra[2];
+} __packed;
+
 /* Metadata with L2 (1W/4B)
  * ----------------------------------------------------------------
  *    3                   2                   1
@@ -230,6 +313,36 @@ struct nfp_flower_ipv6 {
 	struct in6_addr ipv6_dst;
 };
 
+/* Flow Frame VXLAN --> Tunnel details (4W/16B)
+ * -----------------------------------------------------------------
+ *    3                   2                   1
+ *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                         ipv4_addr_src                         |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                         ipv4_addr_dst                         |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |           tun_flags           |       tos     |       ttl     |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |   gpe_flags   |            Reserved           | Next Protocol |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                     VNI                       |   Reserved    |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct nfp_flower_vxlan {
+	__be32 ip_src;
+	__be32 ip_dst;
+	__be16 tun_flags;
+	u8 tos;
+	u8 ttl;
+	u8 gpe_flags;
+	u8 reserved[2];
+	u8 nxt_proto;
+	__be32 tun_id;
+};
+
+#define NFP_FL_TUN_VNI_OFFSET 8
+
 /* The base header for a control message packet.
  * Defines an 8-bit version, and an 8-bit type, padded
  * to a 32-bit word. Rest of the packet is type-specific.
@@ -249,6 +362,11 @@ enum nfp_flower_cmsg_type_port {
 	NFP_FLOWER_CMSG_TYPE_FLOW_DEL =		2,
 	NFP_FLOWER_CMSG_TYPE_MAC_REPR =		7,
 	NFP_FLOWER_CMSG_TYPE_PORT_MOD =		8,
+	NFP_FLOWER_CMSG_TYPE_NO_NEIGH =		10,
+	NFP_FLOWER_CMSG_TYPE_TUN_MAC =		11,
+	NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS =	12,
+	NFP_FLOWER_CMSG_TYPE_TUN_NEIGH =	13,
+	NFP_FLOWER_CMSG_TYPE_TUN_IPS =		14,
 	NFP_FLOWER_CMSG_TYPE_FLOW_STATS =	15,
 	NFP_FLOWER_CMSG_TYPE_PORT_ECHO =	16,
 	NFP_FLOWER_CMSG_TYPE_MAX =		32,
@@ -282,6 +400,7 @@ enum nfp_flower_cmsg_port_type {
 	NFP_FLOWER_CMSG_PORT_TYPE_UNSPEC =	0x0,
 	NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT =	0x1,
 	NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT =	0x2,
+	NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT =  0x3,
 };
 
 enum nfp_flower_cmsg_port_vnic_type {
@@ -323,6 +442,11 @@ static inline void *nfp_flower_cmsg_get_data(struct sk_buff *skb)
 	return (unsigned char *)skb->data + NFP_FLOWER_CMSG_HLEN;
 }
 
+static inline int nfp_flower_cmsg_get_data_len(struct sk_buff *skb)
+{
+	return skb->len - NFP_FLOWER_CMSG_HLEN;
+}
+
 struct sk_buff *
 nfp_flower_cmsg_mac_repr_start(struct nfp_app *app, unsigned int num_ports);
 void
@@ -334,6 +458,6 @@ void nfp_flower_cmsg_process_rx(struct work_struct *work);
 void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb);
 struct sk_buff *
 nfp_flower_cmsg_alloc(struct nfp_app *app, unsigned int size,
-		      enum nfp_flower_cmsg_type_port type);
+		      enum nfp_flower_cmsg_type_port type, gfp_t flag);
 
 #endif
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c
index 91fe03617106..e0283bb24f06 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.c
@@ -142,8 +142,8 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app,
 {
 	u8 nfp_pcie = nfp_cppcore_pcie_unit(app->pf->cpp);
 	struct nfp_flower_priv *priv = app->priv;
-	struct nfp_reprs *reprs, *old_reprs;
 	enum nfp_port_type port_type;
+	struct nfp_reprs *reprs;
 	const u8 queue = 0;
 	int i, err;
 
@@ -194,11 +194,7 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app,
 			 reprs->reprs[i]->name);
 	}
 
-	old_reprs = nfp_app_reprs_set(app, repr_type, reprs);
-	if (IS_ERR(old_reprs)) {
-		err = PTR_ERR(old_reprs);
-		goto err_reprs_clean;
-	}
+	nfp_app_reprs_set(app, repr_type, reprs);
 
 	return 0;
 err_reprs_clean:
@@ -222,8 +218,8 @@ static int
 nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv)
 {
 	struct nfp_eth_table *eth_tbl = app->pf->eth_tbl;
-	struct nfp_reprs *reprs, *old_reprs;
 	struct sk_buff *ctrl_skb;
+	struct nfp_reprs *reprs;
 	unsigned int i;
 	int err;
 
@@ -280,11 +276,7 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv)
 			 phys_port, reprs->reprs[phys_port]->name);
 	}
 
-	old_reprs = nfp_app_reprs_set(app, NFP_REPR_TYPE_PHYS_PORT, reprs);
-	if (IS_ERR(old_reprs)) {
-		err = PTR_ERR(old_reprs);
-		goto err_reprs_clean;
-	}
+	nfp_app_reprs_set(app, NFP_REPR_TYPE_PHYS_PORT, reprs);
 
 	/* The MAC_REPR control message should be sent after the MAC
 	 * representors are registered using nfp_app_reprs_set().  This is
@@ -436,6 +428,16 @@ static void nfp_flower_clean(struct nfp_app *app)
 	app->priv = NULL;
 }
 
+static int nfp_flower_start(struct nfp_app *app)
+{
+	return nfp_tunnel_config_start(app);
+}
+
+static void nfp_flower_stop(struct nfp_app *app)
+{
+	nfp_tunnel_config_stop(app);
+}
+
 const struct nfp_app_type app_flower = {
 	.id		= NFP_APP_FLOWER_NIC,
 	.name		= "flower",
@@ -453,6 +455,9 @@ const struct nfp_app_type app_flower = {
 	.repr_open	= nfp_flower_repr_netdev_open,
 	.repr_stop	= nfp_flower_repr_netdev_stop,
 
+	.start		= nfp_flower_start,
+	.stop		= nfp_flower_stop,
+
 	.ctrl_msg_rx	= nfp_flower_cmsg_rx,
 
 	.sriov_enable	= nfp_flower_sriov_enable,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index c20dd00a1cae..c90e72b7ff5a 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -58,6 +58,8 @@ struct nfp_app;
 #define NFP_FL_MASK_REUSE_TIME_NS	40000
 #define NFP_FL_MASK_ID_LOCATION		1
 
+#define NFP_FL_VXLAN_PORT		4789
+
 struct nfp_fl_mask_id {
 	struct circ_buf mask_id_free_list;
 	struct timespec64 *last_used;
@@ -82,6 +84,18 @@ struct nfp_fl_stats_id {
  * @flow_table:		Hash table used to store flower rules
  * @cmsg_work:		Workqueue for control messages processing
  * @cmsg_skbs:		List of skbs for control message processing
+ * @nfp_mac_off_list:	List of MAC addresses to offload
+ * @nfp_mac_index_list:	List of unique 8-bit indexes for non NFP netdevs
+ * @nfp_ipv4_off_list:	List of IPv4 addresses to offload
+ * @nfp_neigh_off_list:	List of neighbour offloads
+ * @nfp_mac_off_lock:	Lock for the MAC address list
+ * @nfp_mac_index_lock:	Lock for the MAC index list
+ * @nfp_ipv4_off_lock:	Lock for the IPv4 address list
+ * @nfp_neigh_off_lock:	Lock for the neighbour address list
+ * @nfp_mac_off_ids:	IDA to manage id assignment for offloaded macs
+ * @nfp_mac_off_count:	Number of MACs in address list
+ * @nfp_tun_mac_nb:	Notifier to monitor link state
+ * @nfp_tun_neigh_nb:	Notifier to monitor neighbour state
  */
 struct nfp_flower_priv {
 	struct nfp_app *app;
@@ -94,6 +108,18 @@ struct nfp_flower_priv {
 	DECLARE_HASHTABLE(flow_table, NFP_FLOWER_HASH_BITS);
 	struct work_struct cmsg_work;
 	struct sk_buff_head cmsg_skbs;
+	struct list_head nfp_mac_off_list;
+	struct list_head nfp_mac_index_list;
+	struct list_head nfp_ipv4_off_list;
+	struct list_head nfp_neigh_off_list;
+	struct mutex nfp_mac_off_lock;
+	struct mutex nfp_mac_index_lock;
+	struct mutex nfp_ipv4_off_lock;
+	spinlock_t nfp_neigh_off_lock;
+	struct ida nfp_mac_off_ids;
+	int nfp_mac_off_count;
+	struct notifier_block nfp_tun_mac_nb;
+	struct notifier_block nfp_tun_neigh_nb;
 };
 
 struct nfp_fl_key_ls {
@@ -126,6 +152,7 @@ struct nfp_fl_payload {
 	struct rcu_head rcu;
 	spinlock_t lock; /* lock stats */
 	struct nfp_fl_stats stats;
+	__be32 nfp_tun_ipv4_addr;
 	char *unmasked_data;
 	char *mask_data;
 	char *action_data;
@@ -163,4 +190,12 @@ nfp_flower_remove_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie);
 
 void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb);
 
+int nfp_tunnel_config_start(struct nfp_app *app);
+void nfp_tunnel_config_stop(struct nfp_app *app);
+void nfp_tunnel_write_macs(struct nfp_app *app);
+void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4);
+void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4);
+void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb);
+void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb);
+
 #endif
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
index d25b5038c3a2..60614d4f0e22 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -77,14 +77,17 @@ nfp_flower_compile_meta(struct nfp_flower_meta_one *frame, u8 key_type)
 
 static int
 nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port,
-			bool mask_version)
+			bool mask_version, enum nfp_flower_tun_type tun_type)
 {
 	if (mask_version) {
 		frame->in_port = cpu_to_be32(~0);
 		return 0;
 	}
 
-	frame->in_port = cpu_to_be32(cmsg_port);
+	if (tun_type)
+		frame->in_port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type);
+	else
+		frame->in_port = cpu_to_be32(cmsg_port);
 
 	return 0;
 }
@@ -108,8 +111,21 @@ nfp_flower_compile_mac(struct nfp_flower_mac_mpls *frame,
 		ether_addr_copy(frame->mac_src, &addr->src[0]);
 	}
 
-	if (mask_version)
-		frame->mpls_lse = cpu_to_be32(~0);
+	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_MPLS)) {
+		struct flow_dissector_key_mpls *mpls;
+		u32 t_mpls;
+
+		mpls = skb_flow_dissector_target(flow->dissector,
+						 FLOW_DISSECTOR_KEY_MPLS,
+						 target);
+
+		t_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, mpls->mpls_label) |
+			 FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, mpls->mpls_tc) |
+			 FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, mpls->mpls_bos) |
+			 NFP_FLOWER_MASK_MPLS_Q;
+
+		frame->mpls_lse = cpu_to_be32(t_mpls);
+	}
 }
 
 static void
@@ -140,7 +156,6 @@ nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame,
 	struct flow_dissector_key_ipv4_addrs *addr;
 	struct flow_dissector_key_basic *basic;
 
-	/* Wildcard TOS/TTL for now. */
 	memset(frame, 0, sizeof(struct nfp_flower_ipv4));
 
 	if (dissector_uses_key(flow->dissector,
@@ -158,6 +173,16 @@ nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame,
 						  target);
 		frame->proto = basic->ip_proto;
 	}
+
+	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_IP)) {
+		struct flow_dissector_key_ip *flow_ip;
+
+		flow_ip = skb_flow_dissector_target(flow->dissector,
+						    FLOW_DISSECTOR_KEY_IP,
+						    target);
+		frame->tos = flow_ip->tos;
+		frame->ttl = flow_ip->ttl;
+	}
 }
 
 static void
@@ -169,7 +194,6 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame,
 	struct flow_dissector_key_ipv6_addrs *addr;
 	struct flow_dissector_key_basic *basic;
 
-	/* Wildcard LABEL/TOS/TTL for now. */
 	memset(frame, 0, sizeof(struct nfp_flower_ipv6));
 
 	if (dissector_uses_key(flow->dissector,
@@ -187,6 +211,51 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame,
 						  target);
 		frame->proto = basic->ip_proto;
 	}
+
+	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_IP)) {
+		struct flow_dissector_key_ip *flow_ip;
+
+		flow_ip = skb_flow_dissector_target(flow->dissector,
+						    FLOW_DISSECTOR_KEY_IP,
+						    target);
+		frame->tos = flow_ip->tos;
+		frame->ttl = flow_ip->ttl;
+	}
+}
+
+static void
+nfp_flower_compile_vxlan(struct nfp_flower_vxlan *frame,
+			 struct tc_cls_flower_offload *flow,
+			 bool mask_version, __be32 *tun_dst)
+{
+	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
+	struct flow_dissector_key_ipv4_addrs *vxlan_ips;
+	struct flow_dissector_key_keyid *vni;
+
+	/* Wildcard TOS/TTL/GPE_FLAGS/NXT_PROTO for now. */
+	memset(frame, 0, sizeof(struct nfp_flower_vxlan));
+
+	if (dissector_uses_key(flow->dissector,
+			       FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		u32 temp_vni;
+
+		vni = skb_flow_dissector_target(flow->dissector,
+						FLOW_DISSECTOR_KEY_ENC_KEYID,
+						target);
+		temp_vni = be32_to_cpu(vni->keyid) << NFP_FL_TUN_VNI_OFFSET;
+		frame->tun_id = cpu_to_be32(temp_vni);
+	}
+
+	if (dissector_uses_key(flow->dissector,
+			       FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
+		vxlan_ips =
+		   skb_flow_dissector_target(flow->dissector,
+					     FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
+					     target);
+		frame->ip_src = vxlan_ips->src;
+		frame->ip_dst = vxlan_ips->dst;
+		*tun_dst = vxlan_ips->dst;
+	}
 }
 
 int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
@@ -194,10 +263,16 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
 				  struct net_device *netdev,
 				  struct nfp_fl_payload *nfp_flow)
 {
+	enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE;
+	__be32 tun_dst, tun_dst_mask = 0;
+	struct nfp_repr *netdev_repr;
 	int err;
 	u8 *ext;
 	u8 *msk;
 
+	if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN)
+		tun_type = NFP_FL_TUNNEL_VXLAN;
+
 	memset(nfp_flow->unmasked_data, 0, key_ls->key_size);
 	memset(nfp_flow->mask_data, 0, key_ls->key_size);
 
@@ -216,14 +291,14 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
 		/* Populate Exact Port data. */
 		err = nfp_flower_compile_port((struct nfp_flower_in_port *)ext,
 					      nfp_repr_get_port_id(netdev),
-					      false);
+					      false, tun_type);
 		if (err)
 			return err;
 
 		/* Populate Mask Port Data. */
 		err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk,
 					      nfp_repr_get_port_id(netdev),
-					      true);
+					      true, tun_type);
 		if (err)
 			return err;
 
@@ -291,5 +366,28 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
 		msk += sizeof(struct nfp_flower_ipv6);
 	}
 
+	if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN) {
+		/* Populate Exact VXLAN Data. */
+		nfp_flower_compile_vxlan((struct nfp_flower_vxlan *)ext,
+					 flow, false, &tun_dst);
+		/* Populate Mask VXLAN Data. */
+		nfp_flower_compile_vxlan((struct nfp_flower_vxlan *)msk,
+					 flow, true, &tun_dst_mask);
+		ext += sizeof(struct nfp_flower_vxlan);
+		msk += sizeof(struct nfp_flower_vxlan);
+
+		/* Configure tunnel end point MAC. */
+		if (nfp_netdev_is_nfp_repr(netdev)) {
+			netdev_repr = netdev_priv(netdev);
+			nfp_tunnel_write_macs(netdev_repr->app);
+
+			/* Store the tunnel destination in the rule data.
+			 * This must be present and be an exact match.
+			 */
+			nfp_flow->nfp_tun_ipv4_addr = tun_dst;
+			nfp_tunnel_add_ipv4_off(netdev_repr->app, tun_dst);
+		}
+	}
+
 	return 0;
 }
diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
index 3226ddc55f99..193520ef23f0 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
@@ -140,7 +140,7 @@ exit_rcu_unlock:
 
 void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb)
 {
-	unsigned int msg_len = skb->len - NFP_FLOWER_CMSG_HLEN;
+	unsigned int msg_len = nfp_flower_cmsg_get_data_len(skb);
 	struct nfp_fl_stats_frame *stats_frame;
 	unsigned char *msg;
 	int i;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index a18b4d2b1d3e..cdbb5464b790 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -52,8 +52,26 @@
 	 BIT(FLOW_DISSECTOR_KEY_PORTS) | \
 	 BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | \
 	 BIT(FLOW_DISSECTOR_KEY_VLAN) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \
+	 BIT(FLOW_DISSECTOR_KEY_MPLS) | \
 	 BIT(FLOW_DISSECTOR_KEY_IP))
 
+#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR \
+	(BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS))
+
+#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R \
+	(BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \
+	 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS))
+
 static int
 nfp_flower_xmit_flow(struct net_device *netdev,
 		     struct nfp_fl_payload *nfp_flow, u8 mtype)
@@ -77,7 +95,7 @@ nfp_flower_xmit_flow(struct net_device *netdev,
 	nfp_flow->meta.mask_len >>= NFP_FL_LW_SIZ;
 	nfp_flow->meta.act_len >>= NFP_FL_LW_SIZ;
 
-	skb = nfp_flower_cmsg_alloc(priv->app, tot_len, mtype);
+	skb = nfp_flower_cmsg_alloc(priv->app, tot_len, mtype, GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
 
@@ -117,7 +135,6 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls,
 {
 	struct flow_dissector_key_basic *mask_basic = NULL;
 	struct flow_dissector_key_basic *key_basic = NULL;
-	struct flow_dissector_key_ip *mask_ip = NULL;
 	u32 key_layer_two;
 	u8 key_layer;
 	int key_size;
@@ -125,15 +142,58 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls,
 	if (flow->dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR)
 		return -EOPNOTSUPP;
 
+	/* If any tun dissector is used then the required set must be used. */
+	if (flow->dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR &&
+	    (flow->dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R)
+	    != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R)
+		return -EOPNOTSUPP;
+
+	key_layer_two = 0;
+	key_layer = NFP_FLOWER_LAYER_PORT | NFP_FLOWER_LAYER_MAC;
+	key_size = sizeof(struct nfp_flower_meta_one) +
+		   sizeof(struct nfp_flower_in_port) +
+		   sizeof(struct nfp_flower_mac_mpls);
+
 	if (dissector_uses_key(flow->dissector,
 			       FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+		struct flow_dissector_key_ipv4_addrs *mask_ipv4 = NULL;
+		struct flow_dissector_key_ports *mask_enc_ports = NULL;
+		struct flow_dissector_key_ports *enc_ports = NULL;
 		struct flow_dissector_key_control *mask_enc_ctl =
 			skb_flow_dissector_target(flow->dissector,
 						  FLOW_DISSECTOR_KEY_ENC_CONTROL,
 						  flow->mask);
-		/* We are expecting a tunnel. For now we ignore offloading. */
-		if (mask_enc_ctl->addr_type)
+		struct flow_dissector_key_control *enc_ctl =
+			skb_flow_dissector_target(flow->dissector,
+						  FLOW_DISSECTOR_KEY_ENC_CONTROL,
+						  flow->key);
+		if (mask_enc_ctl->addr_type != 0xffff ||
+		    enc_ctl->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS)
 			return -EOPNOTSUPP;
+
+		/* These fields are already verified as used. */
+		mask_ipv4 =
+			skb_flow_dissector_target(flow->dissector,
+						  FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
+						  flow->mask);
+		if (mask_ipv4->dst != cpu_to_be32(~0))
+			return -EOPNOTSUPP;
+
+		mask_enc_ports =
+			skb_flow_dissector_target(flow->dissector,
+						  FLOW_DISSECTOR_KEY_ENC_PORTS,
+						  flow->mask);
+		enc_ports =
+			skb_flow_dissector_target(flow->dissector,
+						  FLOW_DISSECTOR_KEY_ENC_PORTS,
+						  flow->key);
+
+		if (mask_enc_ports->dst != cpu_to_be16(~0) ||
+		    enc_ports->dst != htons(NFP_FL_VXLAN_PORT))
+			return -EOPNOTSUPP;
+
+		key_layer |= NFP_FLOWER_LAYER_VXLAN;
+		key_size += sizeof(struct nfp_flower_vxlan);
 	}
 
 	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
@@ -146,34 +206,15 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls,
 						      flow->key);
 	}
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_IP))
-		mask_ip = skb_flow_dissector_target(flow->dissector,
-						    FLOW_DISSECTOR_KEY_IP,
-						    flow->mask);
-
-	key_layer_two = 0;
-	key_layer = NFP_FLOWER_LAYER_PORT | NFP_FLOWER_LAYER_MAC;
-	key_size = sizeof(struct nfp_flower_meta_one) +
-		   sizeof(struct nfp_flower_in_port) +
-		   sizeof(struct nfp_flower_mac_mpls);
-
 	if (mask_basic && mask_basic->n_proto) {
 		/* Ethernet type is present in the key. */
 		switch (key_basic->n_proto) {
 		case cpu_to_be16(ETH_P_IP):
-			if (mask_ip && mask_ip->tos)
-				return -EOPNOTSUPP;
-			if (mask_ip && mask_ip->ttl)
-				return -EOPNOTSUPP;
 			key_layer |= NFP_FLOWER_LAYER_IPV4;
 			key_size += sizeof(struct nfp_flower_ipv4);
 			break;
 
 		case cpu_to_be16(ETH_P_IPV6):
-			if (mask_ip && mask_ip->tos)
-				return -EOPNOTSUPP;
-			if (mask_ip && mask_ip->ttl)
-				return -EOPNOTSUPP;
 			key_layer |= NFP_FLOWER_LAYER_IPV6;
 			key_size += sizeof(struct nfp_flower_ipv6);
 			break;
@@ -184,11 +225,6 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls,
 		case cpu_to_be16(ETH_P_ARP):
 			return -EOPNOTSUPP;
 
-		/* Currently we do not offload MPLS. */
-		case cpu_to_be16(ETH_P_MPLS_UC):
-		case cpu_to_be16(ETH_P_MPLS_MC):
-			return -EOPNOTSUPP;
-
 		/* Will be included in layer 2. */
 		case cpu_to_be16(ETH_P_8021Q):
 			break;
@@ -252,6 +288,7 @@ nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer)
 	if (!flow_pay->action_data)
 		goto err_free_mask;
 
+	flow_pay->nfp_tun_ipv4_addr = 0;
 	flow_pay->meta.flags = 0;
 	spin_lock_init(&flow_pay->lock);
 
@@ -361,6 +398,9 @@ nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev,
 	if (err)
 		goto err_free_flow;
 
+	if (nfp_flow->nfp_tun_ipv4_addr)
+		nfp_tunnel_del_ipv4_off(app, nfp_flow->nfp_tun_ipv4_addr);
+
 	err = nfp_flower_xmit_flow(netdev, nfp_flow,
 				   NFP_FLOWER_CMSG_TYPE_FLOW_DEL);
 	if (err)
@@ -409,6 +449,10 @@ static int
 nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev,
 			struct tc_cls_flower_offload *flower)
 {
+	if (!eth_proto_is_802_3(flower->common.protocol) ||
+	    flower->common.chain_index)
+		return -EOPNOTSUPP;
+
 	switch (flower->command) {
 	case TC_CLSFLOWER_REPLACE:
 		return nfp_flower_add_offload(app, netdev, flower);
@@ -421,16 +465,53 @@ nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev,
 	return -EOPNOTSUPP;
 }
 
-int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev,
-			enum tc_setup_type type, void *type_data)
+static int nfp_flower_setup_tc_block_cb(enum tc_setup_type type,
+					void *type_data, void *cb_priv)
 {
-	struct tc_cls_flower_offload *cls_flower = type_data;
+	struct nfp_repr *repr = cb_priv;
 
-	if (type != TC_SETUP_CLSFLOWER ||
-	    !is_classid_clsact_ingress(cls_flower->common.classid) ||
-	    !eth_proto_is_802_3(cls_flower->common.protocol) ||
-	    cls_flower->common.chain_index)
+	if (!tc_can_offload(repr->netdev))
 		return -EOPNOTSUPP;
 
-	return nfp_flower_repr_offload(app, netdev, cls_flower);
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return nfp_flower_repr_offload(repr->app, repr->netdev,
+					       type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int nfp_flower_setup_tc_block(struct net_device *netdev,
+				     struct tc_block_offload *f)
+{
+	struct nfp_repr *repr = netdev_priv(netdev);
+
+	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+		return -EOPNOTSUPP;
+
+	switch (f->command) {
+	case TC_BLOCK_BIND:
+		return tcf_block_cb_register(f->block,
+					     nfp_flower_setup_tc_block_cb,
+					     repr, repr);
+	case TC_BLOCK_UNBIND:
+		tcf_block_cb_unregister(f->block,
+					nfp_flower_setup_tc_block_cb,
+					repr);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev,
+			enum tc_setup_type type, void *type_data)
+{
+	switch (type) {
+	case TC_SETUP_BLOCK:
+		return nfp_flower_setup_tc_block(netdev, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
 }
diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
new file mode 100644
index 000000000000..b03f22f29612
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
@@ -0,0 +1,804 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <net/netevent.h>
+#include <linux/idr.h>
+#include <net/dst_metadata.h>
+#include <net/arp.h>
+
+#include "cmsg.h"
+#include "main.h"
+#include "../nfp_net_repr.h"
+#include "../nfp_net.h"
+
+#define NFP_FL_MAX_ROUTES               32
+
+/**
+ * struct nfp_tun_active_tuns - periodic message of active tunnels
+ * @seq:		sequence number of the message
+ * @count:		number of tunnels report in message
+ * @flags:		options part of the request
+ * @ipv4:		dest IPv4 address of active route
+ * @egress_port:	port the encapsulated packet egressed
+ * @extra:		reserved for future use
+ * @tun_info:		tunnels that have sent traffic in reported period
+ */
+struct nfp_tun_active_tuns {
+	__be32 seq;
+	__be32 count;
+	__be32 flags;
+	struct route_ip_info {
+		__be32 ipv4;
+		__be32 egress_port;
+		__be32 extra[2];
+	} tun_info[];
+};
+
+/**
+ * struct nfp_tun_neigh - neighbour/route entry on the NFP
+ * @dst_ipv4:	destination IPv4 address
+ * @src_ipv4:	source IPv4 address
+ * @dst_addr:	destination MAC address
+ * @src_addr:	source MAC address
+ * @port_id:	NFP port to output packet on - associated with source IPv4
+ */
+struct nfp_tun_neigh {
+	__be32 dst_ipv4;
+	__be32 src_ipv4;
+	u8 dst_addr[ETH_ALEN];
+	u8 src_addr[ETH_ALEN];
+	__be32 port_id;
+};
+
+/**
+ * struct nfp_tun_req_route_ipv4 - NFP requests a route/neighbour lookup
+ * @ingress_port:	ingress port of packet that signalled request
+ * @ipv4_addr:		destination ipv4 address for route
+ * @reserved:		reserved for future use
+ */
+struct nfp_tun_req_route_ipv4 {
+	__be32 ingress_port;
+	__be32 ipv4_addr;
+	__be32 reserved[2];
+};
+
+/**
+ * struct nfp_ipv4_route_entry - routes that are offloaded to the NFP
+ * @ipv4_addr:	destination of route
+ * @list:	list pointer
+ */
+struct nfp_ipv4_route_entry {
+	__be32 ipv4_addr;
+	struct list_head list;
+};
+
+#define NFP_FL_IPV4_ADDRS_MAX        32
+
+/**
+ * struct nfp_tun_ipv4_addr - set the IP address list on the NFP
+ * @count:	number of IPs populated in the array
+ * @ipv4_addr:	array of IPV4_ADDRS_MAX 32 bit IPv4 addresses
+ */
+struct nfp_tun_ipv4_addr {
+	__be32 count;
+	__be32 ipv4_addr[NFP_FL_IPV4_ADDRS_MAX];
+};
+
+/**
+ * struct nfp_ipv4_addr_entry - cached IPv4 addresses
+ * @ipv4_addr:	IP address
+ * @ref_count:	number of rules currently using this IP
+ * @list:	list pointer
+ */
+struct nfp_ipv4_addr_entry {
+	__be32 ipv4_addr;
+	int ref_count;
+	struct list_head list;
+};
+
+/**
+ * struct nfp_tun_mac_addr - configure MAC address of tunnel EP on NFP
+ * @reserved:	reserved for future use
+ * @count:	number of MAC addresses in the message
+ * @index:	index of MAC address in the lookup table
+ * @addr:	interface MAC address
+ * @addresses:	series of MACs to offload
+ */
+struct nfp_tun_mac_addr {
+	__be16 reserved;
+	__be16 count;
+	struct index_mac_addr {
+		__be16 index;
+		u8 addr[ETH_ALEN];
+	} addresses[];
+};
+
+/**
+ * struct nfp_tun_mac_offload_entry - list of MACs to offload
+ * @index:	index of MAC address for offloading
+ * @addr:	interface MAC address
+ * @list:	list pointer
+ */
+struct nfp_tun_mac_offload_entry {
+	__be16 index;
+	u8 addr[ETH_ALEN];
+	struct list_head list;
+};
+
+#define NFP_MAX_MAC_INDEX       0xff
+
+/**
+ * struct nfp_tun_mac_non_nfp_idx - converts non NFP netdev ifindex to 8-bit id
+ * @ifindex:	netdev ifindex of the device
+ * @index:	index of netdevs mac on NFP
+ * @list:	list pointer
+ */
+struct nfp_tun_mac_non_nfp_idx {
+	int ifindex;
+	u8 index;
+	struct list_head list;
+};
+
+void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb)
+{
+	struct nfp_tun_active_tuns *payload;
+	struct net_device *netdev;
+	int count, i, pay_len;
+	struct neighbour *n;
+	__be32 ipv4_addr;
+	u32 port;
+
+	payload = nfp_flower_cmsg_get_data(skb);
+	count = be32_to_cpu(payload->count);
+	if (count > NFP_FL_MAX_ROUTES) {
+		nfp_flower_cmsg_warn(app, "Tunnel keep-alive request exceeds max routes.\n");
+		return;
+	}
+
+	pay_len = nfp_flower_cmsg_get_data_len(skb);
+	if (pay_len != sizeof(struct nfp_tun_active_tuns) +
+	    sizeof(struct route_ip_info) * count) {
+		nfp_flower_cmsg_warn(app, "Corruption in tunnel keep-alive message.\n");
+		return;
+	}
+
+	for (i = 0; i < count; i++) {
+		ipv4_addr = payload->tun_info[i].ipv4;
+		port = be32_to_cpu(payload->tun_info[i].egress_port);
+		netdev = nfp_app_repr_get(app, port);
+		if (!netdev)
+			continue;
+
+		n = neigh_lookup(&arp_tbl, &ipv4_addr, netdev);
+		if (!n)
+			continue;
+
+		/* Update the used timestamp of neighbour */
+		neigh_event_send(n, NULL);
+		neigh_release(n);
+	}
+}
+
+static bool nfp_tun_is_netdev_to_offload(struct net_device *netdev)
+{
+	if (!netdev->rtnl_link_ops)
+		return false;
+	if (!strcmp(netdev->rtnl_link_ops->kind, "openvswitch"))
+		return true;
+	if (!strcmp(netdev->rtnl_link_ops->kind, "vxlan"))
+		return true;
+
+	return false;
+}
+
+static int
+nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata,
+			 gfp_t flag)
+{
+	struct sk_buff *skb;
+	unsigned char *msg;
+
+	skb = nfp_flower_cmsg_alloc(app, plen, mtype, flag);
+	if (!skb)
+		return -ENOMEM;
+
+	msg = nfp_flower_cmsg_get_data(skb);
+	memcpy(msg, pdata, nfp_flower_cmsg_get_data_len(skb));
+
+	nfp_ctrl_tx(app->ctrl, skb);
+	return 0;
+}
+
+static bool nfp_tun_has_route(struct nfp_app *app, __be32 ipv4_addr)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_ipv4_route_entry *entry;
+	struct list_head *ptr, *storage;
+
+	spin_lock_bh(&priv->nfp_neigh_off_lock);
+	list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) {
+		entry = list_entry(ptr, struct nfp_ipv4_route_entry, list);
+		if (entry->ipv4_addr == ipv4_addr) {
+			spin_unlock_bh(&priv->nfp_neigh_off_lock);
+			return true;
+		}
+	}
+	spin_unlock_bh(&priv->nfp_neigh_off_lock);
+	return false;
+}
+
+static void nfp_tun_add_route_to_cache(struct nfp_app *app, __be32 ipv4_addr)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_ipv4_route_entry *entry;
+	struct list_head *ptr, *storage;
+
+	spin_lock_bh(&priv->nfp_neigh_off_lock);
+	list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) {
+		entry = list_entry(ptr, struct nfp_ipv4_route_entry, list);
+		if (entry->ipv4_addr == ipv4_addr) {
+			spin_unlock_bh(&priv->nfp_neigh_off_lock);
+			return;
+		}
+	}
+	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
+	if (!entry) {
+		spin_unlock_bh(&priv->nfp_neigh_off_lock);
+		nfp_flower_cmsg_warn(app, "Mem error when storing new route.\n");
+		return;
+	}
+
+	entry->ipv4_addr = ipv4_addr;
+	list_add_tail(&entry->list, &priv->nfp_neigh_off_list);
+	spin_unlock_bh(&priv->nfp_neigh_off_lock);
+}
+
+static void nfp_tun_del_route_from_cache(struct nfp_app *app, __be32 ipv4_addr)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_ipv4_route_entry *entry;
+	struct list_head *ptr, *storage;
+
+	spin_lock_bh(&priv->nfp_neigh_off_lock);
+	list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) {
+		entry = list_entry(ptr, struct nfp_ipv4_route_entry, list);
+		if (entry->ipv4_addr == ipv4_addr) {
+			list_del(&entry->list);
+			kfree(entry);
+			break;
+		}
+	}
+	spin_unlock_bh(&priv->nfp_neigh_off_lock);
+}
+
+static void
+nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app,
+		    struct flowi4 *flow, struct neighbour *neigh, gfp_t flag)
+{
+	struct nfp_tun_neigh payload;
+
+	/* Only offload representor IPv4s for now. */
+	if (!nfp_netdev_is_nfp_repr(netdev))
+		return;
+
+	memset(&payload, 0, sizeof(struct nfp_tun_neigh));
+	payload.dst_ipv4 = flow->daddr;
+
+	/* If entry has expired send dst IP with all other fields 0. */
+	if (!(neigh->nud_state & NUD_VALID)) {
+		nfp_tun_del_route_from_cache(app, payload.dst_ipv4);
+		/* Trigger ARP to verify invalid neighbour state. */
+		neigh_event_send(neigh, NULL);
+		goto send_msg;
+	}
+
+	/* Have a valid neighbour so populate rest of entry. */
+	payload.src_ipv4 = flow->saddr;
+	ether_addr_copy(payload.src_addr, netdev->dev_addr);
+	neigh_ha_snapshot(payload.dst_addr, neigh, netdev);
+	payload.port_id = cpu_to_be32(nfp_repr_get_port_id(netdev));
+	/* Add destination of new route to NFP cache. */
+	nfp_tun_add_route_to_cache(app, payload.dst_ipv4);
+
+send_msg:
+	nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH,
+				 sizeof(struct nfp_tun_neigh),
+				 (unsigned char *)&payload, flag);
+}
+
+static int
+nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event,
+			    void *ptr)
+{
+	struct nfp_flower_priv *app_priv;
+	struct netevent_redirect *redir;
+	struct flowi4 flow = {};
+	struct neighbour *n;
+	struct nfp_app *app;
+	struct rtable *rt;
+	int err;
+
+	switch (event) {
+	case NETEVENT_REDIRECT:
+		redir = (struct netevent_redirect *)ptr;
+		n = redir->neigh;
+		break;
+	case NETEVENT_NEIGH_UPDATE:
+		n = (struct neighbour *)ptr;
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	flow.daddr = *(__be32 *)n->primary_key;
+
+	/* Only concerned with route changes for representors. */
+	if (!nfp_netdev_is_nfp_repr(n->dev))
+		return NOTIFY_DONE;
+
+	app_priv = container_of(nb, struct nfp_flower_priv, nfp_tun_neigh_nb);
+	app = app_priv->app;
+
+	/* Only concerned with changes to routes already added to NFP. */
+	if (!nfp_tun_has_route(app, flow.daddr))
+		return NOTIFY_DONE;
+
+#if IS_ENABLED(CONFIG_INET)
+	/* Do a route lookup to populate flow data. */
+	rt = ip_route_output_key(dev_net(n->dev), &flow);
+	err = PTR_ERR_OR_ZERO(rt);
+	if (err)
+		return NOTIFY_DONE;
+#else
+	return NOTIFY_DONE;
+#endif
+
+	flow.flowi4_proto = IPPROTO_UDP;
+	nfp_tun_write_neigh(n->dev, app, &flow, n, GFP_ATOMIC);
+
+	return NOTIFY_OK;
+}
+
+void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb)
+{
+	struct nfp_tun_req_route_ipv4 *payload;
+	struct net_device *netdev;
+	struct flowi4 flow = {};
+	struct neighbour *n;
+	struct rtable *rt;
+	int err;
+
+	payload = nfp_flower_cmsg_get_data(skb);
+
+	netdev = nfp_app_repr_get(app, be32_to_cpu(payload->ingress_port));
+	if (!netdev)
+		goto route_fail_warning;
+
+	flow.daddr = payload->ipv4_addr;
+	flow.flowi4_proto = IPPROTO_UDP;
+
+#if IS_ENABLED(CONFIG_INET)
+	/* Do a route lookup on same namespace as ingress port. */
+	rt = ip_route_output_key(dev_net(netdev), &flow);
+	err = PTR_ERR_OR_ZERO(rt);
+	if (err)
+		goto route_fail_warning;
+#else
+	goto route_fail_warning;
+#endif
+
+	/* Get the neighbour entry for the lookup */
+	n = dst_neigh_lookup(&rt->dst, &flow.daddr);
+	ip_rt_put(rt);
+	if (!n)
+		goto route_fail_warning;
+	nfp_tun_write_neigh(n->dev, app, &flow, n, GFP_KERNEL);
+	neigh_release(n);
+	return;
+
+route_fail_warning:
+	nfp_flower_cmsg_warn(app, "Requested route not found.\n");
+}
+
+static void nfp_tun_write_ipv4_list(struct nfp_app *app)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_ipv4_addr_entry *entry;
+	struct nfp_tun_ipv4_addr payload;
+	struct list_head *ptr, *storage;
+	int count;
+
+	memset(&payload, 0, sizeof(struct nfp_tun_ipv4_addr));
+	mutex_lock(&priv->nfp_ipv4_off_lock);
+	count = 0;
+	list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) {
+		if (count >= NFP_FL_IPV4_ADDRS_MAX) {
+			mutex_unlock(&priv->nfp_ipv4_off_lock);
+			nfp_flower_cmsg_warn(app, "IPv4 offload exceeds limit.\n");
+			return;
+		}
+		entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list);
+		payload.ipv4_addr[count++] = entry->ipv4_addr;
+	}
+	payload.count = cpu_to_be32(count);
+	mutex_unlock(&priv->nfp_ipv4_off_lock);
+
+	nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_IPS,
+				 sizeof(struct nfp_tun_ipv4_addr),
+				 &payload, GFP_KERNEL);
+}
+
+void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_ipv4_addr_entry *entry;
+	struct list_head *ptr, *storage;
+
+	mutex_lock(&priv->nfp_ipv4_off_lock);
+	list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) {
+		entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list);
+		if (entry->ipv4_addr == ipv4) {
+			entry->ref_count++;
+			mutex_unlock(&priv->nfp_ipv4_off_lock);
+			return;
+		}
+	}
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry) {
+		mutex_unlock(&priv->nfp_ipv4_off_lock);
+		nfp_flower_cmsg_warn(app, "Mem error when offloading IP address.\n");
+		return;
+	}
+	entry->ipv4_addr = ipv4;
+	entry->ref_count = 1;
+	list_add_tail(&entry->list, &priv->nfp_ipv4_off_list);
+	mutex_unlock(&priv->nfp_ipv4_off_lock);
+
+	nfp_tun_write_ipv4_list(app);
+}
+
+void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_ipv4_addr_entry *entry;
+	struct list_head *ptr, *storage;
+
+	mutex_lock(&priv->nfp_ipv4_off_lock);
+	list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) {
+		entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list);
+		if (entry->ipv4_addr == ipv4) {
+			entry->ref_count--;
+			if (!entry->ref_count) {
+				list_del(&entry->list);
+				kfree(entry);
+			}
+			break;
+		}
+	}
+	mutex_unlock(&priv->nfp_ipv4_off_lock);
+
+	nfp_tun_write_ipv4_list(app);
+}
+
+void nfp_tunnel_write_macs(struct nfp_app *app)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_tun_mac_offload_entry *entry;
+	struct nfp_tun_mac_addr *payload;
+	struct list_head *ptr, *storage;
+	int mac_count, err, pay_size;
+
+	mutex_lock(&priv->nfp_mac_off_lock);
+	if (!priv->nfp_mac_off_count) {
+		mutex_unlock(&priv->nfp_mac_off_lock);
+		return;
+	}
+
+	pay_size = sizeof(struct nfp_tun_mac_addr) +
+		   sizeof(struct index_mac_addr) * priv->nfp_mac_off_count;
+
+	payload = kzalloc(pay_size, GFP_KERNEL);
+	if (!payload) {
+		mutex_unlock(&priv->nfp_mac_off_lock);
+		return;
+	}
+
+	payload->count = cpu_to_be16(priv->nfp_mac_off_count);
+
+	mac_count = 0;
+	list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) {
+		entry = list_entry(ptr, struct nfp_tun_mac_offload_entry,
+				   list);
+		payload->addresses[mac_count].index = entry->index;
+		ether_addr_copy(payload->addresses[mac_count].addr,
+				entry->addr);
+		mac_count++;
+	}
+
+	err = nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_MAC,
+				       pay_size, payload, GFP_KERNEL);
+
+	kfree(payload);
+
+	if (err) {
+		mutex_unlock(&priv->nfp_mac_off_lock);
+		/* Write failed so retain list for future retry. */
+		return;
+	}
+
+	/* If list was successfully offloaded, flush it. */
+	list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) {
+		entry = list_entry(ptr, struct nfp_tun_mac_offload_entry,
+				   list);
+		list_del(&entry->list);
+		kfree(entry);
+	}
+
+	priv->nfp_mac_off_count = 0;
+	mutex_unlock(&priv->nfp_mac_off_lock);
+}
+
+static int nfp_tun_get_mac_idx(struct nfp_app *app, int ifindex)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_tun_mac_non_nfp_idx *entry;
+	struct list_head *ptr, *storage;
+	int idx;
+
+	mutex_lock(&priv->nfp_mac_index_lock);
+	list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) {
+		entry = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx, list);
+		if (entry->ifindex == ifindex) {
+			idx = entry->index;
+			mutex_unlock(&priv->nfp_mac_index_lock);
+			return idx;
+		}
+	}
+
+	idx = ida_simple_get(&priv->nfp_mac_off_ids, 0,
+			     NFP_MAX_MAC_INDEX, GFP_KERNEL);
+	if (idx < 0) {
+		mutex_unlock(&priv->nfp_mac_index_lock);
+		return idx;
+	}
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry) {
+		mutex_unlock(&priv->nfp_mac_index_lock);
+		return -ENOMEM;
+	}
+	entry->ifindex = ifindex;
+	entry->index = idx;
+	list_add_tail(&entry->list, &priv->nfp_mac_index_list);
+	mutex_unlock(&priv->nfp_mac_index_lock);
+
+	return idx;
+}
+
+static void nfp_tun_del_mac_idx(struct nfp_app *app, int ifindex)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_tun_mac_non_nfp_idx *entry;
+	struct list_head *ptr, *storage;
+
+	mutex_lock(&priv->nfp_mac_index_lock);
+	list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) {
+		entry = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx, list);
+		if (entry->ifindex == ifindex) {
+			ida_simple_remove(&priv->nfp_mac_off_ids,
+					  entry->index);
+			list_del(&entry->list);
+			kfree(entry);
+			break;
+		}
+	}
+	mutex_unlock(&priv->nfp_mac_index_lock);
+}
+
+static void nfp_tun_add_to_mac_offload_list(struct net_device *netdev,
+					    struct nfp_app *app)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_tun_mac_offload_entry *entry;
+	u16 nfp_mac_idx;
+	int port = 0;
+
+	/* Check if MAC should be offloaded. */
+	if (!is_valid_ether_addr(netdev->dev_addr))
+		return;
+
+	if (nfp_netdev_is_nfp_repr(netdev))
+		port = nfp_repr_get_port_id(netdev);
+	else if (!nfp_tun_is_netdev_to_offload(netdev))
+		return;
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry) {
+		nfp_flower_cmsg_warn(app, "Mem fail when offloading MAC.\n");
+		return;
+	}
+
+	if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port) ==
+	    NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT) {
+		nfp_mac_idx = port << 8 | NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT;
+	} else if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port) ==
+		   NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT) {
+		port = FIELD_GET(NFP_FLOWER_CMSG_PORT_VNIC, port);
+		nfp_mac_idx = port << 8 | NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT;
+	} else {
+		/* Must assign our own unique 8-bit index. */
+		int idx = nfp_tun_get_mac_idx(app, netdev->ifindex);
+
+		if (idx < 0) {
+			nfp_flower_cmsg_warn(app, "Can't assign non-repr MAC index.\n");
+			kfree(entry);
+			return;
+		}
+		nfp_mac_idx = idx << 8 | NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT;
+	}
+
+	entry->index = cpu_to_be16(nfp_mac_idx);
+	ether_addr_copy(entry->addr, netdev->dev_addr);
+
+	mutex_lock(&priv->nfp_mac_off_lock);
+	priv->nfp_mac_off_count++;
+	list_add_tail(&entry->list, &priv->nfp_mac_off_list);
+	mutex_unlock(&priv->nfp_mac_off_lock);
+}
+
+static int nfp_tun_mac_event_handler(struct notifier_block *nb,
+				     unsigned long event, void *ptr)
+{
+	struct nfp_flower_priv *app_priv;
+	struct net_device *netdev;
+	struct nfp_app *app;
+
+	if (event == NETDEV_DOWN || event == NETDEV_UNREGISTER) {
+		app_priv = container_of(nb, struct nfp_flower_priv,
+					nfp_tun_mac_nb);
+		app = app_priv->app;
+		netdev = netdev_notifier_info_to_dev(ptr);
+
+		/* If non-nfp netdev then free its offload index. */
+		if (nfp_tun_is_netdev_to_offload(netdev))
+			nfp_tun_del_mac_idx(app, netdev->ifindex);
+	} else if (event == NETDEV_UP || event == NETDEV_CHANGEADDR ||
+		   event == NETDEV_REGISTER) {
+		app_priv = container_of(nb, struct nfp_flower_priv,
+					nfp_tun_mac_nb);
+		app = app_priv->app;
+		netdev = netdev_notifier_info_to_dev(ptr);
+
+		nfp_tun_add_to_mac_offload_list(netdev, app);
+
+		/* Force a list write to keep NFP up to date. */
+		nfp_tunnel_write_macs(app);
+	}
+	return NOTIFY_OK;
+}
+
+int nfp_tunnel_config_start(struct nfp_app *app)
+{
+	struct nfp_flower_priv *priv = app->priv;
+	struct net_device *netdev;
+	int err;
+
+	/* Initialise priv data for MAC offloading. */
+	priv->nfp_mac_off_count = 0;
+	mutex_init(&priv->nfp_mac_off_lock);
+	INIT_LIST_HEAD(&priv->nfp_mac_off_list);
+	priv->nfp_tun_mac_nb.notifier_call = nfp_tun_mac_event_handler;
+	mutex_init(&priv->nfp_mac_index_lock);
+	INIT_LIST_HEAD(&priv->nfp_mac_index_list);
+	ida_init(&priv->nfp_mac_off_ids);
+
+	/* Initialise priv data for IPv4 offloading. */
+	mutex_init(&priv->nfp_ipv4_off_lock);
+	INIT_LIST_HEAD(&priv->nfp_ipv4_off_list);
+
+	/* Initialise priv data for neighbour offloading. */
+	spin_lock_init(&priv->nfp_neigh_off_lock);
+	INIT_LIST_HEAD(&priv->nfp_neigh_off_list);
+	priv->nfp_tun_neigh_nb.notifier_call = nfp_tun_neigh_event_handler;
+
+	err = register_netdevice_notifier(&priv->nfp_tun_mac_nb);
+	if (err)
+		goto err_free_mac_ida;
+
+	err = register_netevent_notifier(&priv->nfp_tun_neigh_nb);
+	if (err)
+		goto err_unreg_mac_nb;
+
+	/* Parse netdevs already registered for MACs that need offloaded. */
+	rtnl_lock();
+	for_each_netdev(&init_net, netdev)
+		nfp_tun_add_to_mac_offload_list(netdev, app);
+	rtnl_unlock();
+
+	return 0;
+
+err_unreg_mac_nb:
+	unregister_netdevice_notifier(&priv->nfp_tun_mac_nb);
+err_free_mac_ida:
+	ida_destroy(&priv->nfp_mac_off_ids);
+	return err;
+}
+
+void nfp_tunnel_config_stop(struct nfp_app *app)
+{
+	struct nfp_tun_mac_offload_entry *mac_entry;
+	struct nfp_flower_priv *priv = app->priv;
+	struct nfp_ipv4_route_entry *route_entry;
+	struct nfp_tun_mac_non_nfp_idx *mac_idx;
+	struct nfp_ipv4_addr_entry *ip_entry;
+	struct list_head *ptr, *storage;
+
+	unregister_netdevice_notifier(&priv->nfp_tun_mac_nb);
+	unregister_netevent_notifier(&priv->nfp_tun_neigh_nb);
+
+	/* Free any memory that may be occupied by MAC list. */
+	list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) {
+		mac_entry = list_entry(ptr, struct nfp_tun_mac_offload_entry,
+				       list);
+		list_del(&mac_entry->list);
+		kfree(mac_entry);
+	}
+
+	/* Free any memory that may be occupied by MAC index list. */
+	list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) {
+		mac_idx = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx,
+				     list);
+		list_del(&mac_idx->list);
+		kfree(mac_idx);
+	}
+
+	ida_destroy(&priv->nfp_mac_off_ids);
+
+	/* Free any memory that may be occupied by ipv4 list. */
+	list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) {
+		ip_entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list);
+		list_del(&ip_entry->list);
+		kfree(ip_entry);
+	}
+
+	/* Free any memory that may be occupied by the route list. */
+	list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) {
+		route_entry = list_entry(ptr, struct nfp_ipv4_route_entry,
+					 list);
+		list_del(&route_entry->list);
+		kfree(route_entry);
+	}
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c
index 82c290763529..955a9f44d244 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c
@@ -31,6 +31,7 @@
  * SOFTWARE.
  */
 
+#include <linux/bug.h>
 #include <linux/skbuff.h>
 #include <linux/slab.h>
 
@@ -42,10 +43,14 @@
 #include "nfp_net_repr.h"
 
 static const struct nfp_app_type *apps[] = {
-	&app_nic,
-	&app_bpf,
+	[NFP_APP_CORE_NIC]	= &app_nic,
+#ifdef CONFIG_BPF_SYSCALL
+	[NFP_APP_BPF_NIC]	= &app_bpf,
+#else
+	[NFP_APP_BPF_NIC]	= &app_nic,
+#endif
 #ifdef CONFIG_NFP_APP_FLOWER
-	&app_flower,
+	[NFP_APP_FLOWER_NIC]	= &app_flower,
 #endif
 };
 
@@ -101,31 +106,21 @@ nfp_app_reprs_set(struct nfp_app *app, enum nfp_repr_type type,
 
 	old = rcu_dereference_protected(app->reprs[type],
 					lockdep_is_held(&app->pf->lock));
-	if (reprs && old) {
-		old = ERR_PTR(-EBUSY);
-		goto exit_unlock;
-	}
-
 	rcu_assign_pointer(app->reprs[type], reprs);
 
-exit_unlock:
 	return old;
 }
 
 struct nfp_app *nfp_app_alloc(struct nfp_pf *pf, enum nfp_app_id id)
 {
 	struct nfp_app *app;
-	unsigned int i;
 
-	for (i = 0; i < ARRAY_SIZE(apps); i++)
-		if (apps[i]->id == id)
-			break;
-	if (i == ARRAY_SIZE(apps)) {
+	if (id >= ARRAY_SIZE(apps) || !apps[id]) {
 		nfp_err(pf->cpp, "failed to find app with ID 0x%02hhx\n", id);
 		return ERR_PTR(-EINVAL);
 	}
 
-	if (WARN_ON(!apps[i]->name || !apps[i]->vnic_alloc))
+	if (WARN_ON(!apps[id]->name || !apps[id]->vnic_alloc))
 		return ERR_PTR(-EINVAL);
 
 	app = kzalloc(sizeof(*app), GFP_KERNEL);
@@ -135,7 +130,7 @@ struct nfp_app *nfp_app_alloc(struct nfp_pf *pf, enum nfp_app_id id)
 	app->pf = pf;
 	app->cpp = pf->cpp;
 	app->pdev = pf->pdev;
-	app->type = apps[i];
+	app->type = apps[id];
 
 	return app;
 }
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h
index af640b5c2108..54b67c9b8d5b 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h
@@ -36,10 +36,13 @@
 
 #include <net/devlink.h>
 
+#include <trace/events/devlink.h>
+
 #include "nfp_net_repr.h"
 
 struct bpf_prog;
 struct net_device;
+struct netdev_bpf;
 struct pci_dev;
 struct sk_buff;
 struct sk_buff;
@@ -81,6 +84,9 @@ extern const struct nfp_app_type app_flower;
  * @setup_tc:	setup TC ndo
  * @tc_busy:	TC HW offload busy (rules loaded)
  * @xdp_offload:    offload an XDP program
+ * @bpf_verifier_prep:	verifier prep for dev-specific BPF programs
+ * @bpf_translate:	translate call for dev-specific BPF programs
+ * @bpf_destroy:	destroy for dev-specific BPF programs
  * @eswitch_mode_get:    get SR-IOV eswitch mode
  * @sriov_enable: app-specific sriov initialisation
  * @sriov_disable: app-specific sriov clean-up
@@ -116,6 +122,12 @@ struct nfp_app_type {
 	bool (*tc_busy)(struct nfp_app *app, struct nfp_net *nn);
 	int (*xdp_offload)(struct nfp_app *app, struct nfp_net *nn,
 			   struct bpf_prog *prog);
+	int (*bpf_verifier_prep)(struct nfp_app *app, struct nfp_net *nn,
+				 struct netdev_bpf *bpf);
+	int (*bpf_translate)(struct nfp_app *app, struct nfp_net *nn,
+			     struct bpf_prog *prog);
+	int (*bpf_destroy)(struct nfp_app *app, struct nfp_net *nn,
+			   struct bpf_prog *prog);
 
 	int (*sriov_enable)(struct nfp_app *app, int num_vfs);
 	void (*sriov_disable)(struct nfp_app *app);
@@ -269,13 +281,46 @@ static inline int nfp_app_xdp_offload(struct nfp_app *app, struct nfp_net *nn,
 	return app->type->xdp_offload(app, nn, prog);
 }
 
+static inline int
+nfp_app_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn,
+			  struct netdev_bpf *bpf)
+{
+	if (!app || !app->type->bpf_verifier_prep)
+		return -EOPNOTSUPP;
+	return app->type->bpf_verifier_prep(app, nn, bpf);
+}
+
+static inline int
+nfp_app_bpf_translate(struct nfp_app *app, struct nfp_net *nn,
+		      struct bpf_prog *prog)
+{
+	if (!app || !app->type->bpf_translate)
+		return -EOPNOTSUPP;
+	return app->type->bpf_translate(app, nn, prog);
+}
+
+static inline int
+nfp_app_bpf_destroy(struct nfp_app *app, struct nfp_net *nn,
+		    struct bpf_prog *prog)
+{
+	if (!app || !app->type->bpf_destroy)
+		return -EOPNOTSUPP;
+	return app->type->bpf_destroy(app, nn, prog);
+}
+
 static inline bool nfp_app_ctrl_tx(struct nfp_app *app, struct sk_buff *skb)
 {
+	trace_devlink_hwmsg(priv_to_devlink(app->pf), false, 0,
+			    skb->data, skb->len);
+
 	return nfp_ctrl_tx(app->ctrl, skb);
 }
 
 static inline void nfp_app_ctrl_rx(struct nfp_app *app, struct sk_buff *skb)
 {
+	trace_devlink_hwmsg(priv_to_devlink(app->pf), true, 0,
+			    skb->data, skb->len);
+
 	app->type->ctrl_msg_rx(app, skb);
 }
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c
new file mode 100644
index 000000000000..830f6de25f47
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c
@@ -0,0 +1,257 @@
+/*
+ * Copyright (C) 2016-2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include "nfp_asm.h"
+
+const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = {
+	[CMD_TGT_WRITE8_SWAP] =		{ 0x02, 0x42 },
+	[CMD_TGT_READ8] =		{ 0x01, 0x43 },
+	[CMD_TGT_READ32] =		{ 0x00, 0x5c },
+	[CMD_TGT_READ32_LE] =		{ 0x01, 0x5c },
+	[CMD_TGT_READ32_SWAP] =		{ 0x02, 0x5c },
+	[CMD_TGT_READ_LE] =		{ 0x01, 0x40 },
+	[CMD_TGT_READ_SWAP_LE] =	{ 0x03, 0x40 },
+};
+
+static u16 nfp_swreg_to_unreg(swreg reg, bool is_dst)
+{
+	bool lm_id, lm_dec = false;
+	u16 val = swreg_value(reg);
+
+	switch (swreg_type(reg)) {
+	case NN_REG_GPR_A:
+	case NN_REG_GPR_B:
+	case NN_REG_GPR_BOTH:
+		return val;
+	case NN_REG_NNR:
+		return UR_REG_NN | val;
+	case NN_REG_XFER:
+		return UR_REG_XFR | val;
+	case NN_REG_LMEM:
+		lm_id = swreg_lm_idx(reg);
+
+		switch (swreg_lm_mode(reg)) {
+		case NN_LM_MOD_NONE:
+			if (val & ~UR_REG_LM_IDX_MAX) {
+				pr_err("LM offset too large\n");
+				return 0;
+			}
+			return UR_REG_LM | FIELD_PREP(UR_REG_LM_IDX, lm_id) |
+				val;
+		case NN_LM_MOD_DEC:
+			lm_dec = true;
+			/* fall through */
+		case NN_LM_MOD_INC:
+			if (val) {
+				pr_err("LM offset in inc/dev mode\n");
+				return 0;
+			}
+			return UR_REG_LM | UR_REG_LM_POST_MOD |
+				FIELD_PREP(UR_REG_LM_IDX, lm_id) |
+				FIELD_PREP(UR_REG_LM_POST_MOD_DEC, lm_dec);
+		default:
+			pr_err("bad LM mode for unrestricted operands %d\n",
+			       swreg_lm_mode(reg));
+			return 0;
+		}
+	case NN_REG_IMM:
+		if (val & ~0xff) {
+			pr_err("immediate too large\n");
+			return 0;
+		}
+		return UR_REG_IMM_encode(val);
+	case NN_REG_NONE:
+		return is_dst ? UR_REG_NO_DST : REG_NONE;
+	}
+
+	pr_err("unrecognized reg encoding %08x\n", reg);
+	return 0;
+}
+
+int swreg_to_unrestricted(swreg dst, swreg lreg, swreg rreg,
+			  struct nfp_insn_ur_regs *reg)
+{
+	memset(reg, 0, sizeof(*reg));
+
+	/* Decode destination */
+	if (swreg_type(dst) == NN_REG_IMM)
+		return -EFAULT;
+
+	if (swreg_type(dst) == NN_REG_GPR_B)
+		reg->dst_ab = ALU_DST_B;
+	if (swreg_type(dst) == NN_REG_GPR_BOTH)
+		reg->wr_both = true;
+	reg->dst = nfp_swreg_to_unreg(dst, true);
+
+	/* Decode source operands */
+	if (swreg_type(lreg) == swreg_type(rreg))
+		return -EFAULT;
+
+	if (swreg_type(lreg) == NN_REG_GPR_B ||
+	    swreg_type(rreg) == NN_REG_GPR_A) {
+		reg->areg = nfp_swreg_to_unreg(rreg, false);
+		reg->breg = nfp_swreg_to_unreg(lreg, false);
+		reg->swap = true;
+	} else {
+		reg->areg = nfp_swreg_to_unreg(lreg, false);
+		reg->breg = nfp_swreg_to_unreg(rreg, false);
+	}
+
+	reg->dst_lmextn = swreg_lmextn(dst);
+	reg->src_lmextn = swreg_lmextn(lreg) | swreg_lmextn(rreg);
+
+	return 0;
+}
+
+static u16 nfp_swreg_to_rereg(swreg reg, bool is_dst, bool has_imm8, bool *i8)
+{
+	u16 val = swreg_value(reg);
+	bool lm_id;
+
+	switch (swreg_type(reg)) {
+	case NN_REG_GPR_A:
+	case NN_REG_GPR_B:
+	case NN_REG_GPR_BOTH:
+		return val;
+	case NN_REG_XFER:
+		return RE_REG_XFR | val;
+	case NN_REG_LMEM:
+		lm_id = swreg_lm_idx(reg);
+
+		if (swreg_lm_mode(reg) != NN_LM_MOD_NONE) {
+			pr_err("bad LM mode for restricted operands %d\n",
+			       swreg_lm_mode(reg));
+			return 0;
+		}
+
+		if (val & ~RE_REG_LM_IDX_MAX) {
+			pr_err("LM offset too large\n");
+			return 0;
+		}
+
+		return RE_REG_LM | FIELD_PREP(RE_REG_LM_IDX, lm_id) | val;
+	case NN_REG_IMM:
+		if (val & ~(0x7f | has_imm8 << 7)) {
+			pr_err("immediate too large\n");
+			return 0;
+		}
+		*i8 = val & 0x80;
+		return RE_REG_IMM_encode(val & 0x7f);
+	case NN_REG_NONE:
+		return is_dst ? RE_REG_NO_DST : REG_NONE;
+	case NN_REG_NNR:
+		pr_err("NNRs used with restricted encoding\n");
+		return 0;
+	}
+
+	pr_err("unrecognized reg encoding\n");
+	return 0;
+}
+
+int swreg_to_restricted(swreg dst, swreg lreg, swreg rreg,
+			struct nfp_insn_re_regs *reg, bool has_imm8)
+{
+	memset(reg, 0, sizeof(*reg));
+
+	/* Decode destination */
+	if (swreg_type(dst) == NN_REG_IMM)
+		return -EFAULT;
+
+	if (swreg_type(dst) == NN_REG_GPR_B)
+		reg->dst_ab = ALU_DST_B;
+	if (swreg_type(dst) == NN_REG_GPR_BOTH)
+		reg->wr_both = true;
+	reg->dst = nfp_swreg_to_rereg(dst, true, false, NULL);
+
+	/* Decode source operands */
+	if (swreg_type(lreg) == swreg_type(rreg))
+		return -EFAULT;
+
+	if (swreg_type(lreg) == NN_REG_GPR_B ||
+	    swreg_type(rreg) == NN_REG_GPR_A) {
+		reg->areg = nfp_swreg_to_rereg(rreg, false, has_imm8, &reg->i8);
+		reg->breg = nfp_swreg_to_rereg(lreg, false, has_imm8, &reg->i8);
+		reg->swap = true;
+	} else {
+		reg->areg = nfp_swreg_to_rereg(lreg, false, has_imm8, &reg->i8);
+		reg->breg = nfp_swreg_to_rereg(rreg, false, has_imm8, &reg->i8);
+	}
+
+	reg->dst_lmextn = swreg_lmextn(dst);
+	reg->src_lmextn = swreg_lmextn(lreg) | swreg_lmextn(rreg);
+
+	return 0;
+}
+
+#define NFP_USTORE_ECC_POLY_WORDS		7
+#define NFP_USTORE_OP_BITS			45
+
+static const u64 nfp_ustore_ecc_polynomials[NFP_USTORE_ECC_POLY_WORDS] = {
+	0x0ff800007fffULL,
+	0x11f801ff801fULL,
+	0x1e387e0781e1ULL,
+	0x17cb8e388e22ULL,
+	0x1af5b2c93244ULL,
+	0x1f56d5525488ULL,
+	0x0daf69a46910ULL,
+};
+
+static bool parity(u64 value)
+{
+	return hweight64(value) & 1;
+}
+
+int nfp_ustore_check_valid_no_ecc(u64 insn)
+{
+	if (insn & ~GENMASK_ULL(NFP_USTORE_OP_BITS, 0))
+		return -EINVAL;
+
+	return 0;
+}
+
+u64 nfp_ustore_calc_ecc_insn(u64 insn)
+{
+	u8 ecc = 0;
+	int i;
+
+	for (i = 0; i < NFP_USTORE_ECC_POLY_WORDS; i++)
+		ecc |= parity(nfp_ustore_ecc_polynomials[i] & insn) << i;
+
+	return insn | (u64)ecc << NFP_USTORE_OP_BITS;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
index d2b535739d2b..74d0c11ab2f9 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -34,6 +34,8 @@
 #ifndef __NFP_ASM_H__
 #define __NFP_ASM_H__ 1
 
+#include <linux/bitfield.h>
+#include <linux/bug.h>
 #include <linux/types.h>
 
 #define REG_NONE	0
@@ -43,23 +45,31 @@
 #define RE_REG_IMM_encode(x)					\
 	(RE_REG_IMM | ((x) & 0x1f) | (((x) & 0x60) << 1))
 #define RE_REG_IMM_MAX	 0x07fULL
+#define RE_REG_LM	0x050
+#define RE_REG_LM_IDX	0x008
+#define RE_REG_LM_IDX_MAX	0x7
 #define RE_REG_XFR	0x080
 
 #define UR_REG_XFR	0x180
+#define UR_REG_LM	0x200
+#define UR_REG_LM_IDX	0x020
+#define UR_REG_LM_POST_MOD	0x010
+#define UR_REG_LM_POST_MOD_DEC	0x001
+#define UR_REG_LM_IDX_MAX	0xf
 #define UR_REG_NN	0x280
 #define UR_REG_NO_DST	0x300
 #define UR_REG_IMM	UR_REG_NO_DST
 #define UR_REG_IMM_encode(x) (UR_REG_IMM | (x))
 #define UR_REG_IMM_MAX	 0x0ffULL
 
-#define OP_BR_BASE	0x0d800000020ULL
-#define OP_BR_BASE_MASK	0x0f8000c3ce0ULL
-#define OP_BR_MASK	0x0000000001fULL
-#define OP_BR_EV_PIP	0x00000000300ULL
-#define OP_BR_CSS	0x0000003c000ULL
-#define OP_BR_DEFBR	0x00000300000ULL
-#define OP_BR_ADDR_LO	0x007ffc00000ULL
-#define OP_BR_ADDR_HI	0x10000000000ULL
+#define OP_BR_BASE		0x0d800000020ULL
+#define OP_BR_BASE_MASK		0x0f8000c3ce0ULL
+#define OP_BR_MASK		0x0000000001fULL
+#define OP_BR_EV_PIP		0x00000000300ULL
+#define OP_BR_CSS		0x0000003c000ULL
+#define OP_BR_DEFBR		0x00000300000ULL
+#define OP_BR_ADDR_LO		0x007ffc00000ULL
+#define OP_BR_ADDR_HI		0x10000000000ULL
 
 #define nfp_is_br(_insn)				\
 	(((_insn) & OP_BR_BASE_MASK) == OP_BR_BASE)
@@ -82,30 +92,33 @@ enum br_ctx_signal_state {
 	BR_CSS_NONE = 2,
 };
 
-#define OP_BBYTE_BASE	0x0c800000000ULL
-#define OP_BB_A_SRC	0x000000000ffULL
-#define OP_BB_BYTE	0x00000000300ULL
-#define OP_BB_B_SRC	0x0000003fc00ULL
-#define OP_BB_I8	0x00000040000ULL
-#define OP_BB_EQ	0x00000080000ULL
-#define OP_BB_DEFBR	0x00000300000ULL
-#define OP_BB_ADDR_LO	0x007ffc00000ULL
-#define OP_BB_ADDR_HI	0x10000000000ULL
-
-#define OP_BALU_BASE	0x0e800000000ULL
-#define OP_BA_A_SRC	0x000000003ffULL
-#define OP_BA_B_SRC	0x000000ffc00ULL
-#define OP_BA_DEFBR	0x00000300000ULL
-#define OP_BA_ADDR_HI	0x0007fc00000ULL
-
-#define OP_IMMED_A_SRC	0x000000003ffULL
-#define OP_IMMED_B_SRC	0x000000ffc00ULL
-#define OP_IMMED_IMM	0x0000ff00000ULL
-#define OP_IMMED_WIDTH	0x00060000000ULL
-#define OP_IMMED_INV	0x00080000000ULL
-#define OP_IMMED_SHIFT	0x00600000000ULL
-#define OP_IMMED_BASE	0x0f000000000ULL
-#define OP_IMMED_WR_AB	0x20000000000ULL
+#define OP_BBYTE_BASE		0x0c800000000ULL
+#define OP_BB_A_SRC		0x000000000ffULL
+#define OP_BB_BYTE		0x00000000300ULL
+#define OP_BB_B_SRC		0x0000003fc00ULL
+#define OP_BB_I8		0x00000040000ULL
+#define OP_BB_EQ		0x00000080000ULL
+#define OP_BB_DEFBR		0x00000300000ULL
+#define OP_BB_ADDR_LO		0x007ffc00000ULL
+#define OP_BB_ADDR_HI		0x10000000000ULL
+#define OP_BB_SRC_LMEXTN	0x40000000000ULL
+
+#define OP_BALU_BASE		0x0e800000000ULL
+#define OP_BA_A_SRC		0x000000003ffULL
+#define OP_BA_B_SRC		0x000000ffc00ULL
+#define OP_BA_DEFBR		0x00000300000ULL
+#define OP_BA_ADDR_HI		0x0007fc00000ULL
+
+#define OP_IMMED_A_SRC		0x000000003ffULL
+#define OP_IMMED_B_SRC		0x000000ffc00ULL
+#define OP_IMMED_IMM		0x0000ff00000ULL
+#define OP_IMMED_WIDTH		0x00060000000ULL
+#define OP_IMMED_INV		0x00080000000ULL
+#define OP_IMMED_SHIFT		0x00600000000ULL
+#define OP_IMMED_BASE		0x0f000000000ULL
+#define OP_IMMED_WR_AB		0x20000000000ULL
+#define OP_IMMED_SRC_LMEXTN	0x40000000000ULL
+#define OP_IMMED_DST_LMEXTN	0x80000000000ULL
 
 enum immed_width {
 	IMMED_WIDTH_ALL = 0,
@@ -119,17 +132,19 @@ enum immed_shift {
 	IMMED_SHIFT_2B = 2,
 };
 
-#define OP_SHF_BASE	0x08000000000ULL
-#define OP_SHF_A_SRC	0x000000000ffULL
-#define OP_SHF_SC	0x00000000300ULL
-#define OP_SHF_B_SRC	0x0000003fc00ULL
-#define OP_SHF_I8	0x00000040000ULL
-#define OP_SHF_SW	0x00000080000ULL
-#define OP_SHF_DST	0x0000ff00000ULL
-#define OP_SHF_SHIFT	0x001f0000000ULL
-#define OP_SHF_OP	0x00e00000000ULL
-#define OP_SHF_DST_AB	0x01000000000ULL
-#define OP_SHF_WR_AB	0x20000000000ULL
+#define OP_SHF_BASE		0x08000000000ULL
+#define OP_SHF_A_SRC		0x000000000ffULL
+#define OP_SHF_SC		0x00000000300ULL
+#define OP_SHF_B_SRC		0x0000003fc00ULL
+#define OP_SHF_I8		0x00000040000ULL
+#define OP_SHF_SW		0x00000080000ULL
+#define OP_SHF_DST		0x0000ff00000ULL
+#define OP_SHF_SHIFT		0x001f0000000ULL
+#define OP_SHF_OP		0x00e00000000ULL
+#define OP_SHF_DST_AB		0x01000000000ULL
+#define OP_SHF_WR_AB		0x20000000000ULL
+#define OP_SHF_SRC_LMEXTN	0x40000000000ULL
+#define OP_SHF_DST_LMEXTN	0x80000000000ULL
 
 enum shf_op {
 	SHF_OP_NONE = 0,
@@ -139,24 +154,27 @@ enum shf_op {
 
 enum shf_sc {
 	SHF_SC_R_ROT = 0,
+	SHF_SC_NONE = SHF_SC_R_ROT,
 	SHF_SC_R_SHF = 1,
 	SHF_SC_L_SHF = 2,
 	SHF_SC_R_DSHF = 3,
 };
 
-#define OP_ALU_A_SRC	0x000000003ffULL
-#define OP_ALU_B_SRC	0x000000ffc00ULL
-#define OP_ALU_DST	0x0003ff00000ULL
-#define OP_ALU_SW	0x00040000000ULL
-#define OP_ALU_OP	0x00f80000000ULL
-#define OP_ALU_DST_AB	0x01000000000ULL
-#define OP_ALU_BASE	0x0a000000000ULL
-#define OP_ALU_WR_AB	0x20000000000ULL
+#define OP_ALU_A_SRC		0x000000003ffULL
+#define OP_ALU_B_SRC		0x000000ffc00ULL
+#define OP_ALU_DST		0x0003ff00000ULL
+#define OP_ALU_SW		0x00040000000ULL
+#define OP_ALU_OP		0x00f80000000ULL
+#define OP_ALU_DST_AB		0x01000000000ULL
+#define OP_ALU_BASE		0x0a000000000ULL
+#define OP_ALU_WR_AB		0x20000000000ULL
+#define OP_ALU_SRC_LMEXTN	0x40000000000ULL
+#define OP_ALU_DST_LMEXTN	0x80000000000ULL
 
 enum alu_op {
 	ALU_OP_NONE	= 0x00,
 	ALU_OP_ADD	= 0x01,
-	ALU_OP_NEG	= 0x04,
+	ALU_OP_NOT	= 0x04,
 	ALU_OP_AND	= 0x08,
 	ALU_OP_SUB_C	= 0x0d,
 	ALU_OP_ADD_C	= 0x11,
@@ -170,26 +188,28 @@ enum alu_dst_ab {
 	ALU_DST_B = 1,
 };
 
-#define OP_LDF_BASE	0x0c000000000ULL
-#define OP_LDF_A_SRC	0x000000000ffULL
-#define OP_LDF_SC	0x00000000300ULL
-#define OP_LDF_B_SRC	0x0000003fc00ULL
-#define OP_LDF_I8	0x00000040000ULL
-#define OP_LDF_SW	0x00000080000ULL
-#define OP_LDF_ZF	0x00000100000ULL
-#define OP_LDF_BMASK	0x0000f000000ULL
-#define OP_LDF_SHF	0x001f0000000ULL
-#define OP_LDF_WR_AB	0x20000000000ULL
-
-#define OP_CMD_A_SRC	 0x000000000ffULL
-#define OP_CMD_CTX	 0x00000000300ULL
-#define OP_CMD_B_SRC	 0x0000003fc00ULL
-#define OP_CMD_TOKEN	 0x000000c0000ULL
-#define OP_CMD_XFER	 0x00001f00000ULL
-#define OP_CMD_CNT	 0x0000e000000ULL
-#define OP_CMD_SIG	 0x000f0000000ULL
-#define OP_CMD_TGT_CMD	 0x07f00000000ULL
-#define OP_CMD_MODE	0x1c0000000000ULL
+#define OP_LDF_BASE		0x0c000000000ULL
+#define OP_LDF_A_SRC		0x000000000ffULL
+#define OP_LDF_SC		0x00000000300ULL
+#define OP_LDF_B_SRC		0x0000003fc00ULL
+#define OP_LDF_I8		0x00000040000ULL
+#define OP_LDF_SW		0x00000080000ULL
+#define OP_LDF_ZF		0x00000100000ULL
+#define OP_LDF_BMASK		0x0000f000000ULL
+#define OP_LDF_SHF		0x001f0000000ULL
+#define OP_LDF_WR_AB		0x20000000000ULL
+#define OP_LDF_SRC_LMEXTN	0x40000000000ULL
+#define OP_LDF_DST_LMEXTN	0x80000000000ULL
+
+#define OP_CMD_A_SRC		0x000000000ffULL
+#define OP_CMD_CTX		0x00000000300ULL
+#define OP_CMD_B_SRC		0x0000003fc00ULL
+#define OP_CMD_TOKEN		0x000000c0000ULL
+#define OP_CMD_XFER		0x00001f00000ULL
+#define OP_CMD_CNT		0x0000e000000ULL
+#define OP_CMD_SIG		0x000f0000000ULL
+#define OP_CMD_TGT_CMD		0x07f00000000ULL
+#define OP_CMD_MODE	       0x1c0000000000ULL
 
 struct cmd_tgt_act {
 	u8 token;
@@ -198,12 +218,17 @@ struct cmd_tgt_act {
 
 enum cmd_tgt_map {
 	CMD_TGT_READ8,
-	CMD_TGT_WRITE8,
+	CMD_TGT_WRITE8_SWAP,
+	CMD_TGT_READ32,
+	CMD_TGT_READ32_LE,
+	CMD_TGT_READ32_SWAP,
 	CMD_TGT_READ_LE,
 	CMD_TGT_READ_SWAP_LE,
 	__CMD_TGT_MAP_SIZE,
 };
 
+extern const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE];
+
 enum cmd_mode {
 	CMD_MODE_40b_AB	= 0,
 	CMD_MODE_40b_BA	= 1,
@@ -215,11 +240,13 @@ enum cmd_ctx_swap {
 	CMD_CTX_NO_SWAP = 3,
 };
 
-#define OP_LCSR_BASE	0x0fc00000000ULL
-#define OP_LCSR_A_SRC	0x000000003ffULL
-#define OP_LCSR_B_SRC	0x000000ffc00ULL
-#define OP_LCSR_WRITE	0x00000200000ULL
-#define OP_LCSR_ADDR	0x001ffc00000ULL
+#define OP_LCSR_BASE		0x0fc00000000ULL
+#define OP_LCSR_A_SRC		0x000000003ffULL
+#define OP_LCSR_B_SRC		0x000000ffc00ULL
+#define OP_LCSR_WRITE		0x00000200000ULL
+#define OP_LCSR_ADDR		0x001ffc00000ULL
+#define OP_LCSR_SRC_LMEXTN	0x40000000000ULL
+#define OP_LCSR_DST_LMEXTN	0x80000000000ULL
 
 enum lcsr_wr_src {
 	LCSR_WR_AREG,
@@ -227,7 +254,127 @@ enum lcsr_wr_src {
 	LCSR_WR_IMM,
 };
 
-#define OP_CARB_BASE	0x0e000000000ULL
-#define OP_CARB_OR	0x00000010000ULL
+#define OP_CARB_BASE		0x0e000000000ULL
+#define OP_CARB_OR		0x00000010000ULL
+
+#define NFP_CSR_ACT_LM_ADDR0	0x64
+#define NFP_CSR_ACT_LM_ADDR1	0x6c
+#define NFP_CSR_ACT_LM_ADDR2	0x94
+#define NFP_CSR_ACT_LM_ADDR3	0x9c
+
+/* Software register representation, independent of operand type */
+#define NN_REG_TYPE	GENMASK(31, 24)
+#define NN_REG_LM_IDX	GENMASK(23, 22)
+#define NN_REG_LM_IDX_HI	BIT(23)
+#define NN_REG_LM_IDX_LO	BIT(22)
+#define NN_REG_LM_MOD	GENMASK(21, 20)
+#define NN_REG_VAL	GENMASK(7, 0)
+
+enum nfp_bpf_reg_type {
+	NN_REG_GPR_A =	BIT(0),
+	NN_REG_GPR_B =	BIT(1),
+	NN_REG_GPR_BOTH = NN_REG_GPR_A | NN_REG_GPR_B,
+	NN_REG_NNR =	BIT(2),
+	NN_REG_XFER =	BIT(3),
+	NN_REG_IMM =	BIT(4),
+	NN_REG_NONE =	BIT(5),
+	NN_REG_LMEM =	BIT(6),
+};
+
+enum nfp_bpf_lm_mode {
+	NN_LM_MOD_NONE = 0,
+	NN_LM_MOD_INC,
+	NN_LM_MOD_DEC,
+};
+
+#define reg_both(x)	__enc_swreg((x), NN_REG_GPR_BOTH)
+#define reg_a(x)	__enc_swreg((x), NN_REG_GPR_A)
+#define reg_b(x)	__enc_swreg((x), NN_REG_GPR_B)
+#define reg_nnr(x)	__enc_swreg((x), NN_REG_NNR)
+#define reg_xfer(x)	__enc_swreg((x), NN_REG_XFER)
+#define reg_imm(x)	__enc_swreg((x), NN_REG_IMM)
+#define reg_none()	__enc_swreg(0, NN_REG_NONE)
+#define reg_lm(x, off)	__enc_swreg_lm((x), NN_LM_MOD_NONE, (off))
+#define reg_lm_inc(x)	__enc_swreg_lm((x), NN_LM_MOD_INC, 0)
+#define reg_lm_dec(x)	__enc_swreg_lm((x), NN_LM_MOD_DEC, 0)
+#define __reg_lm(x, mod, off)	__enc_swreg_lm((x), (mod), (off))
+
+typedef __u32 __bitwise swreg;
+
+static inline swreg __enc_swreg(u16 id, u8 type)
+{
+	return (__force swreg)(id | FIELD_PREP(NN_REG_TYPE, type));
+}
+
+static inline swreg __enc_swreg_lm(u8 id, enum nfp_bpf_lm_mode mode, u8 off)
+{
+	WARN_ON(id > 3 || (off && mode != NN_LM_MOD_NONE));
+
+	return (__force swreg)(FIELD_PREP(NN_REG_TYPE, NN_REG_LMEM) |
+			       FIELD_PREP(NN_REG_LM_IDX, id) |
+			       FIELD_PREP(NN_REG_LM_MOD, mode) |
+			       off);
+}
+
+static inline u32 swreg_raw(swreg reg)
+{
+	return (__force u32)reg;
+}
+
+static inline enum nfp_bpf_reg_type swreg_type(swreg reg)
+{
+	return FIELD_GET(NN_REG_TYPE, swreg_raw(reg));
+}
+
+static inline u16 swreg_value(swreg reg)
+{
+	return FIELD_GET(NN_REG_VAL, swreg_raw(reg));
+}
+
+static inline bool swreg_lm_idx(swreg reg)
+{
+	return FIELD_GET(NN_REG_LM_IDX_LO, swreg_raw(reg));
+}
+
+static inline bool swreg_lmextn(swreg reg)
+{
+	return FIELD_GET(NN_REG_LM_IDX_HI, swreg_raw(reg));
+}
+
+static inline enum nfp_bpf_lm_mode swreg_lm_mode(swreg reg)
+{
+	return FIELD_GET(NN_REG_LM_MOD, swreg_raw(reg));
+}
+
+struct nfp_insn_ur_regs {
+	enum alu_dst_ab dst_ab;
+	u16 dst;
+	u16 areg, breg;
+	bool swap;
+	bool wr_both;
+	bool dst_lmextn;
+	bool src_lmextn;
+};
+
+struct nfp_insn_re_regs {
+	enum alu_dst_ab dst_ab;
+	u8 dst;
+	u8 areg, breg;
+	bool swap;
+	bool wr_both;
+	bool i8;
+	bool dst_lmextn;
+	bool src_lmextn;
+};
+
+int swreg_to_unrestricted(swreg dst, swreg lreg, swreg rreg,
+			  struct nfp_insn_ur_regs *reg);
+int swreg_to_restricted(swreg dst, swreg lreg, swreg rreg,
+			struct nfp_insn_re_regs *reg, bool has_imm8);
+
+#define NFP_USTORE_PREFETCH_WINDOW	8
+
+int nfp_ustore_check_valid_no_ecc(u64 insn);
+u64 nfp_ustore_calc_ecc_insn(u64 insn);
 
 #endif
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c
index f8fa63b66739..35eaccbece36 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
@@ -346,6 +346,32 @@ exit_release_fw:
 	return err < 0 ? err : 1;
 }
 
+static void
+nfp_nsp_init_ports(struct pci_dev *pdev, struct nfp_pf *pf,
+		   struct nfp_nsp *nsp)
+{
+	bool needs_reinit = false;
+	int i;
+
+	pf->eth_tbl = __nfp_eth_read_ports(pf->cpp, nsp);
+	if (!pf->eth_tbl)
+		return;
+
+	if (!nfp_nsp_has_mac_reinit(nsp))
+		return;
+
+	for (i = 0; i < pf->eth_tbl->count; i++)
+		needs_reinit |= pf->eth_tbl->ports[i].override_changed;
+	if (!needs_reinit)
+		return;
+
+	kfree(pf->eth_tbl);
+	if (nfp_nsp_mac_reinit(nsp))
+		dev_warn(&pdev->dev, "MAC reinit failed\n");
+
+	pf->eth_tbl = __nfp_eth_read_ports(pf->cpp, nsp);
+}
+
 static int nfp_nsp_init(struct pci_dev *pdev, struct nfp_pf *pf)
 {
 	struct nfp_nsp *nsp;
@@ -366,7 +392,7 @@ static int nfp_nsp_init(struct pci_dev *pdev, struct nfp_pf *pf)
 	if (err < 0)
 		goto exit_close_nsp;
 
-	pf->eth_tbl = __nfp_eth_read_ports(pf->cpp, nsp);
+	nfp_nsp_init_ports(pdev, pf, nsp);
 
 	pf->nspi = __nfp_nsp_identify(nsp);
 	if (pf->nspi)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index d51d8237b984..7f9857c276b1 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -394,6 +394,7 @@ struct nfp_net_rx_ring {
  * @tx_lso:	    Counter of LSO packets sent
  * @tx_errors:	    How many TX errors were encountered
  * @tx_busy:        How often was TX busy (no space)?
+ * @rx_replace_buf_alloc_fail:	Counter of RX buffer allocation failures
  * @irq_vector:     Interrupt vector number (use for talking to the OS)
  * @handler:        Interrupt handler for this ring vector
  * @name:           Name of the interrupt vector
@@ -437,6 +438,8 @@ struct nfp_net_r_vector {
 	u64 hw_csum_tx_inner;
 	u64 tx_gather;
 	u64 tx_lso;
+
+	u64 rx_replace_buf_alloc_fail;
 	u64 tx_errors;
 	u64 tx_busy;
 
@@ -473,7 +476,6 @@ struct nfp_stat_pair {
  * @dev:		Backpointer to struct device
  * @netdev:		Backpointer to net_device structure
  * @is_vf:		Is the driver attached to a VF?
- * @bpf_offload_skip_sw:  Offloaded BPF program will not be rerun by cls_bpf
  * @bpf_offload_xdp:	Offloaded BPF program is XDP
  * @chained_metadata_format:  Firemware will use new metadata format
  * @rx_dma_dir:		Mapping direction for RX buffers
@@ -499,7 +501,6 @@ struct nfp_net_dp {
 	struct net_device *netdev;
 
 	u8 is_vf:1;
-	u8 bpf_offload_skip_sw:1;
 	u8 bpf_offload_xdp:1;
 	u8 chained_metadata_format:1;
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index e118b5f23996..1a603fdd9e80 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -177,9 +177,9 @@ static int nfp_net_reconfig_wait(struct nfp_net *nn, unsigned long deadline)
 	return timed_out ? -EIO : 0;
 }
 
-static void nfp_net_reconfig_timer(unsigned long data)
+static void nfp_net_reconfig_timer(struct timer_list *t)
 {
-	struct nfp_net *nn = (void *)data;
+	struct nfp_net *nn = from_timer(nn, t, reconfig_timer);
 
 	spin_lock_bh(&nn->reconfig_lock);
 
@@ -1185,7 +1185,7 @@ static void *nfp_net_rx_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
 	} else {
 		struct page *page;
 
-		page = alloc_page(GFP_KERNEL | __GFP_COLD);
+		page = alloc_page(GFP_KERNEL);
 		frag = page ? page_address(page) : NULL;
 	}
 	if (!frag) {
@@ -1209,15 +1209,15 @@ static void *nfp_net_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
 
 	if (!dp->xdp_prog) {
 		frag = napi_alloc_frag(dp->fl_bufsz);
+		if (unlikely(!frag))
+			return NULL;
 	} else {
 		struct page *page;
 
-		page = alloc_page(GFP_ATOMIC | __GFP_COLD);
-		frag = page ? page_address(page) : NULL;
-	}
-	if (!frag) {
-		nn_dp_warn(dp, "Failed to alloc receive page frag\n");
-		return NULL;
+		page = dev_alloc_page();
+		if (unlikely(!page))
+			return NULL;
+		frag = page_address(page);
 	}
 
 	*dma_addr = nfp_net_dma_map_rx(dp, frag);
@@ -1514,6 +1514,11 @@ nfp_net_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
 {
 	u64_stats_update_begin(&r_vec->rx_sync);
 	r_vec->rx_drops++;
+	/* If we have both skb and rxbuf the replacement buffer allocation
+	 * must have failed, count this as an alloc failure.
+	 */
+	if (skb && rxbuf)
+		r_vec->rx_replace_buf_alloc_fail++;
 	u64_stats_update_end(&r_vec->rx_sync);
 
 	/* skb is build based on the frag, free_skb() would free the frag
@@ -1582,26 +1587,6 @@ nfp_net_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
 	return true;
 }
 
-static int nfp_net_run_xdp(struct bpf_prog *prog, void *data, void *hard_start,
-			   unsigned int *off, unsigned int *len)
-{
-	struct xdp_buff xdp;
-	void *orig_data;
-	int ret;
-
-	xdp.data_hard_start = hard_start;
-	xdp.data = data + *off;
-	xdp.data_end = data + *off + *len;
-
-	orig_data = xdp.data;
-	ret = bpf_prog_run_xdp(prog, &xdp);
-
-	*len -= xdp.data - orig_data;
-	*off += xdp.data - orig_data;
-
-	return ret;
-}
-
 /**
  * nfp_net_rx() - receive up to @budget packets on @rx_ring
  * @rx_ring:   RX ring to receive from
@@ -1637,6 +1622,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 		struct nfp_meta_parsed meta;
 		struct net_device *netdev;
 		dma_addr_t new_dma_addr;
+		u32 meta_len_xdp = 0;
 		void *new_frag;
 
 		idx = D_IDX(rx_ring, rx_ring->rd_p);
@@ -1715,16 +1701,24 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 
 		if (xdp_prog && !(rxd->rxd.flags & PCIE_DESC_RX_BPF &&
 				  dp->bpf_offload_xdp) && !meta.portid) {
+			void *orig_data = rxbuf->frag + pkt_off;
 			unsigned int dma_off;
-			void *hard_start;
+			struct xdp_buff xdp;
 			int act;
 
-			hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM;
+			xdp.data_hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM;
+			xdp.data = orig_data;
+			xdp.data_meta = orig_data;
+			xdp.data_end = orig_data + pkt_len;
+
+			act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+			pkt_len -= xdp.data - orig_data;
+			pkt_off += xdp.data - orig_data;
 
-			act = nfp_net_run_xdp(xdp_prog, rxbuf->frag, hard_start,
-					      &pkt_off, &pkt_len);
 			switch (act) {
 			case XDP_PASS:
+				meta_len_xdp = xdp.data - xdp.data_meta;
 				break;
 			case XDP_TX:
 				dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM;
@@ -1792,6 +1786,8 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 		if (rxd->rxd.flags & PCIE_DESC_RX_VLAN)
 			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
 					       le16_to_cpu(rxd->rxd.vlan));
+		if (meta_len_xdp)
+			skb_metadata_set(skb, meta_len_xdp);
 
 		napi_gro_receive(&rx_ring->r_vec->napi, skb);
 	}
@@ -3382,7 +3378,7 @@ nfp_net_xdp_setup(struct nfp_net *nn, struct bpf_prog *prog, u32 flags,
 	return 0;
 }
 
-static int nfp_net_xdp(struct net_device *netdev, struct netdev_xdp *xdp)
+static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
 {
 	struct nfp_net *nn = netdev_priv(netdev);
 
@@ -3397,6 +3393,14 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_xdp *xdp)
 			xdp->prog_attached = XDP_ATTACHED_HW;
 		xdp->prog_id = nn->xdp_prog ? nn->xdp_prog->aux->id : 0;
 		return 0;
+	case BPF_OFFLOAD_VERIFIER_PREP:
+		return nfp_app_bpf_verifier_prep(nn->app, nn, xdp);
+	case BPF_OFFLOAD_TRANSLATE:
+		return nfp_app_bpf_translate(nn->app, nn,
+					     xdp->offload.prog);
+	case BPF_OFFLOAD_DESTROY:
+		return nfp_app_bpf_destroy(nn->app, nn,
+					   xdp->offload.prog);
 	default:
 		return -EINVAL;
 	}
@@ -3445,7 +3449,7 @@ const struct net_device_ops nfp_net_netdev_ops = {
 	.ndo_get_phys_port_name	= nfp_port_get_phys_port_name,
 	.ndo_udp_tunnel_add	= nfp_net_add_vxlan_port,
 	.ndo_udp_tunnel_del	= nfp_net_del_vxlan_port,
-	.ndo_xdp		= nfp_net_xdp,
+	.ndo_bpf		= nfp_net_xdp,
 };
 
 /**
@@ -3546,8 +3550,7 @@ struct nfp_net *nfp_net_alloc(struct pci_dev *pdev, bool needs_netdev,
 	spin_lock_init(&nn->reconfig_lock);
 	spin_lock_init(&nn->link_status_lock);
 
-	setup_timer(&nn->reconfig_timer,
-		    nfp_net_reconfig_timer, (unsigned long)nn);
+	timer_setup(&nn->reconfig_timer, nfp_net_reconfig_timer, 0);
 
 	return nn;
 }
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index b0a452ba9039..782d452e0fc2 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -255,7 +255,7 @@
  * @NFP_NET_CFG_BPF_ADDR:	DMA address of the buffer with JITed BPF code
  */
 #define NFP_NET_CFG_BPF_ABI		0x0080
-#define   NFP_NET_BPF_ABI		1
+#define   NFP_NET_BPF_ABI		2
 #define NFP_NET_CFG_BPF_CAP		0x0081
 #define   NFP_NET_BPF_CAP_RELO		(1 << 0) /* seamless reload */
 #define NFP_NET_CFG_BPF_MAX_LEN		0x0082
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index dc016dfec64d..60c8d733a37d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -181,7 +181,8 @@ static const struct nfp_et_stat nfp_mac_et_stats[] = {
 
 #define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats)
 #define NN_ET_SWITCH_STATS_LEN 9
-#define NN_ET_RVEC_GATHER_STATS 7
+#define NN_RVEC_GATHER_STATS	8
+#define NN_RVEC_PER_Q_STATS	3
 
 static void nfp_net_get_nspinfo(struct nfp_app *app, char *version)
 {
@@ -243,6 +244,30 @@ nfp_app_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
 	nfp_get_drvinfo(app, app->pdev, "*", drvinfo);
 }
 
+static void
+nfp_net_set_fec_link_mode(struct nfp_eth_table_port *eth_port,
+			  struct ethtool_link_ksettings *c)
+{
+	unsigned int modes;
+
+	ethtool_link_ksettings_add_link_mode(c, supported, FEC_NONE);
+	if (!nfp_eth_can_support_fec(eth_port)) {
+		ethtool_link_ksettings_add_link_mode(c, advertising, FEC_NONE);
+		return;
+	}
+
+	modes = nfp_eth_supported_fec_modes(eth_port);
+	if (modes & NFP_FEC_BASER) {
+		ethtool_link_ksettings_add_link_mode(c, supported, FEC_BASER);
+		ethtool_link_ksettings_add_link_mode(c, advertising, FEC_BASER);
+	}
+
+	if (modes & NFP_FEC_REED_SOLOMON) {
+		ethtool_link_ksettings_add_link_mode(c, supported, FEC_RS);
+		ethtool_link_ksettings_add_link_mode(c, advertising, FEC_RS);
+	}
+}
+
 /**
  * nfp_net_get_link_ksettings - Get Link Speed settings
  * @netdev:	network interface device structure
@@ -277,9 +302,11 @@ nfp_net_get_link_ksettings(struct net_device *netdev,
 
 	port = nfp_port_from_netdev(netdev);
 	eth_port = nfp_port_get_eth_port(port);
-	if (eth_port)
+	if (eth_port) {
 		cmd->base.autoneg = eth_port->aneg != NFP_ANEG_DISABLED ?
 			AUTONEG_ENABLE : AUTONEG_DISABLE;
+		nfp_net_set_fec_link_mode(eth_port, cmd);
+	}
 
 	if (!netif_carrier_ok(netdev))
 		return 0;
@@ -327,7 +354,7 @@ nfp_net_set_link_ksettings(struct net_device *netdev,
 		return -EOPNOTSUPP;
 
 	if (netif_running(netdev)) {
-		netdev_warn(netdev, "Changing settings not allowed on an active interface. It may cause the port to be disabled until reboot.\n");
+		netdev_warn(netdev, "Changing settings not allowed on an active interface. It may cause the port to be disabled until driver reload.\n");
 		return -EBUSY;
 	}
 
@@ -427,7 +454,7 @@ static unsigned int nfp_vnic_get_sw_stats_count(struct net_device *netdev)
 {
 	struct nfp_net *nn = netdev_priv(netdev);
 
-	return NN_ET_RVEC_GATHER_STATS + nn->dp.num_r_vecs * 3;
+	return NN_RVEC_GATHER_STATS + nn->dp.num_r_vecs * NN_RVEC_PER_Q_STATS;
 }
 
 static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data)
@@ -444,6 +471,7 @@ static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data)
 	data = nfp_pr_et(data, "hw_rx_csum_ok");
 	data = nfp_pr_et(data, "hw_rx_csum_inner_ok");
 	data = nfp_pr_et(data, "hw_rx_csum_err");
+	data = nfp_pr_et(data, "rx_replace_buf_alloc_fail");
 	data = nfp_pr_et(data, "hw_tx_csum");
 	data = nfp_pr_et(data, "hw_tx_inner_csum");
 	data = nfp_pr_et(data, "tx_gather");
@@ -454,9 +482,9 @@ static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data)
 
 static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data)
 {
-	u64 gathered_stats[NN_ET_RVEC_GATHER_STATS] = {};
+	u64 gathered_stats[NN_RVEC_GATHER_STATS] = {};
 	struct nfp_net *nn = netdev_priv(netdev);
-	u64 tmp[NN_ET_RVEC_GATHER_STATS];
+	u64 tmp[NN_RVEC_GATHER_STATS];
 	unsigned int i, j;
 
 	for (i = 0; i < nn->dp.num_r_vecs; i++) {
@@ -468,25 +496,26 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data)
 			tmp[0] = nn->r_vecs[i].hw_csum_rx_ok;
 			tmp[1] = nn->r_vecs[i].hw_csum_rx_inner_ok;
 			tmp[2] = nn->r_vecs[i].hw_csum_rx_error;
+			tmp[3] = nn->r_vecs[i].rx_replace_buf_alloc_fail;
 		} while (u64_stats_fetch_retry(&nn->r_vecs[i].rx_sync, start));
 
 		do {
 			start = u64_stats_fetch_begin(&nn->r_vecs[i].tx_sync);
 			data[1] = nn->r_vecs[i].tx_pkts;
 			data[2] = nn->r_vecs[i].tx_busy;
-			tmp[3] = nn->r_vecs[i].hw_csum_tx;
-			tmp[4] = nn->r_vecs[i].hw_csum_tx_inner;
-			tmp[5] = nn->r_vecs[i].tx_gather;
-			tmp[6] = nn->r_vecs[i].tx_lso;
+			tmp[4] = nn->r_vecs[i].hw_csum_tx;
+			tmp[5] = nn->r_vecs[i].hw_csum_tx_inner;
+			tmp[6] = nn->r_vecs[i].tx_gather;
+			tmp[7] = nn->r_vecs[i].tx_lso;
 		} while (u64_stats_fetch_retry(&nn->r_vecs[i].tx_sync, start));
 
-		data += 3;
+		data += NN_RVEC_PER_Q_STATS;
 
-		for (j = 0; j < NN_ET_RVEC_GATHER_STATS; j++)
+		for (j = 0; j < NN_RVEC_GATHER_STATS; j++)
 			gathered_stats[j] += tmp[j];
 	}
 
-	for (j = 0; j < NN_ET_RVEC_GATHER_STATS; j++)
+	for (j = 0; j < NN_RVEC_GATHER_STATS; j++)
 		*data++ = gathered_stats[j];
 
 	return data;
@@ -683,6 +712,91 @@ static int nfp_port_get_sset_count(struct net_device *netdev, int sset)
 	}
 }
 
+static int nfp_port_fec_ethtool_to_nsp(u32 fec)
+{
+	switch (fec) {
+	case ETHTOOL_FEC_AUTO:
+		return NFP_FEC_AUTO_BIT;
+	case ETHTOOL_FEC_OFF:
+		return NFP_FEC_DISABLED_BIT;
+	case ETHTOOL_FEC_RS:
+		return NFP_FEC_REED_SOLOMON_BIT;
+	case ETHTOOL_FEC_BASER:
+		return NFP_FEC_BASER_BIT;
+	default:
+		/* NSP only supports a single mode at a time */
+		return -EOPNOTSUPP;
+	}
+}
+
+static u32 nfp_port_fec_nsp_to_ethtool(u32 fec)
+{
+	u32 result = 0;
+
+	if (fec & NFP_FEC_AUTO)
+		result |= ETHTOOL_FEC_AUTO;
+	if (fec & NFP_FEC_BASER)
+		result |= ETHTOOL_FEC_BASER;
+	if (fec & NFP_FEC_REED_SOLOMON)
+		result |= ETHTOOL_FEC_RS;
+	if (fec & NFP_FEC_DISABLED)
+		result |= ETHTOOL_FEC_OFF;
+
+	return result ?: ETHTOOL_FEC_NONE;
+}
+
+static int
+nfp_port_get_fecparam(struct net_device *netdev,
+		      struct ethtool_fecparam *param)
+{
+	struct nfp_eth_table_port *eth_port;
+	struct nfp_port *port;
+
+	param->active_fec = ETHTOOL_FEC_NONE_BIT;
+	param->fec = ETHTOOL_FEC_NONE_BIT;
+
+	port = nfp_port_from_netdev(netdev);
+	eth_port = nfp_port_get_eth_port(port);
+	if (!eth_port)
+		return -EOPNOTSUPP;
+
+	if (!nfp_eth_can_support_fec(eth_port))
+		return 0;
+
+	param->fec = nfp_port_fec_nsp_to_ethtool(eth_port->fec_modes_supported);
+	param->active_fec = nfp_port_fec_nsp_to_ethtool(eth_port->fec);
+
+	return 0;
+}
+
+static int
+nfp_port_set_fecparam(struct net_device *netdev,
+		      struct ethtool_fecparam *param)
+{
+	struct nfp_eth_table_port *eth_port;
+	struct nfp_port *port;
+	int err, fec;
+
+	port = nfp_port_from_netdev(netdev);
+	eth_port = nfp_port_get_eth_port(port);
+	if (!eth_port)
+		return -EOPNOTSUPP;
+
+	if (!nfp_eth_can_support_fec(eth_port))
+		return -EOPNOTSUPP;
+
+	fec = nfp_port_fec_ethtool_to_nsp(param->fec);
+	if (fec < 0)
+		return fec;
+
+	err = nfp_eth_set_fec(port->app->cpp, eth_port->index, fec);
+	if (!err)
+		/* Only refresh if we did something */
+		nfp_net_refresh_port_table(port);
+
+	return err < 0 ? err : 0;
+}
+
 /* RX network flow classification (RSS, filters, etc)
  */
 static u32 ethtool_flow_to_nfp_flag(u32 flow_type)
@@ -1141,6 +1255,8 @@ static const struct ethtool_ops nfp_net_ethtool_ops = {
 	.set_channels		= nfp_net_set_channels,
 	.get_link_ksettings	= nfp_net_get_link_ksettings,
 	.set_link_ksettings	= nfp_net_set_link_ksettings,
+	.get_fecparam		= nfp_port_get_fecparam,
+	.set_fecparam		= nfp_port_set_fecparam,
 };
 
 const struct ethtool_ops nfp_port_ethtool_ops = {
@@ -1152,6 +1268,10 @@ const struct ethtool_ops nfp_port_ethtool_ops = {
 	.set_dump		= nfp_app_set_dump,
 	.get_dump_flag		= nfp_app_get_dump_flag,
 	.get_dump_data		= nfp_app_get_dump_data,
+	.get_link_ksettings	= nfp_net_get_link_ksettings,
+	.set_link_ksettings	= nfp_net_set_link_ksettings,
+	.get_fecparam		= nfp_port_get_fecparam,
+	.set_fecparam		= nfp_port_set_fecparam,
 };
 
 void nfp_net_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
index ff373acd28f3..c505014121c4 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
@@ -597,7 +597,7 @@ nfp_net_eth_port_update(struct nfp_cpp *cpp, struct nfp_port *port,
 		return -EIO;
 	}
 	if (eth_port->override_changed) {
-		nfp_warn(cpp, "Port #%d config changed, unregistering. Reboot required before port will be operational again.\n", port->eth_id);
+		nfp_warn(cpp, "Port #%d config changed, unregistering. Driver reload required before port will be operational again.\n", port->eth_id);
 		port->type = NFP_PORT_INVALID;
 	}
 
@@ -611,6 +611,7 @@ int nfp_net_refresh_port_table_sync(struct nfp_pf *pf)
 	struct nfp_eth_table *eth_table;
 	struct nfp_net *nn, *next;
 	struct nfp_port *port;
+	int err;
 
 	lockdep_assert_held(&pf->lock);
 
@@ -640,6 +641,11 @@ int nfp_net_refresh_port_table_sync(struct nfp_pf *pf)
 
 	kfree(eth_table);
 
+	/* Resync repr state. This may cause reprs to be removed. */
+	err = nfp_reprs_resync_phys_ports(pf->app);
+	if (err)
+		return err;
+
 	/* Shoot off the ports which became invalid */
 	list_for_each_entry_safe(nn, next, &pf->vnics, vnic_list) {
 		if (!nn->port || nn->port->type != NFP_PORT_INVALID)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
index d540a9dc77b3..1bce8c131bb9 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
@@ -390,3 +390,50 @@ struct nfp_reprs *nfp_reprs_alloc(unsigned int num_reprs)
 
 	return reprs;
 }
+
+int nfp_reprs_resync_phys_ports(struct nfp_app *app)
+{
+	struct nfp_reprs *reprs, *old_reprs;
+	struct nfp_repr *repr;
+	int i;
+
+	old_reprs =
+		rcu_dereference_protected(app->reprs[NFP_REPR_TYPE_PHYS_PORT],
+					  lockdep_is_held(&app->pf->lock));
+	if (!old_reprs)
+		return 0;
+
+	reprs = nfp_reprs_alloc(old_reprs->num_reprs);
+	if (!reprs)
+		return -ENOMEM;
+
+	for (i = 0; i < old_reprs->num_reprs; i++) {
+		if (!old_reprs->reprs[i])
+			continue;
+
+		repr = netdev_priv(old_reprs->reprs[i]);
+		if (repr->port->type == NFP_PORT_INVALID)
+			continue;
+
+		reprs->reprs[i] = old_reprs->reprs[i];
+	}
+
+	old_reprs = nfp_app_reprs_set(app, NFP_REPR_TYPE_PHYS_PORT, reprs);
+	synchronize_rcu();
+
+	/* Now we free up removed representors */
+	for (i = 0; i < old_reprs->num_reprs; i++) {
+		if (!old_reprs->reprs[i])
+			continue;
+
+		repr = netdev_priv(old_reprs->reprs[i]);
+		if (repr->port->type != NFP_PORT_INVALID)
+			continue;
+
+		nfp_app_repr_stop(app, repr);
+		nfp_repr_clean(repr);
+	}
+
+	kfree(old_reprs);
+	return 0;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h
index 32179cad062a..5d4d897bc9c6 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h
@@ -124,5 +124,6 @@ void
 nfp_reprs_clean_and_free_by_type(struct nfp_app *app,
 				 enum nfp_repr_type type);
 struct nfp_reprs *nfp_reprs_alloc(unsigned int num_reprs);
+int nfp_reprs_resync_phys_ports(struct nfp_app *app);
 
 #endif /* NFP_NET_REPR_H */
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c
index e6d2e06b050c..8b1b962cf1d1 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c
@@ -112,7 +112,13 @@ int nfp_app_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
 	writew(get_unaligned_be16(mac + 4),
 	       app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_MAC_LO);
 
-	return nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_MAC, "MAC");
+	err = nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_MAC, "MAC");
+	if (!err)
+		nfp_info(app->pf->cpp,
+			 "MAC %pM set on VF %d, reload the VF driver to make this change effective.\n",
+			 mac, vf);
+
+	return err;
 }
 
 int nfp_app_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
index 37364555c42b..14a6d1ba51a9 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c
@@ -477,6 +477,11 @@ int nfp_nsp_device_soft_reset(struct nfp_nsp *state)
 	return nfp_nsp_command(state, SPCODE_SOFT_RESET, 0, 0, 0);
 }
 
+int nfp_nsp_mac_reinit(struct nfp_nsp *state)
+{
+	return nfp_nsp_command(state, SPCODE_MAC_INIT, 0, 0, 0);
+}
+
 int nfp_nsp_load_fw(struct nfp_nsp *state, const struct firmware *fw)
 {
 	return nfp_nsp_command_buf(state, SPCODE_FW_LOAD, fw->size, fw->data,
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
index e2f028027c6f..650ca1a5bd21 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h
@@ -48,6 +48,12 @@ u16 nfp_nsp_get_abi_ver_minor(struct nfp_nsp *state);
 int nfp_nsp_wait(struct nfp_nsp *state);
 int nfp_nsp_device_soft_reset(struct nfp_nsp *state);
 int nfp_nsp_load_fw(struct nfp_nsp *state, const struct firmware *fw);
+int nfp_nsp_mac_reinit(struct nfp_nsp *state);
+
+static inline bool nfp_nsp_has_mac_reinit(struct nfp_nsp *state)
+{
+	return nfp_nsp_get_abi_ver_minor(state) > 20;
+}
 
 enum nfp_eth_interface {
 	NFP_INTERFACE_NONE	= 0,
@@ -73,6 +79,18 @@ enum nfp_eth_aneg {
 	NFP_ANEG_DISABLED,
 };
 
+enum nfp_eth_fec {
+	NFP_FEC_AUTO_BIT = 0,
+	NFP_FEC_BASER_BIT,
+	NFP_FEC_REED_SOLOMON_BIT,
+	NFP_FEC_DISABLED_BIT,
+};
+
+#define NFP_FEC_AUTO		BIT(NFP_FEC_AUTO_BIT)
+#define NFP_FEC_BASER		BIT(NFP_FEC_BASER_BIT)
+#define NFP_FEC_REED_SOLOMON	BIT(NFP_FEC_REED_SOLOMON_BIT)
+#define NFP_FEC_DISABLED	BIT(NFP_FEC_DISABLED_BIT)
+
 /**
  * struct nfp_eth_table - ETH table information
  * @count:	number of table entries
@@ -87,6 +105,7 @@ enum nfp_eth_aneg {
  * @speed:	interface speed (in Mbps)
  * @interface:	interface (module) plugged in
  * @media:	media type of the @interface
+ * @fec:	forward error correction mode
  * @aneg:	auto negotiation mode
  * @mac_addr:	interface MAC address
  * @label_port:	port id
@@ -99,6 +118,7 @@ enum nfp_eth_aneg {
  * @port_type:	one of %PORT_* defines for ethtool
  * @port_lanes:	total number of lanes on the port (sum of lanes of all subports)
  * @is_split:	is interface part of a split port
+ * @fec_modes_supported:	bitmap of FEC modes supported
  */
 struct nfp_eth_table {
 	unsigned int count;
@@ -114,6 +134,7 @@ struct nfp_eth_table {
 		unsigned int interface;
 		enum nfp_eth_media media;
 
+		enum nfp_eth_fec fec;
 		enum nfp_eth_aneg aneg;
 
 		u8 mac_addr[ETH_ALEN];
@@ -133,6 +154,8 @@ struct nfp_eth_table {
 		unsigned int port_lanes;
 
 		bool is_split;
+
+		unsigned int fec_modes_supported;
 	} ports[0];
 };
 
@@ -143,6 +166,19 @@ __nfp_eth_read_ports(struct nfp_cpp *cpp, struct nfp_nsp *nsp);
 int nfp_eth_set_mod_enable(struct nfp_cpp *cpp, unsigned int idx, bool enable);
 int nfp_eth_set_configured(struct nfp_cpp *cpp, unsigned int idx,
 			   bool configed);
+int
+nfp_eth_set_fec(struct nfp_cpp *cpp, unsigned int idx, enum nfp_eth_fec mode);
+
+static inline bool nfp_eth_can_support_fec(struct nfp_eth_table_port *eth_port)
+{
+	return !!eth_port->fec_modes_supported;
+}
+
+static inline unsigned int
+nfp_eth_supported_fec_modes(struct nfp_eth_table_port *eth_port)
+{
+	return eth_port->fec_modes_supported;
+}
 
 struct nfp_nsp *nfp_eth_config_start(struct nfp_cpp *cpp, unsigned int idx);
 int nfp_eth_config_commit_end(struct nfp_nsp *nsp);
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c
index f6f7c085f8e0..7ca589660e4d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c
@@ -55,6 +55,8 @@
 #define NSP_ETH_PORT_INDEX		GENMASK_ULL(15, 8)
 #define NSP_ETH_PORT_LABEL		GENMASK_ULL(53, 48)
 #define NSP_ETH_PORT_PHYLABEL		GENMASK_ULL(59, 54)
+#define NSP_ETH_PORT_FEC_SUPP_BASER	BIT_ULL(60)
+#define NSP_ETH_PORT_FEC_SUPP_RS	BIT_ULL(61)
 
 #define NSP_ETH_PORT_LANES_MASK		cpu_to_le64(NSP_ETH_PORT_LANES)
 
@@ -67,6 +69,7 @@
 #define NSP_ETH_STATE_MEDIA		GENMASK_ULL(21, 20)
 #define NSP_ETH_STATE_OVRD_CHNG		BIT_ULL(22)
 #define NSP_ETH_STATE_ANEG		GENMASK_ULL(25, 23)
+#define NSP_ETH_STATE_FEC		GENMASK_ULL(27, 26)
 
 #define NSP_ETH_CTRL_CONFIGURED		BIT_ULL(0)
 #define NSP_ETH_CTRL_ENABLED		BIT_ULL(1)
@@ -75,6 +78,7 @@
 #define NSP_ETH_CTRL_SET_RATE		BIT_ULL(4)
 #define NSP_ETH_CTRL_SET_LANES		BIT_ULL(5)
 #define NSP_ETH_CTRL_SET_ANEG		BIT_ULL(6)
+#define NSP_ETH_CTRL_SET_FEC		BIT_ULL(7)
 
 enum nfp_eth_raw {
 	NSP_ETH_RAW_PORT = 0,
@@ -152,6 +156,7 @@ nfp_eth_port_translate(struct nfp_nsp *nsp, const union eth_table_entry *src,
 		       unsigned int index, struct nfp_eth_table_port *dst)
 {
 	unsigned int rate;
+	unsigned int fec;
 	u64 port, state;
 
 	port = le64_to_cpu(src->port);
@@ -183,6 +188,18 @@ nfp_eth_port_translate(struct nfp_nsp *nsp, const union eth_table_entry *src,
 
 	dst->override_changed = FIELD_GET(NSP_ETH_STATE_OVRD_CHNG, state);
 	dst->aneg = FIELD_GET(NSP_ETH_STATE_ANEG, state);
+
+	if (nfp_nsp_get_abi_ver_minor(nsp) < 22)
+		return;
+
+	fec = FIELD_GET(NSP_ETH_PORT_FEC_SUPP_BASER, port);
+	dst->fec_modes_supported |= fec << NFP_FEC_BASER_BIT;
+	fec = FIELD_GET(NSP_ETH_PORT_FEC_SUPP_RS, port);
+	dst->fec_modes_supported |= fec << NFP_FEC_REED_SOLOMON_BIT;
+	if (dst->fec_modes_supported)
+		dst->fec_modes_supported |= NFP_FEC_AUTO | NFP_FEC_DISABLED;
+
+	dst->fec = 1 << FIELD_GET(NSP_ETH_STATE_FEC, state);
 }
 
 static void
@@ -469,10 +486,10 @@ int nfp_eth_set_configured(struct nfp_cpp *cpp, unsigned int idx, bool configed)
 	return nfp_eth_config_commit_end(nsp);
 }
 
-/* Force inline, FIELD_* macroes require masks to be compilation-time known */
-static __always_inline int
+static int
 nfp_eth_set_bit_config(struct nfp_nsp *nsp, unsigned int raw_idx,
-		       const u64 mask, unsigned int val, const u64 ctrl_bit)
+		       const u64 mask, const unsigned int shift,
+		       unsigned int val, const u64 ctrl_bit)
 {
 	union eth_table_entry *entries = nfp_nsp_config_entries(nsp);
 	unsigned int idx = nfp_nsp_config_idx(nsp);
@@ -489,11 +506,11 @@ nfp_eth_set_bit_config(struct nfp_nsp *nsp, unsigned int raw_idx,
 
 	/* Check if we are already in requested state */
 	reg = le64_to_cpu(entries[idx].raw[raw_idx]);
-	if (val == FIELD_GET(mask, reg))
+	if (val == (reg & mask) >> shift)
 		return 0;
 
 	reg &= ~mask;
-	reg |= FIELD_PREP(mask, val);
+	reg |= (val << shift) & mask;
 	entries[idx].raw[raw_idx] = cpu_to_le64(reg);
 
 	entries[idx].control |= cpu_to_le64(ctrl_bit);
@@ -503,6 +520,13 @@ nfp_eth_set_bit_config(struct nfp_nsp *nsp, unsigned int raw_idx,
 	return 0;
 }
 
+#define NFP_ETH_SET_BIT_CONFIG(nsp, raw_idx, mask, val, ctrl_bit)	\
+	({								\
+		__BF_FIELD_CHECK(mask, 0ULL, val, "NFP_ETH_SET_BIT_CONFIG: "); \
+		nfp_eth_set_bit_config(nsp, raw_idx, mask, __bf_shf(mask), \
+				       val, ctrl_bit);			\
+	})
+
 /**
  * __nfp_eth_set_aneg() - set PHY autonegotiation control bit
  * @nsp:	NFP NSP handle returned from nfp_eth_config_start()
@@ -515,12 +539,59 @@ nfp_eth_set_bit_config(struct nfp_nsp *nsp, unsigned int raw_idx,
  */
 int __nfp_eth_set_aneg(struct nfp_nsp *nsp, enum nfp_eth_aneg mode)
 {
-	return nfp_eth_set_bit_config(nsp, NSP_ETH_RAW_STATE,
+	return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_STATE,
 				      NSP_ETH_STATE_ANEG, mode,
 				      NSP_ETH_CTRL_SET_ANEG);
 }
 
 /**
+ * __nfp_eth_set_fec() - set PHY forward error correction control bit
+ * @nsp:	NFP NSP handle returned from nfp_eth_config_start()
+ * @mode:	Desired fec mode
+ *
+ * Set the PHY module forward error correction mode.
+ * Will write to hwinfo overrides in the flash (persistent config).
+ *
+ * Return: 0 or -ERRNO.
+ */
+static int __nfp_eth_set_fec(struct nfp_nsp *nsp, enum nfp_eth_fec mode)
+{
+	return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_STATE,
+				      NSP_ETH_STATE_FEC, mode,
+				      NSP_ETH_CTRL_SET_FEC);
+}
+
+/**
+ * nfp_eth_set_fec() - set PHY forward error correction control mode
+ * @cpp:	NFP CPP handle
+ * @idx:	NFP chip-wide port index
+ * @mode:	Desired fec mode
+ *
+ * Return:
+ * 0 - configuration successful;
+ * 1 - no changes were needed;
+ * -ERRNO - configuration failed.
+ */
+int
+nfp_eth_set_fec(struct nfp_cpp *cpp, unsigned int idx, enum nfp_eth_fec mode)
+{
+	struct nfp_nsp *nsp;
+	int err;
+
+	nsp = nfp_eth_config_start(cpp, idx);
+	if (IS_ERR(nsp))
+		return PTR_ERR(nsp);
+
+	err = __nfp_eth_set_fec(nsp, mode);
+	if (err) {
+		nfp_eth_config_cleanup_end(nsp);
+		return err;
+	}
+
+	return nfp_eth_config_commit_end(nsp);
+}
+
+/**
  * __nfp_eth_set_speed() - set interface speed/rate
  * @nsp:	NFP NSP handle returned from nfp_eth_config_start()
  * @speed:	Desired speed (per lane)
@@ -544,7 +615,7 @@ int __nfp_eth_set_speed(struct nfp_nsp *nsp, unsigned int speed)
 		return -EINVAL;
 	}
 
-	return nfp_eth_set_bit_config(nsp, NSP_ETH_RAW_STATE,
+	return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_STATE,
 				      NSP_ETH_STATE_RATE, rate,
 				      NSP_ETH_CTRL_SET_RATE);
 }
@@ -561,6 +632,6 @@ int __nfp_eth_set_speed(struct nfp_nsp *nsp, unsigned int speed)
  */
 int __nfp_eth_set_split(struct nfp_nsp *nsp, unsigned int lanes)
 {
-	return nfp_eth_set_bit_config(nsp, NSP_ETH_RAW_PORT, NSP_ETH_PORT_LANES,
+	return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_PORT, NSP_ETH_PORT_LANES,
 				      lanes, NSP_ETH_CTRL_SET_LANES);
 }
diff --git a/drivers/net/ethernet/nuvoton/w90p910_ether.c b/drivers/net/ethernet/nuvoton/w90p910_ether.c
index 4a67c55aa9f1..052b3d2c07a1 100644
--- a/drivers/net/ethernet/nuvoton/w90p910_ether.c
+++ b/drivers/net/ethernet/nuvoton/w90p910_ether.c
@@ -253,10 +253,10 @@ static void update_linkspeed(struct net_device *dev)
 	netif_carrier_on(dev);
 }
 
-static void w90p910_check_link(unsigned long dev_id)
+static void w90p910_check_link(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *) dev_id;
-	struct w90p910_ether *ether = netdev_priv(dev);
+	struct w90p910_ether *ether = from_timer(ether, t, check_timer);
+	struct net_device *dev = ether->mii.dev;
 
 	update_linkspeed(dev);
 	mod_timer(&ether->check_timer, jiffies + msecs_to_jiffies(1000));
@@ -957,8 +957,7 @@ static int w90p910_ether_setup(struct net_device *dev)
 	ether->mii.mdio_read = w90p910_mdio_read;
 	ether->mii.mdio_write = w90p910_mdio_write;
 
-	setup_timer(&ether->check_timer, w90p910_check_link,
-						(unsigned long)dev);
+	timer_setup(&ether->check_timer, w90p910_check_link, 0);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 994a83a1f0a5..ac8439ceea10 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -1024,12 +1024,18 @@ static void free_rings(struct net_device *dev)
 
 	if (!nv_optimized(np)) {
 		if (np->rx_ring.orig)
-			pci_free_consistent(np->pci_dev, sizeof(struct ring_desc) * (np->rx_ring_size + np->tx_ring_size),
-					    np->rx_ring.orig, np->ring_addr);
+			dma_free_coherent(&np->pci_dev->dev,
+					  sizeof(struct ring_desc) *
+					  (np->rx_ring_size +
+					  np->tx_ring_size),
+					  np->rx_ring.orig, np->ring_addr);
 	} else {
 		if (np->rx_ring.ex)
-			pci_free_consistent(np->pci_dev, sizeof(struct ring_desc_ex) * (np->rx_ring_size + np->tx_ring_size),
-					    np->rx_ring.ex, np->ring_addr);
+			dma_free_coherent(&np->pci_dev->dev,
+					  sizeof(struct ring_desc_ex) *
+					  (np->rx_ring_size +
+					  np->tx_ring_size),
+					  np->rx_ring.ex, np->ring_addr);
 	}
 	kfree(np->rx_skb);
 	kfree(np->tx_skb);
@@ -1813,12 +1819,12 @@ static int nv_alloc_rx(struct net_device *dev)
 		struct sk_buff *skb = netdev_alloc_skb(dev, np->rx_buf_sz + NV_RX_ALLOC_PAD);
 		if (skb) {
 			np->put_rx_ctx->skb = skb;
-			np->put_rx_ctx->dma = pci_map_single(np->pci_dev,
+			np->put_rx_ctx->dma = dma_map_single(&np->pci_dev->dev,
 							     skb->data,
 							     skb_tailroom(skb),
-							     PCI_DMA_FROMDEVICE);
-			if (pci_dma_mapping_error(np->pci_dev,
-						  np->put_rx_ctx->dma)) {
+							     DMA_FROM_DEVICE);
+			if (unlikely(dma_mapping_error(&np->pci_dev->dev,
+						       np->put_rx_ctx->dma))) {
 				kfree_skb(skb);
 				goto packet_dropped;
 			}
@@ -1854,12 +1860,12 @@ static int nv_alloc_rx_optimized(struct net_device *dev)
 		struct sk_buff *skb = netdev_alloc_skb(dev, np->rx_buf_sz + NV_RX_ALLOC_PAD);
 		if (skb) {
 			np->put_rx_ctx->skb = skb;
-			np->put_rx_ctx->dma = pci_map_single(np->pci_dev,
+			np->put_rx_ctx->dma = dma_map_single(&np->pci_dev->dev,
 							     skb->data,
 							     skb_tailroom(skb),
-							     PCI_DMA_FROMDEVICE);
-			if (pci_dma_mapping_error(np->pci_dev,
-						  np->put_rx_ctx->dma)) {
+							     DMA_FROM_DEVICE);
+			if (unlikely(dma_mapping_error(&np->pci_dev->dev,
+						       np->put_rx_ctx->dma))) {
 				kfree_skb(skb);
 				goto packet_dropped;
 			}
@@ -1884,10 +1890,9 @@ packet_dropped:
 }
 
 /* If rx bufs are exhausted called after 50ms to attempt to refresh */
-static void nv_do_rx_refill(unsigned long data)
+static void nv_do_rx_refill(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *) data;
-	struct fe_priv *np = netdev_priv(dev);
+	struct fe_priv *np = from_timer(np, t, oom_kick);
 
 	/* Just reschedule NAPI rx processing */
 	napi_schedule(&np->napi);
@@ -1977,9 +1982,9 @@ static void nv_unmap_txskb(struct fe_priv *np, struct nv_skb_map *tx_skb)
 {
 	if (tx_skb->dma) {
 		if (tx_skb->dma_single)
-			pci_unmap_single(np->pci_dev, tx_skb->dma,
+			dma_unmap_single(&np->pci_dev->dev, tx_skb->dma,
 					 tx_skb->dma_len,
-					 PCI_DMA_TODEVICE);
+					 DMA_TO_DEVICE);
 		else
 			pci_unmap_page(np->pci_dev, tx_skb->dma,
 				       tx_skb->dma_len,
@@ -2047,10 +2052,10 @@ static void nv_drain_rx(struct net_device *dev)
 		}
 		wmb();
 		if (np->rx_skb[i].skb) {
-			pci_unmap_single(np->pci_dev, np->rx_skb[i].dma,
+			dma_unmap_single(&np->pci_dev->dev, np->rx_skb[i].dma,
 					 (skb_end_pointer(np->rx_skb[i].skb) -
-					  np->rx_skb[i].skb->data),
-					 PCI_DMA_FROMDEVICE);
+					 np->rx_skb[i].skb->data),
+					 DMA_FROM_DEVICE);
 			dev_kfree_skb(np->rx_skb[i].skb);
 			np->rx_skb[i].skb = NULL;
 		}
@@ -2221,13 +2226,12 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/* setup the header buffer */
 	do {
-		prev_tx = put_tx;
-		prev_tx_ctx = np->put_tx_ctx;
 		bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size;
-		np->put_tx_ctx->dma = pci_map_single(np->pci_dev, skb->data + offset, bcnt,
-						PCI_DMA_TODEVICE);
-		if (pci_dma_mapping_error(np->pci_dev,
-					  np->put_tx_ctx->dma)) {
+		np->put_tx_ctx->dma = dma_map_single(&np->pci_dev->dev,
+						     skb->data + offset, bcnt,
+						     DMA_TO_DEVICE);
+		if (unlikely(dma_mapping_error(&np->pci_dev->dev,
+					       np->put_tx_ctx->dma))) {
 			/* on DMA mapping error - drop the packet */
 			dev_kfree_skb_any(skb);
 			u64_stats_update_begin(&np->swstats_tx_syncp);
@@ -2256,8 +2260,6 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		offset = 0;
 
 		do {
-			prev_tx = put_tx;
-			prev_tx_ctx = np->put_tx_ctx;
 			if (!start_tx_ctx)
 				start_tx_ctx = tmp_tx_ctx = np->put_tx_ctx;
 
@@ -2267,7 +2269,8 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 							frag, offset,
 							bcnt,
 							DMA_TO_DEVICE);
-			if (dma_mapping_error(&np->pci_dev->dev, np->put_tx_ctx->dma)) {
+			if (unlikely(dma_mapping_error(&np->pci_dev->dev,
+						       np->put_tx_ctx->dma))) {
 
 				/* Unwind the mapped fragments */
 				do {
@@ -2297,6 +2300,16 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		} while (frag_size);
 	}
 
+	if (unlikely(put_tx == np->first_tx.orig))
+		prev_tx = np->last_tx.orig;
+	else
+		prev_tx = put_tx - 1;
+
+	if (unlikely(np->put_tx_ctx == np->first_tx_ctx))
+		prev_tx_ctx = np->last_tx_ctx;
+	else
+		prev_tx_ctx = np->put_tx_ctx - 1;
+
 	/* set last fragment flag  */
 	prev_tx->flaglen |= cpu_to_le32(tx_flags_extra);
 
@@ -2370,13 +2383,12 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
 
 	/* setup the header buffer */
 	do {
-		prev_tx = put_tx;
-		prev_tx_ctx = np->put_tx_ctx;
 		bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size;
-		np->put_tx_ctx->dma = pci_map_single(np->pci_dev, skb->data + offset, bcnt,
-						PCI_DMA_TODEVICE);
-		if (pci_dma_mapping_error(np->pci_dev,
-					  np->put_tx_ctx->dma)) {
+		np->put_tx_ctx->dma = dma_map_single(&np->pci_dev->dev,
+						     skb->data + offset, bcnt,
+						     DMA_TO_DEVICE);
+		if (unlikely(dma_mapping_error(&np->pci_dev->dev,
+					       np->put_tx_ctx->dma))) {
 			/* on DMA mapping error - drop the packet */
 			dev_kfree_skb_any(skb);
 			u64_stats_update_begin(&np->swstats_tx_syncp);
@@ -2406,8 +2418,6 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
 		offset = 0;
 
 		do {
-			prev_tx = put_tx;
-			prev_tx_ctx = np->put_tx_ctx;
 			bcnt = (frag_size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : frag_size;
 			if (!start_tx_ctx)
 				start_tx_ctx = tmp_tx_ctx = np->put_tx_ctx;
@@ -2417,7 +2427,8 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
 							bcnt,
 							DMA_TO_DEVICE);
 
-			if (dma_mapping_error(&np->pci_dev->dev, np->put_tx_ctx->dma)) {
+			if (unlikely(dma_mapping_error(&np->pci_dev->dev,
+						       np->put_tx_ctx->dma))) {
 
 				/* Unwind the mapped fragments */
 				do {
@@ -2447,6 +2458,16 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
 		} while (frag_size);
 	}
 
+	if (unlikely(put_tx == np->first_tx.ex))
+		prev_tx = np->last_tx.ex;
+	else
+		prev_tx = put_tx - 1;
+
+	if (unlikely(np->put_tx_ctx == np->first_tx_ctx))
+		prev_tx_ctx = np->last_tx_ctx;
+	else
+		prev_tx_ctx = np->put_tx_ctx - 1;
+
 	/* set last fragment flag  */
 	prev_tx->flaglen |= cpu_to_le32(NV_TX2_LASTPACKET);
 
@@ -2810,9 +2831,9 @@ static int nv_rx_process(struct net_device *dev, int limit)
 		 * TODO: check if a prefetch of the first cacheline improves
 		 * the performance.
 		 */
-		pci_unmap_single(np->pci_dev, np->get_rx_ctx->dma,
-				np->get_rx_ctx->dma_len,
-				PCI_DMA_FROMDEVICE);
+		dma_unmap_single(&np->pci_dev->dev, np->get_rx_ctx->dma,
+				 np->get_rx_ctx->dma_len,
+				 DMA_FROM_DEVICE);
 		skb = np->get_rx_ctx->skb;
 		np->get_rx_ctx->skb = NULL;
 
@@ -2916,9 +2937,9 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit)
 		 * TODO: check if a prefetch of the first cacheline improves
 		 * the performance.
 		 */
-		pci_unmap_single(np->pci_dev, np->get_rx_ctx->dma,
-				np->get_rx_ctx->dma_len,
-				PCI_DMA_FROMDEVICE);
+		dma_unmap_single(&np->pci_dev->dev, np->get_rx_ctx->dma,
+				 np->get_rx_ctx->dma_len,
+				 DMA_FROM_DEVICE);
 		skb = np->get_rx_ctx->skb;
 		np->get_rx_ctx->skb = NULL;
 
@@ -4061,10 +4082,10 @@ static void nv_free_irq(struct net_device *dev)
 	}
 }
 
-static void nv_do_nic_poll(unsigned long data)
+static void nv_do_nic_poll(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *) data;
-	struct fe_priv *np = netdev_priv(dev);
+	struct fe_priv *np = from_timer(np, t, nic_poll);
+	struct net_device *dev = np->dev;
 	u8 __iomem *base = get_hwbase(dev);
 	u32 mask = 0;
 	unsigned long flags;
@@ -4172,16 +4193,18 @@ static void nv_do_nic_poll(unsigned long data)
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void nv_poll_controller(struct net_device *dev)
 {
-	nv_do_nic_poll((unsigned long) dev);
+	struct fe_priv *np = netdev_priv(dev);
+
+	nv_do_nic_poll(&np->nic_poll);
 }
 #endif
 
-static void nv_do_stats_poll(unsigned long data)
+static void nv_do_stats_poll(struct timer_list *t)
 	__acquires(&netdev_priv(dev)->hwstats_lock)
 	__releases(&netdev_priv(dev)->hwstats_lock)
 {
-	struct net_device *dev = (struct net_device *) data;
-	struct fe_priv *np = netdev_priv(dev);
+	struct fe_priv *np = from_timer(np, t, stats_poll);
+	struct net_device *dev = np->dev;
 
 	/* If lock is currently taken, the stats are being refreshed
 	 * and hence fresh enough */
@@ -4591,13 +4614,17 @@ static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ri
 
 	/* allocate new rings */
 	if (!nv_optimized(np)) {
-		rxtx_ring = pci_alloc_consistent(np->pci_dev,
-					    sizeof(struct ring_desc) * (ring->rx_pending + ring->tx_pending),
-					    &ring_addr);
+		rxtx_ring = dma_alloc_coherent(&np->pci_dev->dev,
+					       sizeof(struct ring_desc) *
+					       (ring->rx_pending +
+					       ring->tx_pending),
+					       &ring_addr, GFP_ATOMIC);
 	} else {
-		rxtx_ring = pci_alloc_consistent(np->pci_dev,
-					    sizeof(struct ring_desc_ex) * (ring->rx_pending + ring->tx_pending),
-					    &ring_addr);
+		rxtx_ring = dma_alloc_coherent(&np->pci_dev->dev,
+					       sizeof(struct ring_desc_ex) *
+					       (ring->rx_pending +
+					       ring->tx_pending),
+					       &ring_addr, GFP_ATOMIC);
 	}
 	rx_skbuff = kmalloc(sizeof(struct nv_skb_map) * ring->rx_pending, GFP_KERNEL);
 	tx_skbuff = kmalloc(sizeof(struct nv_skb_map) * ring->tx_pending, GFP_KERNEL);
@@ -4605,12 +4632,18 @@ static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ri
 		/* fall back to old rings */
 		if (!nv_optimized(np)) {
 			if (rxtx_ring)
-				pci_free_consistent(np->pci_dev, sizeof(struct ring_desc) * (ring->rx_pending + ring->tx_pending),
-						    rxtx_ring, ring_addr);
+				dma_free_coherent(&np->pci_dev->dev,
+						  sizeof(struct ring_desc) *
+						  (ring->rx_pending +
+						  ring->tx_pending),
+						  rxtx_ring, ring_addr);
 		} else {
 			if (rxtx_ring)
-				pci_free_consistent(np->pci_dev, sizeof(struct ring_desc_ex) * (ring->rx_pending + ring->tx_pending),
-						    rxtx_ring, ring_addr);
+				dma_free_coherent(&np->pci_dev->dev,
+						  sizeof(struct ring_desc_ex) *
+						  (ring->rx_pending +
+						  ring->tx_pending),
+						  rxtx_ring, ring_addr);
 		}
 
 		kfree(rx_skbuff);
@@ -5070,11 +5103,11 @@ static int nv_loopback_test(struct net_device *dev)
 		ret = 0;
 		goto out;
 	}
-	test_dma_addr = pci_map_single(np->pci_dev, tx_skb->data,
+	test_dma_addr = dma_map_single(&np->pci_dev->dev, tx_skb->data,
 				       skb_tailroom(tx_skb),
-				       PCI_DMA_FROMDEVICE);
-	if (pci_dma_mapping_error(np->pci_dev,
-				  test_dma_addr)) {
+				       DMA_FROM_DEVICE);
+	if (unlikely(dma_mapping_error(&np->pci_dev->dev,
+				       test_dma_addr))) {
 		dev_kfree_skb_any(tx_skb);
 		goto out;
 	}
@@ -5129,9 +5162,9 @@ static int nv_loopback_test(struct net_device *dev)
 		}
 	}
 
-	pci_unmap_single(np->pci_dev, test_dma_addr,
-		       (skb_end_pointer(tx_skb) - tx_skb->data),
-		       PCI_DMA_TODEVICE);
+	dma_unmap_single(&np->pci_dev->dev, test_dma_addr,
+			 (skb_end_pointer(tx_skb) - tx_skb->data),
+			 DMA_TO_DEVICE);
 	dev_kfree_skb_any(tx_skb);
  out:
 	/* stop engines */
@@ -5627,10 +5660,9 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
 	u64_stats_init(&np->swstats_rx_syncp);
 	u64_stats_init(&np->swstats_tx_syncp);
 
-	setup_timer(&np->oom_kick, nv_do_rx_refill, (unsigned long)dev);
-	setup_timer(&np->nic_poll, nv_do_nic_poll, (unsigned long)dev);
-	setup_deferrable_timer(&np->stats_poll, nv_do_stats_poll,
-			       (unsigned long)dev);
+	timer_setup(&np->oom_kick, nv_do_rx_refill, 0);
+	timer_setup(&np->nic_poll, nv_do_nic_poll, 0);
+	timer_setup(&np->stats_poll, nv_do_stats_poll, TIMER_DEFERRABLE);
 
 	err = pci_enable_device(pci_dev);
 	if (err)
@@ -5736,16 +5768,21 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
 	np->tx_ring_size = TX_RING_DEFAULT;
 
 	if (!nv_optimized(np)) {
-		np->rx_ring.orig = pci_alloc_consistent(pci_dev,
-					sizeof(struct ring_desc) * (np->rx_ring_size + np->tx_ring_size),
-					&np->ring_addr);
+		np->rx_ring.orig = dma_alloc_coherent(&pci_dev->dev,
+						      sizeof(struct ring_desc) *
+						      (np->rx_ring_size +
+						      np->tx_ring_size),
+						      &np->ring_addr,
+						      GFP_ATOMIC);
 		if (!np->rx_ring.orig)
 			goto out_unmap;
 		np->tx_ring.orig = &np->rx_ring.orig[np->rx_ring_size];
 	} else {
-		np->rx_ring.ex = pci_alloc_consistent(pci_dev,
-					sizeof(struct ring_desc_ex) * (np->rx_ring_size + np->tx_ring_size),
-					&np->ring_addr);
+		np->rx_ring.ex = dma_alloc_coherent(&pci_dev->dev,
+						    sizeof(struct ring_desc_ex) *
+						    (np->rx_ring_size +
+						    np->tx_ring_size),
+						    &np->ring_addr, GFP_ATOMIC);
 		if (!np->rx_ring.ex)
 			goto out_unmap;
 		np->tx_ring.ex = &np->rx_ring.ex[np->rx_ring_size];
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h
index 8d710a3b4db0..697e29dd4bd3 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h
@@ -613,7 +613,6 @@ struct pch_gbe_privdata {
  * @rx_ring:		Pointer of Rx descriptor ring structure
  * @rx_buffer_len:	Receive buffer length
  * @tx_queue_len:	Transmit queue length
- * @have_msi:		PCI MSI mode flag
  * @pch_gbe_privdata:	PCI Device ID driver_data
  */
 
@@ -623,6 +622,7 @@ struct pch_gbe_adapter {
 	atomic_t irq_sem;
 	struct net_device *netdev;
 	struct pci_dev *pdev;
+	int irq;
 	struct net_device *polling_netdev;
 	struct napi_struct napi;
 	struct pch_gbe_hw hw;
@@ -637,7 +637,6 @@ struct pch_gbe_adapter {
 	struct pch_gbe_rx_ring *rx_ring;
 	unsigned long rx_buffer_len;
 	unsigned long tx_queue_len;
-	bool have_msi;
 	bool rx_stop_flag;
 	int hwts_tx_en;
 	int hwts_rx_en;
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
index 5ae9681a2da7..457ee80307ea 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
@@ -781,11 +781,8 @@ static void pch_gbe_free_irq(struct pch_gbe_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
 
-	free_irq(adapter->pdev->irq, netdev);
-	if (adapter->have_msi) {
-		pci_disable_msi(adapter->pdev);
-		netdev_dbg(netdev, "call pci_disable_msi\n");
-	}
+	free_irq(adapter->irq, netdev);
+	pci_free_irq_vectors(adapter->pdev);
 }
 
 /**
@@ -799,7 +796,7 @@ static void pch_gbe_irq_disable(struct pch_gbe_adapter *adapter)
 	atomic_inc(&adapter->irq_sem);
 	iowrite32(0, &hw->reg->INT_EN);
 	ioread32(&hw->reg->INT_ST);
-	synchronize_irq(adapter->pdev->irq);
+	synchronize_irq(adapter->irq);
 
 	netdev_dbg(adapter->netdev, "INT_EN reg : 0x%08x\n",
 		   ioread32(&hw->reg->INT_EN));
@@ -1903,30 +1900,23 @@ static int pch_gbe_request_irq(struct pch_gbe_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
 	int err;
-	int flags;
 
-	flags = IRQF_SHARED;
-	adapter->have_msi = false;
-	err = pci_enable_msi(adapter->pdev);
-	netdev_dbg(netdev, "call pci_enable_msi\n");
-	if (err) {
-		netdev_dbg(netdev, "call pci_enable_msi - Error: %d\n", err);
-	} else {
-		flags = 0;
-		adapter->have_msi = true;
-	}
-	err = request_irq(adapter->pdev->irq, &pch_gbe_intr,
-			  flags, netdev->name, netdev);
+	err = pci_alloc_irq_vectors(adapter->pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+	if (err < 0)
+		return err;
+
+	adapter->irq = pci_irq_vector(adapter->pdev, 0);
+
+	err = request_irq(adapter->irq, &pch_gbe_intr, IRQF_SHARED,
+			  netdev->name, netdev);
 	if (err)
 		netdev_err(netdev, "Unable to allocate interrupt Error: %d\n",
 			   err);
-	netdev_dbg(netdev,
-		   "adapter->have_msi : %d  flags : 0x%04x  return : 0x%04x\n",
-		   adapter->have_msi, flags, err);
+	netdev_dbg(netdev, "have_msi : %d  return : 0x%04x\n",
+		   pci_dev_msi_enabled(adapter->pdev), err);
 	return err;
 }
 
-
 /**
  * pch_gbe_up - Up GbE network device
  * @adapter:  Board private structure
@@ -2399,9 +2389,9 @@ static void pch_gbe_netpoll(struct net_device *netdev)
 {
 	struct pch_gbe_adapter *adapter = netdev_priv(netdev);
 
-	disable_irq(adapter->pdev->irq);
-	pch_gbe_intr(adapter->pdev->irq, netdev);
-	enable_irq(adapter->pdev->irq);
+	disable_irq(adapter->irq);
+	pch_gbe_intr(adapter->irq, netdev);
+	enable_irq(adapter->irq);
 }
 #endif
 
diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c
index 482b85e4d665..c9529c29a0a7 100644
--- a/drivers/net/ethernet/packetengines/hamachi.c
+++ b/drivers/net/ethernet/packetengines/hamachi.c
@@ -413,13 +413,13 @@ that case.
 
 /* The rest of these values should never change. */
 
-static void hamachi_timer(unsigned long data);
+static void hamachi_timer(struct timer_list *t);
 
 enum capability_flags {CanHaveMII=1, };
 static const struct chip_info {
 	u16	vendor_id, device_id, device_id_mask, pad;
 	const char *name;
-	void (*media_timer)(unsigned long data);
+	void (*media_timer)(struct timer_list *t);
 	int flags;
 } chip_tbl[] = {
 	{0x1318, 0x0911, 0xffff, 0, "Hamachi GNIC-II", hamachi_timer, 0},
@@ -547,7 +547,7 @@ static int mdio_read(struct net_device *dev, int phy_id, int location);
 static void mdio_write(struct net_device *dev, int phy_id, int location, int value);
 static int hamachi_open(struct net_device *dev);
 static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
-static void hamachi_timer(unsigned long data);
+static void hamachi_timer(struct timer_list *t);
 static void hamachi_tx_timeout(struct net_device *dev);
 static void hamachi_init_ring(struct net_device *dev);
 static netdev_tx_t hamachi_start_xmit(struct sk_buff *skb,
@@ -979,10 +979,8 @@ static int hamachi_open(struct net_device *dev)
 			   dev->name, readw(ioaddr + RxStatus), readw(ioaddr + TxStatus));
 	}
 	/* Set the timer to check for link beat. */
-	init_timer(&hmp->timer);
+	timer_setup(&hmp->timer, hamachi_timer, 0);
 	hmp->timer.expires = RUN_AT((24*HZ)/10);			/* 2.4 sec. */
-	hmp->timer.data = (unsigned long)dev;
-	hmp->timer.function = hamachi_timer;				/* timer handler */
 	add_timer(&hmp->timer);
 
 	return 0;
@@ -1019,10 +1017,10 @@ static inline int hamachi_tx(struct net_device *dev)
 	return 0;
 }
 
-static void hamachi_timer(unsigned long data)
+static void hamachi_timer(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *)data;
-	struct hamachi_private *hmp = netdev_priv(dev);
+	struct hamachi_private *hmp = from_timer(hmp, t, timer);
+	struct net_device *dev = hmp->mii_if.dev;
 	void __iomem *ioaddr = hmp->base;
 	int next_tick = 10*HZ;
 
diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c
index fa7770da6ef8..54224d1822e3 100644
--- a/drivers/net/ethernet/packetengines/yellowfin.c
+++ b/drivers/net/ethernet/packetengines/yellowfin.c
@@ -343,7 +343,7 @@ static int mdio_read(void __iomem *ioaddr, int phy_id, int location);
 static void mdio_write(void __iomem *ioaddr, int phy_id, int location, int value);
 static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
 static int yellowfin_open(struct net_device *dev);
-static void yellowfin_timer(unsigned long data);
+static void yellowfin_timer(struct timer_list *t);
 static void yellowfin_tx_timeout(struct net_device *dev);
 static int yellowfin_init_ring(struct net_device *dev);
 static netdev_tx_t yellowfin_start_xmit(struct sk_buff *skb,
@@ -632,10 +632,8 @@ static int yellowfin_open(struct net_device *dev)
 	}
 
 	/* Set the timer to check for link beat. */
-	init_timer(&yp->timer);
+	timer_setup(&yp->timer, yellowfin_timer, 0);
 	yp->timer.expires = jiffies + 3*HZ;
-	yp->timer.data = (unsigned long)dev;
-	yp->timer.function = yellowfin_timer;				/* timer handler */
 	add_timer(&yp->timer);
 out:
 	return rc;
@@ -645,10 +643,10 @@ err_free_irq:
 	goto out;
 }
 
-static void yellowfin_timer(unsigned long data)
+static void yellowfin_timer(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *)data;
-	struct yellowfin_private *yp = netdev_priv(dev);
+	struct yellowfin_private *yp = from_timer(yp, t, timer);
+	struct net_device *dev = pci_get_drvdata(yp->pci_dev);
 	void __iomem *ioaddr = yp->base;
 	int next_tick = 60*HZ;
 
diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig
index c2e24afbaeb2..26ddf092e3ec 100644
--- a/drivers/net/ethernet/qlogic/Kconfig
+++ b/drivers/net/ethernet/qlogic/Kconfig
@@ -117,4 +117,7 @@ config QED_ISCSI
 config QED_FCOE
 	bool
 
+config QED_OOO
+	bool
+
 endif # NET_VENDOR_QLOGIC
diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile
index 82a0b90185df..c70cf2ad81c0 100644
--- a/drivers/net/ethernet/qlogic/qed/Makefile
+++ b/drivers/net/ethernet/qlogic/qed/Makefile
@@ -7,5 +7,6 @@ qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \
 qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o
 qed-$(CONFIG_QED_LL2) += qed_ll2.o
 qed-$(CONFIG_QED_RDMA) += qed_roce.o qed_rdma.o qed_iwarp.o
-qed-$(CONFIG_QED_ISCSI) += qed_iscsi.o qed_ooo.o
+qed-$(CONFIG_QED_ISCSI) += qed_iscsi.o
 qed-$(CONFIG_QED_FCOE) += qed_fcoe.o
+qed-$(CONFIG_QED_OOO) += qed_ooo.o
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index af106be8cc08..afd07ad91631 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -2069,6 +2069,12 @@ static void qed_rdma_set_pf_params(struct qed_hwfn *p_hwfn,
 
 	num_srqs = min_t(u32, 32 * 1024, p_params->num_srqs);
 
+	if (p_hwfn->mcp_info->func_info.protocol == QED_PCI_ETH_RDMA) {
+		DP_NOTICE(p_hwfn,
+			  "Current day drivers don't support RoCE & iWARP simultaneously on the same PF. Default to RoCE-only\n");
+		p_hwfn->hw_info.personality = QED_PCI_ETH_ROCE;
+	}
+
 	switch (p_hwfn->hw_info.personality) {
 	case QED_PCI_ETH_IWARP:
 		/* Each QP requires one connection */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
index 8f6ccc0c39e5..6e15d3c10ebf 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -2308,7 +2308,7 @@ static int qed_dcbnl_ieee_setapp(struct qed_dev *cdev, struct dcb_app *app)
 
 	DP_VERBOSE(hwfn, QED_MSG_DCB, "selector = %d protocol = %d pri = %d\n",
 		   app->selector, app->protocol, app->priority);
-	if (app->priority < 0 || app->priority >= QED_MAX_PFC_PRIORITIES) {
+	if (app->priority >= QED_MAX_PFC_PRIORITIES) {
 		DP_INFO(hwfn, "Invalid priority %d\n", app->priority);
 		return -EINVAL;
 	}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index 9d989c96278c..409041eab189 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -41,6 +41,7 @@
 #include "qed_rdma.h"
 #include "qed_reg_addr.h"
 #include "qed_sp.h"
+#include "qed_ooo.h"
 
 #define QED_IWARP_ORD_DEFAULT		32
 #define QED_IWARP_IRD_DEFAULT		32
@@ -119,6 +120,13 @@ static void qed_iwarp_cid_cleaned(struct qed_hwfn *p_hwfn, u32 cid)
 	spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
 }
 
+void qed_iwarp_init_fw_ramrod(struct qed_hwfn *p_hwfn,
+			      struct iwarp_init_func_params *p_ramrod)
+{
+	p_ramrod->ll2_ooo_q_index = RESC_START(p_hwfn, QED_LL2_QUEUE) +
+				    p_hwfn->p_rdma_info->iwarp.ll2_ooo_handle;
+}
+
 static int qed_iwarp_alloc_cid(struct qed_hwfn *p_hwfn, u32 *cid)
 {
 	int rc;
@@ -1402,12 +1410,22 @@ int qed_iwarp_alloc(struct qed_hwfn *p_hwfn)
 	INIT_LIST_HEAD(&p_hwfn->p_rdma_info->iwarp.ep_free_list);
 	spin_lock_init(&p_hwfn->p_rdma_info->iwarp.iw_lock);
 
-	return qed_iwarp_prealloc_ep(p_hwfn, true);
+	rc = qed_iwarp_prealloc_ep(p_hwfn, true);
+	if (rc)
+		return rc;
+
+	return qed_ooo_alloc(p_hwfn);
 }
 
 void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn)
 {
+	struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+
+	qed_ooo_free(p_hwfn);
 	qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map, 1);
+	kfree(iwarp_info->mpa_bufs);
+	kfree(iwarp_info->partial_fpdus);
+	kfree(iwarp_info->mpa_intermediate_buf);
 }
 
 int qed_iwarp_accept(void *rdma_cxt, struct qed_iwarp_accept_in *iparams)
@@ -1705,6 +1723,569 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn,
 	return 0;
 }
 
+static struct qed_iwarp_fpdu *qed_iwarp_get_curr_fpdu(struct qed_hwfn *p_hwfn,
+						      u16 cid)
+{
+	struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+	struct qed_iwarp_fpdu *partial_fpdu;
+	u32 idx;
+
+	idx = cid - qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_IWARP);
+	if (idx >= iwarp_info->max_num_partial_fpdus) {
+		DP_ERR(p_hwfn, "Invalid cid %x max_num_partial_fpdus=%x\n", cid,
+		       iwarp_info->max_num_partial_fpdus);
+		return NULL;
+	}
+
+	partial_fpdu = &iwarp_info->partial_fpdus[idx];
+
+	return partial_fpdu;
+}
+
+enum qed_iwarp_mpa_pkt_type {
+	QED_IWARP_MPA_PKT_PACKED,
+	QED_IWARP_MPA_PKT_PARTIAL,
+	QED_IWARP_MPA_PKT_UNALIGNED
+};
+
+#define QED_IWARP_INVALID_FPDU_LENGTH 0xffff
+#define QED_IWARP_MPA_FPDU_LENGTH_SIZE (2)
+#define QED_IWARP_MPA_CRC32_DIGEST_SIZE (4)
+
+/* Pad to multiple of 4 */
+#define QED_IWARP_PDU_DATA_LEN_WITH_PAD(data_len) ALIGN(data_len, 4)
+#define QED_IWARP_FPDU_LEN_WITH_PAD(_mpa_len)				   \
+	(QED_IWARP_PDU_DATA_LEN_WITH_PAD((_mpa_len) +			   \
+					 QED_IWARP_MPA_FPDU_LENGTH_SIZE) + \
+					 QED_IWARP_MPA_CRC32_DIGEST_SIZE)
+
+/* fpdu can be fragmented over maximum 3 bds: header, partial mpa, unaligned */
+#define QED_IWARP_MAX_BDS_PER_FPDU 3
+
+char *pkt_type_str[] = {
+	"QED_IWARP_MPA_PKT_PACKED",
+	"QED_IWARP_MPA_PKT_PARTIAL",
+	"QED_IWARP_MPA_PKT_UNALIGNED"
+};
+
+static int
+qed_iwarp_recycle_pkt(struct qed_hwfn *p_hwfn,
+		      struct qed_iwarp_fpdu *fpdu,
+		      struct qed_iwarp_ll2_buff *buf);
+
+static enum qed_iwarp_mpa_pkt_type
+qed_iwarp_mpa_classify(struct qed_hwfn *p_hwfn,
+		       struct qed_iwarp_fpdu *fpdu,
+		       u16 tcp_payload_len, u8 *mpa_data)
+{
+	enum qed_iwarp_mpa_pkt_type pkt_type;
+	u16 mpa_len;
+
+	if (fpdu->incomplete_bytes) {
+		pkt_type = QED_IWARP_MPA_PKT_UNALIGNED;
+		goto out;
+	}
+
+	/* special case of one byte remaining...
+	 * lower byte will be read next packet
+	 */
+	if (tcp_payload_len == 1) {
+		fpdu->fpdu_length = *mpa_data << BITS_PER_BYTE;
+		pkt_type = QED_IWARP_MPA_PKT_PARTIAL;
+		goto out;
+	}
+
+	mpa_len = ntohs(*((u16 *)(mpa_data)));
+	fpdu->fpdu_length = QED_IWARP_FPDU_LEN_WITH_PAD(mpa_len);
+
+	if (fpdu->fpdu_length <= tcp_payload_len)
+		pkt_type = QED_IWARP_MPA_PKT_PACKED;
+	else
+		pkt_type = QED_IWARP_MPA_PKT_PARTIAL;
+
+out:
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+		   "MPA_ALIGN: %s: fpdu_length=0x%x tcp_payload_len:0x%x\n",
+		   pkt_type_str[pkt_type], fpdu->fpdu_length, tcp_payload_len);
+
+	return pkt_type;
+}
+
+static void
+qed_iwarp_init_fpdu(struct qed_iwarp_ll2_buff *buf,
+		    struct qed_iwarp_fpdu *fpdu,
+		    struct unaligned_opaque_data *pkt_data,
+		    u16 tcp_payload_size, u8 placement_offset)
+{
+	fpdu->mpa_buf = buf;
+	fpdu->pkt_hdr = buf->data_phys_addr + placement_offset;
+	fpdu->pkt_hdr_size = pkt_data->tcp_payload_offset;
+	fpdu->mpa_frag = buf->data_phys_addr + pkt_data->first_mpa_offset;
+	fpdu->mpa_frag_virt = (u8 *)(buf->data) + pkt_data->first_mpa_offset;
+
+	if (tcp_payload_size == 1)
+		fpdu->incomplete_bytes = QED_IWARP_INVALID_FPDU_LENGTH;
+	else if (tcp_payload_size < fpdu->fpdu_length)
+		fpdu->incomplete_bytes = fpdu->fpdu_length - tcp_payload_size;
+	else
+		fpdu->incomplete_bytes = 0;	/* complete fpdu */
+
+	fpdu->mpa_frag_len = fpdu->fpdu_length - fpdu->incomplete_bytes;
+}
+
+static int
+qed_iwarp_cp_pkt(struct qed_hwfn *p_hwfn,
+		 struct qed_iwarp_fpdu *fpdu,
+		 struct unaligned_opaque_data *pkt_data,
+		 struct qed_iwarp_ll2_buff *buf, u16 tcp_payload_size)
+{
+	u8 *tmp_buf = p_hwfn->p_rdma_info->iwarp.mpa_intermediate_buf;
+	int rc;
+
+	/* need to copy the data from the partial packet stored in fpdu
+	 * to the new buf, for this we also need to move the data currently
+	 * placed on the buf. The assumption is that the buffer is big enough
+	 * since fpdu_length <= mss, we use an intermediate buffer since
+	 * we may need to copy the new data to an overlapping location
+	 */
+	if ((fpdu->mpa_frag_len + tcp_payload_size) > (u16)buf->buff_size) {
+		DP_ERR(p_hwfn,
+		       "MPA ALIGN: Unexpected: buffer is not large enough for split fpdu buff_size = %d mpa_frag_len = %d, tcp_payload_size = %d, incomplete_bytes = %d\n",
+		       buf->buff_size, fpdu->mpa_frag_len,
+		       tcp_payload_size, fpdu->incomplete_bytes);
+		return -EINVAL;
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+		   "MPA ALIGN Copying fpdu: [%p, %d] [%p, %d]\n",
+		   fpdu->mpa_frag_virt, fpdu->mpa_frag_len,
+		   (u8 *)(buf->data) + pkt_data->first_mpa_offset,
+		   tcp_payload_size);
+
+	memcpy(tmp_buf, fpdu->mpa_frag_virt, fpdu->mpa_frag_len);
+	memcpy(tmp_buf + fpdu->mpa_frag_len,
+	       (u8 *)(buf->data) + pkt_data->first_mpa_offset,
+	       tcp_payload_size);
+
+	rc = qed_iwarp_recycle_pkt(p_hwfn, fpdu, fpdu->mpa_buf);
+	if (rc)
+		return rc;
+
+	/* If we managed to post the buffer copy the data to the new buffer
+	 * o/w this will occur in the next round...
+	 */
+	memcpy((u8 *)(buf->data), tmp_buf,
+	       fpdu->mpa_frag_len + tcp_payload_size);
+
+	fpdu->mpa_buf = buf;
+	/* fpdu->pkt_hdr remains as is */
+	/* fpdu->mpa_frag is overridden with new buf */
+	fpdu->mpa_frag = buf->data_phys_addr;
+	fpdu->mpa_frag_virt = buf->data;
+	fpdu->mpa_frag_len += tcp_payload_size;
+
+	fpdu->incomplete_bytes -= tcp_payload_size;
+
+	DP_VERBOSE(p_hwfn,
+		   QED_MSG_RDMA,
+		   "MPA ALIGN: split fpdu buff_size = %d mpa_frag_len = %d, tcp_payload_size = %d, incomplete_bytes = %d\n",
+		   buf->buff_size, fpdu->mpa_frag_len, tcp_payload_size,
+		   fpdu->incomplete_bytes);
+
+	return 0;
+}
+
+static void
+qed_iwarp_update_fpdu_length(struct qed_hwfn *p_hwfn,
+			     struct qed_iwarp_fpdu *fpdu, u8 *mpa_data)
+{
+	u16 mpa_len;
+
+	/* Update incomplete packets if needed */
+	if (fpdu->incomplete_bytes == QED_IWARP_INVALID_FPDU_LENGTH) {
+		/* Missing lower byte is now available */
+		mpa_len = fpdu->fpdu_length | *mpa_data;
+		fpdu->fpdu_length = QED_IWARP_FPDU_LEN_WITH_PAD(mpa_len);
+		fpdu->mpa_frag_len = fpdu->fpdu_length;
+		/* one byte of hdr */
+		fpdu->incomplete_bytes = fpdu->fpdu_length - 1;
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_RDMA,
+			   "MPA_ALIGN: Partial header mpa_len=%x fpdu_length=%x incomplete_bytes=%x\n",
+			   mpa_len, fpdu->fpdu_length, fpdu->incomplete_bytes);
+	}
+}
+
+#define QED_IWARP_IS_RIGHT_EDGE(_curr_pkt) \
+	(GET_FIELD((_curr_pkt)->flags,	   \
+		   UNALIGNED_OPAQUE_DATA_PKT_REACHED_WIN_RIGHT_EDGE))
+
+/* This function is used to recycle a buffer using the ll2 drop option. It
+ * uses the mechanism to ensure that all buffers posted to tx before this one
+ * were completed. The buffer sent here will be sent as a cookie in the tx
+ * completion function and can then be reposted to rx chain when done. The flow
+ * that requires this is the flow where a FPDU splits over more than 3 tcp
+ * segments. In this case the driver needs to re-post a rx buffer instead of
+ * the one received, but driver can't simply repost a buffer it copied from
+ * as there is a case where the buffer was originally a packed FPDU, and is
+ * partially posted to FW. Driver needs to ensure FW is done with it.
+ */
+static int
+qed_iwarp_recycle_pkt(struct qed_hwfn *p_hwfn,
+		      struct qed_iwarp_fpdu *fpdu,
+		      struct qed_iwarp_ll2_buff *buf)
+{
+	struct qed_ll2_tx_pkt_info tx_pkt;
+	u8 ll2_handle;
+	int rc;
+
+	memset(&tx_pkt, 0, sizeof(tx_pkt));
+	tx_pkt.num_of_bds = 1;
+	tx_pkt.tx_dest = QED_LL2_TX_DEST_DROP;
+	tx_pkt.l4_hdr_offset_w = fpdu->pkt_hdr_size >> 2;
+	tx_pkt.first_frag = fpdu->pkt_hdr;
+	tx_pkt.first_frag_len = fpdu->pkt_hdr_size;
+	buf->piggy_buf = NULL;
+	tx_pkt.cookie = buf;
+
+	ll2_handle = p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle;
+
+	rc = qed_ll2_prepare_tx_packet(p_hwfn, ll2_handle, &tx_pkt, true);
+	if (rc)
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "Can't drop packet rc=%d\n", rc);
+
+	DP_VERBOSE(p_hwfn,
+		   QED_MSG_RDMA,
+		   "MPA_ALIGN: send drop tx packet [%lx, 0x%x], buf=%p, rc=%d\n",
+		   (unsigned long int)tx_pkt.first_frag,
+		   tx_pkt.first_frag_len, buf, rc);
+
+	return rc;
+}
+
+static int
+qed_iwarp_win_right_edge(struct qed_hwfn *p_hwfn, struct qed_iwarp_fpdu *fpdu)
+{
+	struct qed_ll2_tx_pkt_info tx_pkt;
+	u8 ll2_handle;
+	int rc;
+
+	memset(&tx_pkt, 0, sizeof(tx_pkt));
+	tx_pkt.num_of_bds = 1;
+	tx_pkt.tx_dest = QED_LL2_TX_DEST_LB;
+	tx_pkt.l4_hdr_offset_w = fpdu->pkt_hdr_size >> 2;
+
+	tx_pkt.first_frag = fpdu->pkt_hdr;
+	tx_pkt.first_frag_len = fpdu->pkt_hdr_size;
+	tx_pkt.enable_ip_cksum = true;
+	tx_pkt.enable_l4_cksum = true;
+	tx_pkt.calc_ip_len = true;
+	/* vlan overload with enum iwarp_ll2_tx_queues */
+	tx_pkt.vlan = IWARP_LL2_ALIGNED_RIGHT_TRIMMED_TX_QUEUE;
+
+	ll2_handle = p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle;
+
+	rc = qed_ll2_prepare_tx_packet(p_hwfn, ll2_handle, &tx_pkt, true);
+	if (rc)
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "Can't send right edge rc=%d\n", rc);
+	DP_VERBOSE(p_hwfn,
+		   QED_MSG_RDMA,
+		   "MPA_ALIGN: Sent right edge FPDU num_bds=%d [%lx, 0x%x], rc=%d\n",
+		   tx_pkt.num_of_bds,
+		   (unsigned long int)tx_pkt.first_frag,
+		   tx_pkt.first_frag_len, rc);
+
+	return rc;
+}
+
+static int
+qed_iwarp_send_fpdu(struct qed_hwfn *p_hwfn,
+		    struct qed_iwarp_fpdu *fpdu,
+		    struct unaligned_opaque_data *curr_pkt,
+		    struct qed_iwarp_ll2_buff *buf,
+		    u16 tcp_payload_size, enum qed_iwarp_mpa_pkt_type pkt_type)
+{
+	struct qed_ll2_tx_pkt_info tx_pkt;
+	u8 ll2_handle;
+	int rc;
+
+	memset(&tx_pkt, 0, sizeof(tx_pkt));
+
+	/* An unaligned packet means it's split over two tcp segments. So the
+	 * complete packet requires 3 bds, one for the header, one for the
+	 * part of the fpdu of the first tcp segment, and the last fragment
+	 * will point to the remainder of the fpdu. A packed pdu, requires only
+	 * two bds, one for the header and one for the data.
+	 */
+	tx_pkt.num_of_bds = (pkt_type == QED_IWARP_MPA_PKT_UNALIGNED) ? 3 : 2;
+	tx_pkt.tx_dest = QED_LL2_TX_DEST_LB;
+	tx_pkt.l4_hdr_offset_w = fpdu->pkt_hdr_size >> 2; /* offset in words */
+
+	/* Send the mpa_buf only with the last fpdu (in case of packed) */
+	if (pkt_type == QED_IWARP_MPA_PKT_UNALIGNED ||
+	    tcp_payload_size <= fpdu->fpdu_length)
+		tx_pkt.cookie = fpdu->mpa_buf;
+
+	tx_pkt.first_frag = fpdu->pkt_hdr;
+	tx_pkt.first_frag_len = fpdu->pkt_hdr_size;
+	tx_pkt.enable_ip_cksum = true;
+	tx_pkt.enable_l4_cksum = true;
+	tx_pkt.calc_ip_len = true;
+	/* vlan overload with enum iwarp_ll2_tx_queues */
+	tx_pkt.vlan = IWARP_LL2_ALIGNED_TX_QUEUE;
+
+	/* special case of unaligned packet and not packed, need to send
+	 * both buffers as cookie to release.
+	 */
+	if (tcp_payload_size == fpdu->incomplete_bytes)
+		fpdu->mpa_buf->piggy_buf = buf;
+
+	ll2_handle = p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle;
+
+	/* Set first fragment to header */
+	rc = qed_ll2_prepare_tx_packet(p_hwfn, ll2_handle, &tx_pkt, true);
+	if (rc)
+		goto out;
+
+	/* Set second fragment to first part of packet */
+	rc = qed_ll2_set_fragment_of_tx_packet(p_hwfn, ll2_handle,
+					       fpdu->mpa_frag,
+					       fpdu->mpa_frag_len);
+	if (rc)
+		goto out;
+
+	if (!fpdu->incomplete_bytes)
+		goto out;
+
+	/* Set third fragment to second part of the packet */
+	rc = qed_ll2_set_fragment_of_tx_packet(p_hwfn,
+					       ll2_handle,
+					       buf->data_phys_addr +
+					       curr_pkt->first_mpa_offset,
+					       fpdu->incomplete_bytes);
+out:
+	DP_VERBOSE(p_hwfn,
+		   QED_MSG_RDMA,
+		   "MPA_ALIGN: Sent FPDU num_bds=%d first_frag_len=%x, mpa_frag_len=0x%x, incomplete_bytes:0x%x rc=%d\n",
+		   tx_pkt.num_of_bds,
+		   tx_pkt.first_frag_len,
+		   fpdu->mpa_frag_len,
+		   fpdu->incomplete_bytes, rc);
+
+	return rc;
+}
+
+static void
+qed_iwarp_mpa_get_data(struct qed_hwfn *p_hwfn,
+		       struct unaligned_opaque_data *curr_pkt,
+		       u32 opaque_data0, u32 opaque_data1)
+{
+	u64 opaque_data;
+
+	opaque_data = HILO_64(opaque_data1, opaque_data0);
+	*curr_pkt = *((struct unaligned_opaque_data *)&opaque_data);
+
+	curr_pkt->first_mpa_offset = curr_pkt->tcp_payload_offset +
+				     le16_to_cpu(curr_pkt->first_mpa_offset);
+	curr_pkt->cid = le32_to_cpu(curr_pkt->cid);
+}
+
+/* This function is called when an unaligned or incomplete MPA packet arrives
+ * driver needs to align the packet, perhaps using previous data and send
+ * it down to FW once it is aligned.
+ */
+static int
+qed_iwarp_process_mpa_pkt(struct qed_hwfn *p_hwfn,
+			  struct qed_iwarp_ll2_mpa_buf *mpa_buf)
+{
+	struct unaligned_opaque_data *curr_pkt = &mpa_buf->data;
+	struct qed_iwarp_ll2_buff *buf = mpa_buf->ll2_buf;
+	enum qed_iwarp_mpa_pkt_type pkt_type;
+	struct qed_iwarp_fpdu *fpdu;
+	int rc = -EINVAL;
+	u8 *mpa_data;
+
+	fpdu = qed_iwarp_get_curr_fpdu(p_hwfn, curr_pkt->cid & 0xffff);
+	if (!fpdu) { /* something corrupt with cid, post rx back */
+		DP_ERR(p_hwfn, "Invalid cid, drop and post back to rx cid=%x\n",
+		       curr_pkt->cid);
+		goto err;
+	}
+
+	do {
+		mpa_data = ((u8 *)(buf->data) + curr_pkt->first_mpa_offset);
+
+		pkt_type = qed_iwarp_mpa_classify(p_hwfn, fpdu,
+						  mpa_buf->tcp_payload_len,
+						  mpa_data);
+
+		switch (pkt_type) {
+		case QED_IWARP_MPA_PKT_PARTIAL:
+			qed_iwarp_init_fpdu(buf, fpdu,
+					    curr_pkt,
+					    mpa_buf->tcp_payload_len,
+					    mpa_buf->placement_offset);
+
+			if (!QED_IWARP_IS_RIGHT_EDGE(curr_pkt)) {
+				mpa_buf->tcp_payload_len = 0;
+				break;
+			}
+
+			rc = qed_iwarp_win_right_edge(p_hwfn, fpdu);
+
+			if (rc) {
+				DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+					   "Can't send FPDU:reset rc=%d\n", rc);
+				memset(fpdu, 0, sizeof(*fpdu));
+				break;
+			}
+
+			mpa_buf->tcp_payload_len = 0;
+			break;
+		case QED_IWARP_MPA_PKT_PACKED:
+			qed_iwarp_init_fpdu(buf, fpdu,
+					    curr_pkt,
+					    mpa_buf->tcp_payload_len,
+					    mpa_buf->placement_offset);
+
+			rc = qed_iwarp_send_fpdu(p_hwfn, fpdu, curr_pkt, buf,
+						 mpa_buf->tcp_payload_len,
+						 pkt_type);
+			if (rc) {
+				DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+					   "Can't send FPDU:reset rc=%d\n", rc);
+				memset(fpdu, 0, sizeof(*fpdu));
+				break;
+			}
+
+			mpa_buf->tcp_payload_len -= fpdu->fpdu_length;
+			curr_pkt->first_mpa_offset += fpdu->fpdu_length;
+			break;
+		case QED_IWARP_MPA_PKT_UNALIGNED:
+			qed_iwarp_update_fpdu_length(p_hwfn, fpdu, mpa_data);
+			if (mpa_buf->tcp_payload_len < fpdu->incomplete_bytes) {
+				/* special handling of fpdu split over more
+				 * than 2 segments
+				 */
+				if (QED_IWARP_IS_RIGHT_EDGE(curr_pkt)) {
+					rc = qed_iwarp_win_right_edge(p_hwfn,
+								      fpdu);
+					/* packet will be re-processed later */
+					if (rc)
+						return rc;
+				}
+
+				rc = qed_iwarp_cp_pkt(p_hwfn, fpdu, curr_pkt,
+						      buf,
+						      mpa_buf->tcp_payload_len);
+				if (rc) /* packet will be re-processed later */
+					return rc;
+
+				mpa_buf->tcp_payload_len = 0;
+				break;
+			}
+
+			rc = qed_iwarp_send_fpdu(p_hwfn, fpdu, curr_pkt, buf,
+						 mpa_buf->tcp_payload_len,
+						 pkt_type);
+			if (rc) {
+				DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+					   "Can't send FPDU:delay rc=%d\n", rc);
+				/* don't reset fpdu -> we need it for next
+				 * classify
+				 */
+				break;
+			}
+
+			mpa_buf->tcp_payload_len -= fpdu->incomplete_bytes;
+			curr_pkt->first_mpa_offset += fpdu->incomplete_bytes;
+			/* The framed PDU was sent - no more incomplete bytes */
+			fpdu->incomplete_bytes = 0;
+			break;
+		}
+	} while (mpa_buf->tcp_payload_len && !rc);
+
+	return rc;
+
+err:
+	qed_iwarp_ll2_post_rx(p_hwfn,
+			      buf,
+			      p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle);
+	return rc;
+}
+
+static void qed_iwarp_process_pending_pkts(struct qed_hwfn *p_hwfn)
+{
+	struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+	struct qed_iwarp_ll2_mpa_buf *mpa_buf = NULL;
+	int rc;
+
+	while (!list_empty(&iwarp_info->mpa_buf_pending_list)) {
+		mpa_buf = list_first_entry(&iwarp_info->mpa_buf_pending_list,
+					   struct qed_iwarp_ll2_mpa_buf,
+					   list_entry);
+
+		rc = qed_iwarp_process_mpa_pkt(p_hwfn, mpa_buf);
+
+		/* busy means break and continue processing later, don't
+		 * remove the buf from the pending list.
+		 */
+		if (rc == -EBUSY)
+			break;
+
+		list_del(&mpa_buf->list_entry);
+		list_add_tail(&mpa_buf->list_entry, &iwarp_info->mpa_buf_list);
+
+		if (rc) {	/* different error, don't continue */
+			DP_NOTICE(p_hwfn, "process pkts failed rc=%d\n", rc);
+			break;
+		}
+	}
+}
+
+static void
+qed_iwarp_ll2_comp_mpa_pkt(void *cxt, struct qed_ll2_comp_rx_data *data)
+{
+	struct qed_iwarp_ll2_mpa_buf *mpa_buf;
+	struct qed_iwarp_info *iwarp_info;
+	struct qed_hwfn *p_hwfn = cxt;
+
+	iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+	mpa_buf = list_first_entry(&iwarp_info->mpa_buf_list,
+				   struct qed_iwarp_ll2_mpa_buf, list_entry);
+	if (!mpa_buf) {
+		DP_ERR(p_hwfn, "No free mpa buf\n");
+		goto err;
+	}
+
+	list_del(&mpa_buf->list_entry);
+	qed_iwarp_mpa_get_data(p_hwfn, &mpa_buf->data,
+			       data->opaque_data_0, data->opaque_data_1);
+
+	DP_VERBOSE(p_hwfn,
+		   QED_MSG_RDMA,
+		   "LL2 MPA CompRx payload_len:0x%x\tfirst_mpa_offset:0x%x\ttcp_payload_offset:0x%x\tflags:0x%x\tcid:0x%x\n",
+		   data->length.packet_length, mpa_buf->data.first_mpa_offset,
+		   mpa_buf->data.tcp_payload_offset, mpa_buf->data.flags,
+		   mpa_buf->data.cid);
+
+	mpa_buf->ll2_buf = data->cookie;
+	mpa_buf->tcp_payload_len = data->length.packet_length -
+				   mpa_buf->data.first_mpa_offset;
+	mpa_buf->data.first_mpa_offset += data->u.placement_offset;
+	mpa_buf->placement_offset = data->u.placement_offset;
+
+	list_add_tail(&mpa_buf->list_entry, &iwarp_info->mpa_buf_pending_list);
+
+	qed_iwarp_process_pending_pkts(p_hwfn);
+	return;
+err:
+	qed_iwarp_ll2_post_rx(p_hwfn, data->cookie,
+			      iwarp_info->ll2_mpa_handle);
+}
+
 static void
 qed_iwarp_ll2_comp_syn_pkt(void *cxt, struct qed_ll2_comp_rx_data *data)
 {
@@ -1725,6 +2306,14 @@ qed_iwarp_ll2_comp_syn_pkt(void *cxt, struct qed_ll2_comp_rx_data *data)
 
 	memset(&cm_info, 0, sizeof(cm_info));
 	ll2_syn_handle = p_hwfn->p_rdma_info->iwarp.ll2_syn_handle;
+
+	/* Check if packet was received with errors... */
+	if (data->err_flags) {
+		DP_NOTICE(p_hwfn, "Error received on SYN packet: 0x%x\n",
+			  data->err_flags);
+		goto err;
+	}
+
 	if (GET_FIELD(data->parse_flags,
 		      PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED) &&
 	    GET_FIELD(data->parse_flags, PARSING_AND_ERR_FLAGS_L4CHKSMERROR)) {
@@ -1839,10 +2428,25 @@ static void qed_iwarp_ll2_comp_tx_pkt(void *cxt, u8 connection_handle,
 				      bool b_last_fragment, bool b_last_packet)
 {
 	struct qed_iwarp_ll2_buff *buffer = cookie;
+	struct qed_iwarp_ll2_buff *piggy;
 	struct qed_hwfn *p_hwfn = cxt;
 
+	if (!buffer)		/* can happen in packed mpa unaligned... */
+		return;
+
 	/* this was originally an rx packet, post it back */
+	piggy = buffer->piggy_buf;
+	if (piggy) {
+		buffer->piggy_buf = NULL;
+		qed_iwarp_ll2_post_rx(p_hwfn, piggy, connection_handle);
+	}
+
 	qed_iwarp_ll2_post_rx(p_hwfn, buffer, connection_handle);
+
+	if (connection_handle == p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle)
+		qed_iwarp_process_pending_pkts(p_hwfn);
+
+	return;
 }
 
 static void qed_iwarp_ll2_rel_tx_pkt(void *cxt, u8 connection_handle,
@@ -1855,12 +2459,44 @@ static void qed_iwarp_ll2_rel_tx_pkt(void *cxt, u8 connection_handle,
 	if (!buffer)
 		return;
 
+	if (buffer->piggy_buf) {
+		dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+				  buffer->piggy_buf->buff_size,
+				  buffer->piggy_buf->data,
+				  buffer->piggy_buf->data_phys_addr);
+
+		kfree(buffer->piggy_buf);
+	}
+
 	dma_free_coherent(&p_hwfn->cdev->pdev->dev, buffer->buff_size,
 			  buffer->data, buffer->data_phys_addr);
 
 	kfree(buffer);
 }
 
+/* The only slowpath for iwarp ll2 is unalign flush. When this completion
+ * is received, need to reset the FPDU.
+ */
+void
+qed_iwarp_ll2_slowpath(void *cxt,
+		       u8 connection_handle,
+		       u32 opaque_data_0, u32 opaque_data_1)
+{
+	struct unaligned_opaque_data unalign_data;
+	struct qed_hwfn *p_hwfn = cxt;
+	struct qed_iwarp_fpdu *fpdu;
+
+	qed_iwarp_mpa_get_data(p_hwfn, &unalign_data,
+			       opaque_data_0, opaque_data_1);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "(0x%x) Flush fpdu\n",
+		   unalign_data.cid);
+
+	fpdu = qed_iwarp_get_curr_fpdu(p_hwfn, (u16)unalign_data.cid);
+	if (fpdu)
+		memset(fpdu, 0, sizeof(*fpdu));
+}
+
 static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
@@ -1876,6 +2512,26 @@ static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 		iwarp_info->ll2_syn_handle = QED_IWARP_HANDLE_INVAL;
 	}
 
+	if (iwarp_info->ll2_ooo_handle != QED_IWARP_HANDLE_INVAL) {
+		rc = qed_ll2_terminate_connection(p_hwfn,
+						  iwarp_info->ll2_ooo_handle);
+		if (rc)
+			DP_INFO(p_hwfn, "Failed to terminate ooo connection\n");
+
+		qed_ll2_release_connection(p_hwfn, iwarp_info->ll2_ooo_handle);
+		iwarp_info->ll2_ooo_handle = QED_IWARP_HANDLE_INVAL;
+	}
+
+	if (iwarp_info->ll2_mpa_handle != QED_IWARP_HANDLE_INVAL) {
+		rc = qed_ll2_terminate_connection(p_hwfn,
+						  iwarp_info->ll2_mpa_handle);
+		if (rc)
+			DP_INFO(p_hwfn, "Failed to terminate mpa connection\n");
+
+		qed_ll2_release_connection(p_hwfn, iwarp_info->ll2_mpa_handle);
+		iwarp_info->ll2_mpa_handle = QED_IWARP_HANDLE_INVAL;
+	}
+
 	qed_llh_remove_mac_filter(p_hwfn,
 				  p_ptt, p_hwfn->p_rdma_info->iwarp.mac_addr);
 	return rc;
@@ -1927,10 +2583,15 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
 	struct qed_iwarp_info *iwarp_info;
 	struct qed_ll2_acquire_data data;
 	struct qed_ll2_cbs cbs;
+	u32 mpa_buff_size;
+	u16 n_ooo_bufs;
 	int rc = 0;
+	int i;
 
 	iwarp_info = &p_hwfn->p_rdma_info->iwarp;
 	iwarp_info->ll2_syn_handle = QED_IWARP_HANDLE_INVAL;
+	iwarp_info->ll2_ooo_handle = QED_IWARP_HANDLE_INVAL;
+	iwarp_info->ll2_mpa_handle = QED_IWARP_HANDLE_INVAL;
 
 	iwarp_info->max_mtu = params->max_mtu;
 
@@ -1978,6 +2639,91 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
 	if (rc)
 		goto err;
 
+	/* Start OOO connection */
+	data.input.conn_type = QED_LL2_TYPE_OOO;
+	data.input.mtu = params->max_mtu;
+
+	n_ooo_bufs = (QED_IWARP_MAX_OOO * QED_IWARP_RCV_WND_SIZE_DEF) /
+		     iwarp_info->max_mtu;
+	n_ooo_bufs = min_t(u32, n_ooo_bufs, QED_IWARP_LL2_OOO_MAX_RX_SIZE);
+
+	data.input.rx_num_desc = n_ooo_bufs;
+	data.input.rx_num_ooo_buffers = n_ooo_bufs;
+
+	data.input.tx_max_bds_per_packet = 1;	/* will never be fragmented */
+	data.input.tx_num_desc = QED_IWARP_LL2_OOO_DEF_TX_SIZE;
+	data.p_connection_handle = &iwarp_info->ll2_ooo_handle;
+
+	rc = qed_ll2_acquire_connection(p_hwfn, &data);
+	if (rc)
+		goto err;
+
+	rc = qed_ll2_establish_connection(p_hwfn, iwarp_info->ll2_ooo_handle);
+	if (rc)
+		goto err;
+
+	/* Start Unaligned MPA connection */
+	cbs.rx_comp_cb = qed_iwarp_ll2_comp_mpa_pkt;
+	cbs.slowpath_cb = qed_iwarp_ll2_slowpath;
+
+	memset(&data, 0, sizeof(data));
+	data.input.conn_type = QED_LL2_TYPE_IWARP;
+	data.input.mtu = params->max_mtu;
+	/* FW requires that once a packet arrives OOO, it must have at
+	 * least 2 rx buffers available on the unaligned connection
+	 * for handling the case that it is a partial fpdu.
+	 */
+	data.input.rx_num_desc = n_ooo_bufs * 2;
+	data.input.tx_num_desc = data.input.rx_num_desc;
+	data.input.tx_max_bds_per_packet = QED_IWARP_MAX_BDS_PER_FPDU;
+	data.p_connection_handle = &iwarp_info->ll2_mpa_handle;
+	data.input.secondary_queue = true;
+	data.cbs = &cbs;
+
+	rc = qed_ll2_acquire_connection(p_hwfn, &data);
+	if (rc)
+		goto err;
+
+	rc = qed_ll2_establish_connection(p_hwfn, iwarp_info->ll2_mpa_handle);
+	if (rc)
+		goto err;
+
+	mpa_buff_size = QED_IWARP_MAX_BUF_SIZE(params->max_mtu);
+	rc = qed_iwarp_ll2_alloc_buffers(p_hwfn,
+					 data.input.rx_num_desc,
+					 mpa_buff_size,
+					 iwarp_info->ll2_mpa_handle);
+	if (rc)
+		goto err;
+
+	iwarp_info->partial_fpdus = kcalloc((u16)p_hwfn->p_rdma_info->num_qps,
+					    sizeof(*iwarp_info->partial_fpdus),
+					    GFP_KERNEL);
+	if (!iwarp_info->partial_fpdus)
+		goto err;
+
+	iwarp_info->max_num_partial_fpdus = (u16)p_hwfn->p_rdma_info->num_qps;
+
+	iwarp_info->mpa_intermediate_buf = kzalloc(mpa_buff_size, GFP_KERNEL);
+	if (!iwarp_info->mpa_intermediate_buf)
+		goto err;
+
+	/* The mpa_bufs array serves for pending RX packets received on the
+	 * mpa ll2 that don't have place on the tx ring and require later
+	 * processing. We can't fail on allocation of such a struct therefore
+	 * we allocate enough to take care of all rx packets
+	 */
+	iwarp_info->mpa_bufs = kcalloc(data.input.rx_num_desc,
+				       sizeof(*iwarp_info->mpa_bufs),
+				       GFP_KERNEL);
+	if (!iwarp_info->mpa_bufs)
+		goto err;
+
+	INIT_LIST_HEAD(&iwarp_info->mpa_buf_pending_list);
+	INIT_LIST_HEAD(&iwarp_info->mpa_buf_list);
+	for (i = 0; i < data.input.rx_num_desc; i++)
+		list_add_tail(&iwarp_info->mpa_bufs[i].list_entry,
+			      &iwarp_info->mpa_buf_list);
 	return rc;
 err:
 	qed_iwarp_ll2_stop(p_hwfn, p_ptt);
@@ -2014,6 +2760,7 @@ int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 
 	qed_spq_register_async_cb(p_hwfn, PROTOCOLID_IWARP,
 				  qed_iwarp_async_event);
+	qed_ooo_setup(p_hwfn);
 
 	return qed_iwarp_ll2_start(p_hwfn, params, p_ptt);
 }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
index 148ef3c33a5d..c1ecd743305f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
@@ -47,18 +47,51 @@ enum qed_iwarp_qp_state qed_roce2iwarp_state(enum qed_roce_qp_state state);
 #define QED_IWARP_LL2_SYN_TX_SIZE       (128)
 #define QED_IWARP_LL2_SYN_RX_SIZE       (256)
 #define QED_IWARP_MAX_SYN_PKT_SIZE      (128)
-#define QED_IWARP_HANDLE_INVAL			(0xff)
+
+#define QED_IWARP_LL2_OOO_DEF_TX_SIZE   (256)
+#define QED_IWARP_MAX_OOO		(16)
+#define QED_IWARP_LL2_OOO_MAX_RX_SIZE   (16384)
+
+#define QED_IWARP_HANDLE_INVAL		(0xff)
 
 struct qed_iwarp_ll2_buff {
+	struct qed_iwarp_ll2_buff *piggy_buf;
 	void *data;
 	dma_addr_t data_phys_addr;
 	u32 buff_size;
 };
 
+struct qed_iwarp_ll2_mpa_buf {
+	struct list_head list_entry;
+	struct qed_iwarp_ll2_buff *ll2_buf;
+	struct unaligned_opaque_data data;
+	u16 tcp_payload_len;
+	u8 placement_offset;
+};
+
+/* In some cases a fpdu will arrive with only one byte of the header, in this
+ * case the fpdu_length will be partial (contain only higher byte and
+ * incomplete bytes will contain the invalid value
+ */
+#define QED_IWARP_INVALID_INCOMPLETE_BYTES 0xffff
+
+struct qed_iwarp_fpdu {
+	struct qed_iwarp_ll2_buff *mpa_buf;
+	void *mpa_frag_virt;
+	dma_addr_t mpa_frag;
+	dma_addr_t pkt_hdr;
+	u16 mpa_frag_len;
+	u16 fpdu_length;
+	u16 incomplete_bytes;
+	u8 pkt_hdr_size;
+};
+
 struct qed_iwarp_info {
 	struct list_head listen_list;	/* qed_iwarp_listener */
 	struct list_head ep_list;	/* qed_iwarp_ep */
 	struct list_head ep_free_list;	/* pre-allocated ep's */
+	struct list_head mpa_buf_list;	/* list of mpa_bufs */
+	struct list_head mpa_buf_pending_list;
 	spinlock_t iw_lock;	/* for iwarp resources */
 	spinlock_t qp_lock;	/* for teardown races */
 	u32 rcv_wnd_scale;
@@ -67,9 +100,15 @@ struct qed_iwarp_info {
 	u8 crc_needed;
 	u8 tcp_flags;
 	u8 ll2_syn_handle;
+	u8 ll2_ooo_handle;
+	u8 ll2_mpa_handle;
 	u8 peer2peer;
 	enum mpa_negotiation_mode mpa_rev;
 	enum mpa_rtr_type rtr_type;
+	struct qed_iwarp_fpdu *partial_fpdus;
+	struct qed_iwarp_ll2_mpa_buf *mpa_bufs;
+	u8 *mpa_intermediate_buf;
+	u16 max_num_partial_fpdus;
 };
 
 enum qed_iwarp_ep_state {
@@ -147,6 +186,9 @@ int qed_iwarp_alloc(struct qed_hwfn *p_hwfn);
 int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 		    struct qed_rdma_start_in_params *params);
 
+void qed_iwarp_init_fw_ramrod(struct qed_hwfn *p_hwfn,
+			      struct iwarp_init_func_params *p_ramrod);
+
 int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 
 void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index c06ad4f0758e..047f556ca62e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -413,6 +413,7 @@ static void qed_ll2_rxq_parse_reg(struct qed_hwfn *p_hwfn,
 				  struct qed_ll2_comp_rx_data *data)
 {
 	data->parse_flags = le16_to_cpu(p_cqe->rx_cqe_fp.parse_flags.flags);
+	data->err_flags = le16_to_cpu(p_cqe->rx_cqe_fp.err_flags.flags);
 	data->length.packet_length =
 	    le16_to_cpu(p_cqe->rx_cqe_fp.packet_length);
 	data->vlan = le16_to_cpu(p_cqe->rx_cqe_fp.vlan);
@@ -422,6 +423,41 @@ static void qed_ll2_rxq_parse_reg(struct qed_hwfn *p_hwfn,
 }
 
 static int
+qed_ll2_handle_slowpath(struct qed_hwfn *p_hwfn,
+			struct qed_ll2_info *p_ll2_conn,
+			union core_rx_cqe_union *p_cqe,
+			unsigned long *p_lock_flags)
+{
+	struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue;
+	struct core_rx_slow_path_cqe *sp_cqe;
+
+	sp_cqe = &p_cqe->rx_cqe_sp;
+	if (sp_cqe->ramrod_cmd_id != CORE_RAMROD_RX_QUEUE_FLUSH) {
+		DP_NOTICE(p_hwfn,
+			  "LL2 - unexpected Rx CQE slowpath ramrod_cmd_id:%d\n",
+			  sp_cqe->ramrod_cmd_id);
+		return -EINVAL;
+	}
+
+	if (!p_ll2_conn->cbs.slowpath_cb) {
+		DP_NOTICE(p_hwfn,
+			  "LL2 - received RX_QUEUE_FLUSH but no callback was provided\n");
+		return -EINVAL;
+	}
+
+	spin_unlock_irqrestore(&p_rx->lock, *p_lock_flags);
+
+	p_ll2_conn->cbs.slowpath_cb(p_ll2_conn->cbs.cookie,
+				    p_ll2_conn->my_id,
+				    le32_to_cpu(sp_cqe->opaque_data.data[0]),
+				    le32_to_cpu(sp_cqe->opaque_data.data[1]));
+
+	spin_lock_irqsave(&p_rx->lock, *p_lock_flags);
+
+	return 0;
+}
+
+static int
 qed_ll2_rxq_handle_completion(struct qed_hwfn *p_hwfn,
 			      struct qed_ll2_info *p_ll2_conn,
 			      union core_rx_cqe_union *p_cqe,
@@ -494,8 +530,8 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie)
 
 		switch (cqe->rx_cqe_sp.type) {
 		case CORE_RX_CQE_TYPE_SLOW_PATH:
-			DP_NOTICE(p_hwfn, "LL2 - unexpected Rx CQE slowpath\n");
-			rc = -EINVAL;
+			rc = qed_ll2_handle_slowpath(p_hwfn, p_ll2_conn,
+						     cqe, &flags);
 			break;
 		case CORE_RX_CQE_TYPE_GSI_OFFLOAD:
 		case CORE_RX_CQE_TYPE_REGULAR:
@@ -893,7 +929,7 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn,
 	p_ramrod->drop_ttl0_flg = p_ll2_conn->input.rx_drop_ttl0_flg;
 	p_ramrod->inner_vlan_removal_en = p_ll2_conn->input.rx_vlan_removal_en;
 	p_ramrod->queue_id = p_ll2_conn->queue_id;
-	p_ramrod->main_func_queue = (conn_type == QED_LL2_TYPE_OOO) ? 0 : 1;
+	p_ramrod->main_func_queue = p_ll2_conn->main_func_queue ? 1 : 0;
 
 	if ((IS_MF_DEFAULT(p_hwfn) || IS_MF_SI(p_hwfn)) &&
 	    p_ramrod->main_func_queue && (conn_type != QED_LL2_TYPE_ROCE) &&
@@ -1104,6 +1140,7 @@ static int qed_ll2_acquire_connection_tx(struct qed_hwfn *p_hwfn,
 					 struct qed_ll2_info *p_ll2_info)
 {
 	struct qed_ll2_tx_packet *p_descq;
+	u32 desc_size;
 	u32 capacity;
 	int rc = 0;
 
@@ -1121,13 +1158,17 @@ static int qed_ll2_acquire_connection_tx(struct qed_hwfn *p_hwfn,
 		goto out;
 
 	capacity = qed_chain_get_capacity(&p_ll2_info->tx_queue.txq_chain);
-	p_descq = kcalloc(capacity, sizeof(struct qed_ll2_tx_packet),
-			  GFP_KERNEL);
+	/* First element is part of the packet, rest are flexibly added */
+	desc_size = (sizeof(*p_descq) +
+		     (p_ll2_info->input.tx_max_bds_per_packet - 1) *
+		     sizeof(p_descq->bds_set));
+
+	p_descq = kcalloc(capacity, desc_size, GFP_KERNEL);
 	if (!p_descq) {
 		rc = -ENOMEM;
 		goto out;
 	}
-	p_ll2_info->tx_queue.descq_array = p_descq;
+	p_ll2_info->tx_queue.descq_mem = p_descq;
 
 	DP_VERBOSE(p_hwfn, QED_MSG_LL2,
 		   "Allocated LL2 Txq [Type %08x] with 0x%08x buffers\n",
@@ -1208,6 +1249,7 @@ qed_ll2_set_cbs(struct qed_ll2_info *p_ll2_info, const struct qed_ll2_cbs *cbs)
 	p_ll2_info->cbs.rx_release_cb = cbs->rx_release_cb;
 	p_ll2_info->cbs.tx_comp_cb = cbs->tx_comp_cb;
 	p_ll2_info->cbs.tx_release_cb = cbs->tx_release_cb;
+	p_ll2_info->cbs.slowpath_cb = cbs->slowpath_cb;
 	p_ll2_info->cbs.cookie = cbs->cookie;
 
 	return 0;
@@ -1259,6 +1301,11 @@ int qed_ll2_acquire_connection(void *cxt, struct qed_ll2_acquire_data *data)
 
 	p_ll2_info->tx_dest = (data->input.tx_dest == QED_LL2_TX_DEST_NW) ?
 			      CORE_TX_DEST_NW : CORE_TX_DEST_LB;
+	if (data->input.conn_type == QED_LL2_TYPE_OOO ||
+	    data->input.secondary_queue)
+		p_ll2_info->main_func_queue = false;
+	else
+		p_ll2_info->main_func_queue = true;
 
 	/* Correct maximum number of Tx BDs */
 	p_tx_max = &p_ll2_info->input.tx_max_bds_per_packet;
@@ -1358,11 +1405,13 @@ int qed_ll2_establish_connection(void *cxt, u8 connection_handle)
 {
 	struct qed_hwfn *p_hwfn = cxt;
 	struct qed_ll2_info *p_ll2_conn;
+	struct qed_ll2_tx_packet *p_pkt;
 	struct qed_ll2_rx_queue *p_rx;
 	struct qed_ll2_tx_queue *p_tx;
 	struct qed_ptt *p_ptt;
 	int rc = -EINVAL;
 	u32 i, capacity;
+	u32 desc_size;
 	u8 qid;
 
 	p_ptt = qed_ptt_acquire(p_hwfn);
@@ -1396,9 +1445,15 @@ int qed_ll2_establish_connection(void *cxt, u8 connection_handle)
 	INIT_LIST_HEAD(&p_tx->sending_descq);
 	spin_lock_init(&p_tx->lock);
 	capacity = qed_chain_get_capacity(&p_tx->txq_chain);
-	for (i = 0; i < capacity; i++)
-		list_add_tail(&p_tx->descq_array[i].list_entry,
-			      &p_tx->free_descq);
+	/* First element is part of the packet, rest are flexibly added */
+	desc_size = (sizeof(*p_pkt) +
+		     (p_ll2_conn->input.tx_max_bds_per_packet - 1) *
+		     sizeof(p_pkt->bds_set));
+
+	for (i = 0; i < capacity; i++) {
+		p_pkt = p_tx->descq_mem + desc_size * i;
+		list_add_tail(&p_pkt->list_entry, &p_tx->free_descq);
+	}
 	p_tx->cur_completing_bd_idx = 0;
 	p_tx->bds_idx = 0;
 	p_tx->b_completing_packet = false;
@@ -1578,11 +1633,28 @@ qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn,
 	roce_flavor = (pkt->qed_roce_flavor == QED_LL2_ROCE) ? CORE_ROCE
 							     : CORE_RROCE;
 
-	tx_dest = (pkt->tx_dest == QED_LL2_TX_DEST_NW) ? CORE_TX_DEST_NW
-						       : CORE_TX_DEST_LB;
+	switch (pkt->tx_dest) {
+	case QED_LL2_TX_DEST_NW:
+		tx_dest = CORE_TX_DEST_NW;
+		break;
+	case QED_LL2_TX_DEST_LB:
+		tx_dest = CORE_TX_DEST_LB;
+		break;
+	case QED_LL2_TX_DEST_DROP:
+		tx_dest = CORE_TX_DEST_DROP;
+		break;
+	default:
+		tx_dest = CORE_TX_DEST_LB;
+		break;
+	}
 
 	start_bd = (struct core_tx_bd *)qed_chain_produce(p_tx_chain);
-	start_bd->nw_vlan_or_lb_echo = cpu_to_le16(pkt->vlan);
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn) &&
+	    p_ll2->input.conn_type == QED_LL2_TYPE_OOO)
+		start_bd->nw_vlan_or_lb_echo =
+		    cpu_to_le16(IWARP_LL2_IN_ORDER_TX_QUEUE);
+	else
+		start_bd->nw_vlan_or_lb_echo = cpu_to_le16(pkt->vlan);
 	SET_FIELD(start_bd->bitfield1, CORE_TX_BD_L4_HDR_OFFSET_W,
 		  cpu_to_le16(pkt->l4_hdr_offset_w));
 	SET_FIELD(start_bd->bitfield1, CORE_TX_BD_TX_DST, tx_dest);
@@ -1590,6 +1662,9 @@ qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn,
 	SET_FIELD(bd_data, CORE_TX_BD_DATA_START_BD, 0x1);
 	SET_FIELD(bd_data, CORE_TX_BD_DATA_NBDS, pkt->num_of_bds);
 	SET_FIELD(bd_data, CORE_TX_BD_DATA_ROCE_FLAV, roce_flavor);
+	SET_FIELD(bd_data, CORE_TX_BD_DATA_IP_CSUM, !!(pkt->enable_ip_cksum));
+	SET_FIELD(bd_data, CORE_TX_BD_DATA_L4_CSUM, !!(pkt->enable_l4_cksum));
+	SET_FIELD(bd_data, CORE_TX_BD_DATA_IP_LEN, !!(pkt->calc_ip_len));
 	start_bd->bd_data.as_bitfield = cpu_to_le16(bd_data);
 	DMA_REGPAIR_LE(start_bd->addr, pkt->first_frag);
 	start_bd->nbytes = cpu_to_le16(pkt->first_frag_len);
@@ -1697,7 +1772,7 @@ int qed_ll2_prepare_tx_packet(void *cxt,
 	p_tx = &p_ll2_conn->tx_queue;
 	p_tx_chain = &p_tx->txq_chain;
 
-	if (pkt->num_of_bds > CORE_LL2_TX_MAX_BDS_PER_PACKET)
+	if (pkt->num_of_bds > p_ll2_conn->input.tx_max_bds_per_packet)
 		return -EIO;
 
 	spin_lock_irqsave(&p_tx->lock, flags);
@@ -1857,7 +1932,7 @@ void qed_ll2_release_connection(void *cxt, u8 connection_handle)
 		qed_int_unregister_cb(p_hwfn, p_ll2_conn->tx_queue.tx_sb_index);
 	}
 
-	kfree(p_ll2_conn->tx_queue.descq_array);
+	kfree(p_ll2_conn->tx_queue.descq_mem);
 	qed_chain_free(p_hwfn->cdev, &p_ll2_conn->tx_queue.txq_chain);
 
 	kfree(p_ll2_conn->rx_queue.descq_array);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h
index a822528e9c63..f65817012e97 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h
@@ -63,17 +63,14 @@ struct qed_ll2_rx_packet {
 struct qed_ll2_tx_packet {
 	struct list_head list_entry;
 	u16 bd_used;
-	u16 vlan;
-	u16 l4_hdr_offset_w;
-	u8 bd_flags;
 	bool notify_fw;
 	void *cookie;
-
+	/* Flexible Array of bds_set determined by max_bds_per_packet */
 	struct {
 		struct core_tx_bd *txq_bd;
 		dma_addr_t tx_frag;
 		u16 frag_len;
-	} bds_set[ETH_TX_MAX_BDS_PER_NON_LSO_PACKET];
+	} bds_set[1];
 };
 
 struct qed_ll2_rx_queue {
@@ -101,7 +98,7 @@ struct qed_ll2_tx_queue {
 	struct list_head active_descq;
 	struct list_head free_descq;
 	struct list_head sending_descq;
-	struct qed_ll2_tx_packet *descq_array;
+	void *descq_mem; /* memory for variable sized qed_ll2_tx_packet*/
 	struct qed_ll2_tx_packet *cur_send_packet;
 	struct qed_ll2_tx_packet cur_completing_packet;
 	u16 cur_completing_bd_idx;
@@ -124,6 +121,7 @@ struct qed_ll2_info {
 	bool b_active;
 	enum core_tx_dest tx_dest;
 	u8 tx_stats_en;
+	bool main_func_queue;
 	struct qed_ll2_rx_queue rx_queue;
 	struct qed_ll2_tx_queue tx_queue;
 	struct qed_ll2_cbs cbs;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 376485d99357..8b99c7d26f34 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -1691,12 +1691,12 @@ qed_mcp_get_shmem_proto_mfw(struct qed_hwfn *p_hwfn,
 	case FW_MB_PARAM_GET_PF_RDMA_ROCE:
 		*p_proto = QED_PCI_ETH_ROCE;
 		break;
+	case FW_MB_PARAM_GET_PF_RDMA_IWARP:
+		*p_proto = QED_PCI_ETH_IWARP;
+		break;
 	case FW_MB_PARAM_GET_PF_RDMA_BOTH:
-		DP_NOTICE(p_hwfn,
-			  "Current day drivers don't support RoCE & iWARP. Default to RoCE-only\n");
-		*p_proto = QED_PCI_ETH_ROCE;
+		*p_proto = QED_PCI_ETH_RDMA;
 		break;
-	case FW_MB_PARAM_GET_PF_RDMA_IWARP:
 	default:
 		DP_NOTICE(p_hwfn,
 			  "MFW answers GET_PF_RDMA_PROTOCOL but param is %08x\n",
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ooo.c b/drivers/net/ethernet/qlogic/qed/qed_ooo.c
index 000636530111..6172354b451c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ooo.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ooo.c
@@ -103,18 +103,28 @@ int qed_ooo_alloc(struct qed_hwfn *p_hwfn)
 {
 	u16 max_num_archipelagos = 0, cid_base;
 	struct qed_ooo_info *p_ooo_info;
+	enum protocol_type proto;
 	u16 max_num_isles = 0;
 	u32 i;
 
-	if (p_hwfn->hw_info.personality != QED_PCI_ISCSI) {
+	switch (p_hwfn->hw_info.personality) {
+	case QED_PCI_ISCSI:
+		proto = PROTOCOLID_ISCSI;
+		break;
+	case QED_PCI_ETH_RDMA:
+	case QED_PCI_ETH_IWARP:
+		proto = PROTOCOLID_IWARP;
+		break;
+	default:
 		DP_NOTICE(p_hwfn,
 			  "Failed to allocate qed_ooo_info: unknown personality\n");
 		return -EINVAL;
 	}
 
-	max_num_archipelagos = p_hwfn->pf_params.iscsi_pf_params.num_cons;
+	max_num_archipelagos = (u16)qed_cxt_get_proto_cid_count(p_hwfn, proto,
+								NULL);
 	max_num_isles = QED_MAX_NUM_ISLES + max_num_archipelagos;
-	cid_base = (u16)qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_ISCSI);
+	cid_base = (u16)qed_cxt_get_proto_cid_start(p_hwfn, proto);
 
 	if (!max_num_archipelagos) {
 		DP_NOTICE(p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ooo.h b/drivers/net/ethernet/qlogic/qed/qed_ooo.h
index e8ed40b848f5..49c4e75b15b1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ooo.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_ooo.h
@@ -83,7 +83,7 @@ struct qed_ooo_info {
 	u16 cid_base;
 };
 
-#if IS_ENABLED(CONFIG_QED_ISCSI)
+#if IS_ENABLED(CONFIG_QED_OOO)
 void qed_ooo_save_history_entry(struct qed_hwfn *p_hwfn,
 				struct qed_ooo_info *p_ooo_info,
 				struct ooo_opaque *p_cqe);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index 6fb99518a61f..c8c4b3940564 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -156,7 +156,10 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn,
 		return rc;
 
 	p_hwfn->p_rdma_info = p_rdma_info;
-	p_rdma_info->proto = PROTOCOLID_ROCE;
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+		p_rdma_info->proto = PROTOCOLID_IWARP;
+	else
+		p_rdma_info->proto = PROTOCOLID_ROCE;
 
 	num_cons = qed_cxt_get_proto_cid_count(p_hwfn, p_rdma_info->proto,
 					       NULL);
@@ -206,11 +209,11 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn,
 		goto free_pd_map;
 	}
 
-	/* Allocate bitmap for cq's. The maximum number of CQs is bounded to
-	 * twice the number of QPs.
+	/* Allocate bitmap for cq's. The maximum number of CQs is bound to
+	 * the number of connections we support. (num_qps in iWARP or
+	 * num_qps/2 in RoCE).
 	 */
-	rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cq_map,
-				 p_rdma_info->num_qps * 2, "CQ");
+	rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cq_map, num_cons, "CQ");
 	if (rc) {
 		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
 			   "Failed to allocate cq bitmap, rc = %d\n", rc);
@@ -219,10 +222,10 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn,
 
 	/* Allocate bitmap for toggle bit for cq icids
 	 * We toggle the bit every time we create or resize cq for a given icid.
-	 * The maximum number of CQs is bounded to  twice the number of QPs.
+	 * Size needs to equal the size of the cq bmap.
 	 */
 	rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->toggle_bits,
-				 p_rdma_info->num_qps * 2, "Toggle");
+				 num_cons, "Toggle");
 	if (rc) {
 		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
 			   "Failed to allocate toogle bits, rc = %d\n", rc);
@@ -548,10 +551,13 @@ static int qed_rdma_start_fw(struct qed_hwfn *p_hwfn,
 	if (rc)
 		return rc;
 
-	if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+	if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
+		qed_iwarp_init_fw_ramrod(p_hwfn,
+					 &p_ent->ramrod.iwarp_init_func.iwarp);
 		p_ramrod = &p_ent->ramrod.iwarp_init_func.rdma;
-	else
+	} else {
 		p_ramrod = &p_ent->ramrod.roce_init_func.rdma;
+	}
 
 	p_params_header = &p_ramrod->params_header;
 	p_params_header->cnq_start_offset = (u8)RESC_START(p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
index 46d0c3cb83a5..a1d33f35aad3 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
@@ -377,6 +377,7 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
 		p_ramrod->personality = PERSONALITY_ISCSI;
 		break;
 	case QED_PCI_ETH_ROCE:
+	case QED_PCI_ETH_IWARP:
 		p_ramrod->personality = PERSONALITY_RDMA_AND_ETH;
 		break;
 	default:
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index adb700512baa..a3a70ade411f 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -503,7 +503,7 @@ void qede_fill_rss_params(struct qede_dev *edev,
 void qede_udp_tunnel_add(struct net_device *dev, struct udp_tunnel_info *ti);
 void qede_udp_tunnel_del(struct net_device *dev, struct udp_tunnel_info *ti);
 
-int qede_xdp(struct net_device *dev, struct netdev_xdp *xdp);
+int qede_xdp(struct net_device *dev, struct netdev_bpf *xdp);
 
 #ifdef CONFIG_DCB
 void qede_set_dcbnl_ops(struct net_device *ndev);
diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c
index f79e36e4060a..c1a0708a7d7c 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_filter.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c
@@ -1065,7 +1065,7 @@ static int qede_xdp_set(struct qede_dev *edev, struct bpf_prog *prog)
 	return 0;
 }
 
-int qede_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+int qede_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 {
 	struct qede_dev *edev = netdev_priv(dev);
 
diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index 6fc854b120b0..48ec4c56cddf 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -1004,6 +1004,7 @@ static bool qede_rx_xdp(struct qede_dev *edev,
 
 	xdp.data_hard_start = page_address(bd->data);
 	xdp.data = xdp.data_hard_start + *data_offset;
+	xdp_set_data_meta_invalid(&xdp);
 	xdp.data_end = xdp.data + *len;
 
 	/* Queues always have a full reset currently, so for the time
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index e5ee9f274a71..8f9b3eb82137 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -556,7 +556,7 @@ static const struct net_device_ops qede_netdev_ops = {
 	.ndo_udp_tunnel_add = qede_udp_tunnel_add,
 	.ndo_udp_tunnel_del = qede_udp_tunnel_del,
 	.ndo_features_check = qede_features_check,
-	.ndo_xdp = qede_xdp,
+	.ndo_bpf = qede_xdp,
 #ifdef CONFIG_RFS_ACCEL
 	.ndo_rx_flow_steer = qede_rx_flow_steer,
 #endif
@@ -594,7 +594,7 @@ static const struct net_device_ops qede_netdev_vf_xdp_ops = {
 	.ndo_udp_tunnel_add = qede_udp_tunnel_add,
 	.ndo_udp_tunnel_del = qede_udp_tunnel_del,
 	.ndo_features_check = qede_features_check,
-	.ndo_xdp = qede_xdp,
+	.ndo_bpf = qede_xdp,
 };
 
 /* -------------------------------------------------------------------------
diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c
index 2991179c2fd0..05479d435469 100644
--- a/drivers/net/ethernet/qlogic/qla3xxx.c
+++ b/drivers/net/ethernet/qlogic/qla3xxx.c
@@ -3891,10 +3891,8 @@ static int ql3xxx_probe(struct pci_dev *pdev,
 	INIT_DELAYED_WORK(&qdev->tx_timeout_work, ql_tx_timeout_work);
 	INIT_DELAYED_WORK(&qdev->link_state_work, ql_link_state_machine_work);
 
-	init_timer(&qdev->adapter_timer);
-	qdev->adapter_timer.function = ql3xxx_timer;
+	setup_timer(&qdev->adapter_timer, ql3xxx_timer, (unsigned long)qdev);
 	qdev->adapter_timer.expires = jiffies + HZ * 2;	/* two second delay */
-	qdev->adapter_timer.data = (unsigned long)qdev;
 
 	if (!cards_found) {
 		pr_alert("%s\n", DRV_STRING);
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index 29fea74bff2e..7b97a9969046 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -1092,8 +1092,7 @@ static int ql_get_next_chunk(struct ql_adapter *qdev, struct rx_ring *rx_ring,
 {
 	if (!rx_ring->pg_chunk.page) {
 		u64 map;
-		rx_ring->pg_chunk.page = alloc_pages(__GFP_COLD | __GFP_COMP |
-						GFP_ATOMIC,
+		rx_ring->pg_chunk.page = alloc_pages(__GFP_COMP | GFP_ATOMIC,
 						qdev->lbq_buf_order);
 		if (unlikely(!rx_ring->pg_chunk.page)) {
 			netif_err(qdev, drv, qdev->ndev,
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_mpi.c b/drivers/net/ethernet/qlogic/qlge/qlge_mpi.c
index 384c8bc874f3..4be65d6761b3 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_mpi.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_mpi.c
@@ -213,7 +213,6 @@ static int ql_idc_req_aen(struct ql_adapter *qdev)
 	/* Get the status data and start up a thread to
 	 * handle the request.
 	 */
-	mbcp = &qdev->idc_mbc;
 	mbcp->out_count = 4;
 	status = ql_get_mb_sts(qdev, mbcp);
 	if (status) {
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
index 3ed9033e56db..9cbb27263742 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
@@ -309,22 +309,12 @@ void emac_mac_mode_config(struct emac_adapter *adpt)
 /* Config descriptor rings */
 static void emac_mac_dma_rings_config(struct emac_adapter *adpt)
 {
-	static const unsigned short tpd_q_offset[] = {
-		EMAC_DESC_CTRL_8,        EMAC_H1TPD_BASE_ADDR_LO,
-		EMAC_H2TPD_BASE_ADDR_LO, EMAC_H3TPD_BASE_ADDR_LO};
-	static const unsigned short rfd_q_offset[] = {
-		EMAC_DESC_CTRL_2,        EMAC_DESC_CTRL_10,
-		EMAC_DESC_CTRL_12,       EMAC_DESC_CTRL_13};
-	static const unsigned short rrd_q_offset[] = {
-		EMAC_DESC_CTRL_5,        EMAC_DESC_CTRL_14,
-		EMAC_DESC_CTRL_15,       EMAC_DESC_CTRL_16};
-
 	/* TPD (Transmit Packet Descriptor) */
 	writel(upper_32_bits(adpt->tx_q.tpd.dma_addr),
 	       adpt->base + EMAC_DESC_CTRL_1);
 
 	writel(lower_32_bits(adpt->tx_q.tpd.dma_addr),
-	       adpt->base + tpd_q_offset[0]);
+	       adpt->base + EMAC_DESC_CTRL_8);
 
 	writel(adpt->tx_q.tpd.count & TPD_RING_SIZE_BMSK,
 	       adpt->base + EMAC_DESC_CTRL_9);
@@ -334,9 +324,9 @@ static void emac_mac_dma_rings_config(struct emac_adapter *adpt)
 	       adpt->base + EMAC_DESC_CTRL_0);
 
 	writel(lower_32_bits(adpt->rx_q.rfd.dma_addr),
-	       adpt->base + rfd_q_offset[0]);
+	       adpt->base + EMAC_DESC_CTRL_2);
 	writel(lower_32_bits(adpt->rx_q.rrd.dma_addr),
-	       adpt->base + rrd_q_offset[0]);
+	       adpt->base + EMAC_DESC_CTRL_5);
 
 	writel(adpt->rx_q.rfd.count & RFD_RING_SIZE_BMSK,
 	       adpt->base + EMAC_DESC_CTRL_3);
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
index 29ba37a08372..e8ab512ee7e3 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
@@ -68,10 +68,10 @@ static void emac_sgmii_link_init(struct emac_adapter *adpt)
 	writel(val, phy->base + EMAC_SGMII_PHY_AUTONEG_CFG2);
 }
 
-static int emac_sgmii_irq_clear(struct emac_adapter *adpt, u32 irq_bits)
+static int emac_sgmii_irq_clear(struct emac_adapter *adpt, u8 irq_bits)
 {
 	struct emac_sgmii *phy = &adpt->phy;
-	u32 status;
+	u8 status;
 
 	writel_relaxed(irq_bits, phy->base + EMAC_SGMII_PHY_INTERRUPT_CLEAR);
 	writel_relaxed(IRQ_GLOBAL_CLEAR, phy->base + EMAC_SGMII_PHY_IRQ_CMD);
@@ -86,9 +86,8 @@ static int emac_sgmii_irq_clear(struct emac_adapter *adpt, u32 irq_bits)
 				      EMAC_SGMII_PHY_INTERRUPT_STATUS,
 				      status, !(status & irq_bits), 1,
 				      SGMII_PHY_IRQ_CLR_WAIT_TIME)) {
-		netdev_err(adpt->netdev,
-			   "error: failed clear SGMII irq: status:0x%x bits:0x%x\n",
-			   status, irq_bits);
+		net_err_ratelimited("%s: failed to clear SGMII irq: status:0x%x bits:0x%x\n",
+				    adpt->netdev->name, status, irq_bits);
 		return -EIO;
 	}
 
@@ -109,7 +108,7 @@ static irqreturn_t emac_sgmii_interrupt(int irq, void *data)
 {
 	struct emac_adapter *adpt = data;
 	struct emac_sgmii *phy = &adpt->phy;
-	u32 status;
+	u8 status;
 
 	status = readl(phy->base + EMAC_SGMII_PHY_INTERRUPT_STATUS);
 	status &= SGMII_ISR_MASK;
@@ -139,10 +138,8 @@ static irqreturn_t emac_sgmii_interrupt(int irq, void *data)
 		atomic_set(&phy->decode_error_count, 0);
 	}
 
-	if (emac_sgmii_irq_clear(adpt, status)) {
-		netdev_warn(adpt->netdev, "failed to clear SGMII interrupt\n");
+	if (emac_sgmii_irq_clear(adpt, status))
 		schedule_work(&adpt->work_thread);
-	}
 
 	return IRQ_HANDLED;
 }
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index 759543512117..70c92b649b29 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -130,7 +130,7 @@ static int emac_start_xmit(struct sk_buff *skb, struct net_device *netdev)
 	return emac_mac_tx_buf_send(adpt, &adpt->tx_q, skb);
 }
 
-irqreturn_t emac_isr(int _irq, void *data)
+static irqreturn_t emac_isr(int _irq, void *data)
 {
 	struct emac_irq *irq = data;
 	struct emac_adapter *adpt =
@@ -148,9 +148,8 @@ irqreturn_t emac_isr(int _irq, void *data)
 		goto exit;
 
 	if (status & ISR_ERROR) {
-		netif_warn(adpt,  intr, adpt->netdev,
-			   "warning: error irq status 0x%lx\n",
-			   status & ISR_ERROR);
+		net_err_ratelimited("%s: error interrupt 0x%lx\n",
+				    adpt->netdev->name, status & ISR_ERROR);
 		/* reset MAC */
 		schedule_work(&adpt->work_thread);
 	}
@@ -169,7 +168,8 @@ irqreturn_t emac_isr(int _irq, void *data)
 		emac_mac_tx_process(adpt, &adpt->tx_q);
 
 	if (status & ISR_OVER)
-		net_warn_ratelimited("warning: TX/RX overflow\n");
+		net_warn_ratelimited("%s: TX/RX overflow interrupt\n",
+				     adpt->netdev->name);
 
 exit:
 	/* enable the interrupt */
@@ -615,20 +615,11 @@ static int emac_probe(struct platform_device *pdev)
 	u32 reg;
 	int ret;
 
-	/* The EMAC itself is capable of 64-bit DMA, so try that first. */
-	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	/* The TPD buffer address is limited to 45 bits. */
+	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(45));
 	if (ret) {
-		/* Some platforms may restrict the EMAC's address bus to less
-		 * then the size of DDR. In this case, we need to try a
-		 * smaller mask.  We could try every possible smaller mask,
-		 * but that's overkill.  Instead, just fall to 32-bit, which
-		 * should always work.
-		 */
-		ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
-		if (ret) {
-			dev_err(&pdev->dev, "could not set DMA mask\n");
-			return ret;
-		}
+		dev_err(&pdev->dev, "could not set DMA mask\n");
+		return ret;
 	}
 
 	netdev = alloc_etherdev(sizeof(struct emac_adapter));
diff --git a/drivers/net/ethernet/qualcomm/rmnet/Kconfig b/drivers/net/ethernet/qualcomm/rmnet/Kconfig
index 6e2587af47a4..9bb06d284644 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/Kconfig
+++ b/drivers/net/ethernet/qualcomm/rmnet/Kconfig
@@ -5,6 +5,7 @@
 menuconfig RMNET
 	tristate "RmNet MAP driver"
 	default n
+	select GRO_CELLS
 	---help---
 	  If you select this, you will enable the RMNET module which is used
 	  for handling data in the multiplexing and aggregation protocol (MAP)
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
index 1e33aea59f50..71bee1af71ef 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -61,23 +61,6 @@ rmnet_get_port_rtnl(const struct net_device *real_dev)
 	return rtnl_dereference(real_dev->rx_handler_data);
 }
 
-static struct rmnet_endpoint*
-rmnet_get_endpoint(struct net_device *dev, int config_id)
-{
-	struct rmnet_endpoint *ep;
-	struct rmnet_port *port;
-
-	if (!rmnet_is_real_dev_registered(dev)) {
-		ep = rmnet_vnd_get_endpoint(dev);
-	} else {
-		port = rmnet_get_port_rtnl(dev);
-
-		ep = &port->muxed_ep[config_id];
-	}
-
-	return ep;
-}
-
 static int rmnet_unregister_real_device(struct net_device *real_dev,
 					struct rmnet_port *port)
 {
@@ -98,7 +81,7 @@ static int rmnet_unregister_real_device(struct net_device *real_dev,
 static int rmnet_register_real_device(struct net_device *real_dev)
 {
 	struct rmnet_port *port;
-	int rc;
+	int rc, entry;
 
 	ASSERT_RTNL();
 
@@ -119,27 +102,41 @@ static int rmnet_register_real_device(struct net_device *real_dev)
 	/* hold on to real dev for MAP data */
 	dev_hold(real_dev);
 
+	for (entry = 0; entry < RMNET_MAX_LOGICAL_EP; entry++)
+		INIT_HLIST_HEAD(&port->muxed_ep[entry]);
+
 	netdev_dbg(real_dev, "registered with rmnet\n");
 	return 0;
 }
 
-static void rmnet_set_endpoint_config(struct net_device *dev,
-				      u8 mux_id, u8 rmnet_mode,
-				      struct net_device *egress_dev)
+static void rmnet_unregister_bridge(struct net_device *dev,
+				    struct rmnet_port *port)
 {
-	struct rmnet_endpoint *ep;
+	struct net_device *rmnet_dev, *bridge_dev;
+	struct rmnet_port *bridge_port;
+
+	if (port->rmnet_mode != RMNET_EPMODE_BRIDGE)
+		return;
 
-	netdev_dbg(dev, "id %d mode %d dev %s\n",
-		   mux_id, rmnet_mode, egress_dev->name);
+	/* bridge slave handling */
+	if (!port->nr_rmnet_devs) {
+		rmnet_dev = netdev_master_upper_dev_get_rcu(dev);
+		netdev_upper_dev_unlink(dev, rmnet_dev);
 
-	ep = rmnet_get_endpoint(dev, mux_id);
-	/* This config is cleared on every set, so its ok to not
-	 * clear it on a device delete.
-	 */
-	memset(ep, 0, sizeof(struct rmnet_endpoint));
-	ep->rmnet_mode = rmnet_mode;
-	ep->egress_dev = egress_dev;
-	ep->mux_id = mux_id;
+		bridge_dev = port->bridge_ep;
+
+		bridge_port = rmnet_get_port_rtnl(bridge_dev);
+		bridge_port->bridge_ep = NULL;
+		bridge_port->rmnet_mode = RMNET_EPMODE_VND;
+	} else {
+		bridge_dev = port->bridge_ep;
+
+		bridge_port = rmnet_get_port_rtnl(bridge_dev);
+		rmnet_dev = netdev_master_upper_dev_get_rcu(bridge_dev);
+		netdev_upper_dev_unlink(bridge_dev, rmnet_dev);
+
+		rmnet_unregister_real_device(bridge_dev, bridge_port);
+	}
 }
 
 static int rmnet_newlink(struct net *src_net, struct net_device *dev,
@@ -153,6 +150,7 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
 			    RMNET_EGRESS_FORMAT_MAP;
 	struct net_device *real_dev;
 	int mode = RMNET_EPMODE_VND;
+	struct rmnet_endpoint *ep;
 	struct rmnet_port *port;
 	int err = 0;
 	u16 mux_id;
@@ -164,6 +162,10 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
 	if (!data[IFLA_VLAN_ID])
 		return -EINVAL;
 
+	ep = kzalloc(sizeof(*ep), GFP_ATOMIC);
+	if (!ep)
+		return -ENOMEM;
+
 	mux_id = nla_get_u16(data[IFLA_VLAN_ID]);
 
 	err = rmnet_register_real_device(real_dev);
@@ -171,11 +173,11 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
 		goto err0;
 
 	port = rmnet_get_port_rtnl(real_dev);
-	err = rmnet_vnd_newlink(mux_id, dev, port, real_dev);
+	err = rmnet_vnd_newlink(mux_id, dev, port, real_dev, ep);
 	if (err)
 		goto err1;
 
-	err = netdev_master_upper_dev_link(dev, real_dev, NULL, NULL);
+	err = netdev_master_upper_dev_link(dev, real_dev, NULL, NULL, extack);
 	if (err)
 		goto err2;
 
@@ -183,13 +185,13 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
 		   ingress_format, egress_format);
 	port->egress_data_format = egress_format;
 	port->ingress_data_format = ingress_format;
+	port->rmnet_mode = mode;
 
-	rmnet_set_endpoint_config(real_dev, mux_id, mode, dev);
-	rmnet_set_endpoint_config(dev, mux_id, mode, real_dev);
+	hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]);
 	return 0;
 
 err2:
-	rmnet_vnd_dellink(mux_id, port);
+	rmnet_vnd_dellink(mux_id, port, ep);
 err1:
 	rmnet_unregister_real_device(real_dev, port);
 err0:
@@ -199,6 +201,7 @@ err0:
 static void rmnet_dellink(struct net_device *dev, struct list_head *head)
 {
 	struct net_device *real_dev;
+	struct rmnet_endpoint *ep;
 	struct rmnet_port *port;
 	u8 mux_id;
 
@@ -212,8 +215,15 @@ static void rmnet_dellink(struct net_device *dev, struct list_head *head)
 	port = rmnet_get_port_rtnl(real_dev);
 
 	mux_id = rmnet_vnd_get_mux(dev);
-	rmnet_vnd_dellink(mux_id, port);
 	netdev_upper_dev_unlink(dev, real_dev);
+
+	ep = rmnet_get_endpoint(port, mux_id);
+	if (ep) {
+		hlist_del_init_rcu(&ep->hlnode);
+		rmnet_unregister_bridge(dev, port);
+		rmnet_vnd_dellink(mux_id, port, ep);
+		kfree(ep);
+	}
 	rmnet_unregister_real_device(real_dev, port);
 
 	unregister_netdevice_queue(dev, head);
@@ -222,11 +232,16 @@ static void rmnet_dellink(struct net_device *dev, struct list_head *head)
 static int rmnet_dev_walk_unreg(struct net_device *rmnet_dev, void *data)
 {
 	struct rmnet_walk_data *d = data;
+	struct rmnet_endpoint *ep;
 	u8 mux_id;
 
 	mux_id = rmnet_vnd_get_mux(rmnet_dev);
-
-	rmnet_vnd_dellink(mux_id, d->port);
+	ep = rmnet_get_endpoint(d->port, mux_id);
+	if (ep) {
+		hlist_del_init_rcu(&ep->hlnode);
+		rmnet_vnd_dellink(mux_id, d->port, ep);
+		kfree(ep);
+	}
 	netdev_upper_dev_unlink(rmnet_dev, d->real_dev);
 	unregister_netdevice_queue(rmnet_dev, d->head);
 
@@ -252,6 +267,8 @@ static void rmnet_force_unassociate_device(struct net_device *dev)
 	d.port = port;
 
 	rcu_read_lock();
+	rmnet_unregister_bridge(dev, port);
+
 	netdev_walk_all_lower_dev_rcu(real_dev, rmnet_dev_walk_unreg, &d);
 	rcu_read_unlock();
 	unregister_netdevice_many(&list);
@@ -324,6 +341,77 @@ struct rmnet_port *rmnet_get_port(struct net_device *real_dev)
 		return NULL;
 }
 
+struct rmnet_endpoint *rmnet_get_endpoint(struct rmnet_port *port, u8 mux_id)
+{
+	struct rmnet_endpoint *ep;
+
+	hlist_for_each_entry_rcu(ep, &port->muxed_ep[mux_id], hlnode) {
+		if (ep->mux_id == mux_id)
+			return ep;
+	}
+
+	return NULL;
+}
+
+int rmnet_add_bridge(struct net_device *rmnet_dev,
+		     struct net_device *slave_dev,
+		     struct netlink_ext_ack *extack)
+{
+	struct rmnet_priv *priv = netdev_priv(rmnet_dev);
+	struct net_device *real_dev = priv->real_dev;
+	struct rmnet_port *port, *slave_port;
+	int err;
+
+	port = rmnet_get_port(real_dev);
+
+	/* If there is more than one rmnet dev attached, its probably being
+	 * used for muxing. Skip the briding in that case
+	 */
+	if (port->nr_rmnet_devs > 1)
+		return -EINVAL;
+
+	if (rmnet_is_real_dev_registered(slave_dev))
+		return -EBUSY;
+
+	err = rmnet_register_real_device(slave_dev);
+	if (err)
+		return -EBUSY;
+
+	err = netdev_master_upper_dev_link(slave_dev, rmnet_dev, NULL, NULL,
+					   extack);
+	if (err)
+		return -EINVAL;
+
+	slave_port = rmnet_get_port(slave_dev);
+	slave_port->rmnet_mode = RMNET_EPMODE_BRIDGE;
+	slave_port->bridge_ep = real_dev;
+
+	port->rmnet_mode = RMNET_EPMODE_BRIDGE;
+	port->bridge_ep = slave_dev;
+
+	netdev_dbg(slave_dev, "registered with rmnet as slave\n");
+	return 0;
+}
+
+int rmnet_del_bridge(struct net_device *rmnet_dev,
+		     struct net_device *slave_dev)
+{
+	struct rmnet_priv *priv = netdev_priv(rmnet_dev);
+	struct net_device *real_dev = priv->real_dev;
+	struct rmnet_port *port, *slave_port;
+
+	port = rmnet_get_port(real_dev);
+	port->rmnet_mode = RMNET_EPMODE_VND;
+	port->bridge_ep = NULL;
+
+	netdev_upper_dev_unlink(slave_dev, rmnet_dev);
+	slave_port = rmnet_get_port(slave_dev);
+	rmnet_unregister_real_device(slave_dev, slave_port);
+
+	netdev_dbg(slave_dev, "removed from rmnet as slave\n");
+	return 0;
+}
+
 /* Startup/Shutdown */
 
 static int __init rmnet_init(void)
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
index dde4e9f14f4a..c19259eea99e 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
@@ -14,19 +14,17 @@
  */
 
 #include <linux/skbuff.h>
+#include <net/gro_cells.h>
 
 #ifndef _RMNET_CONFIG_H_
 #define _RMNET_CONFIG_H_
 
 #define RMNET_MAX_LOGICAL_EP 255
 
-/* Information about the next device to deliver the packet to.
- * Exact usage of this parameter depends on the rmnet_mode.
- */
 struct rmnet_endpoint {
-	u8 rmnet_mode;
 	u8 mux_id;
 	struct net_device *egress_dev;
+	struct hlist_node hlnode;
 };
 
 /* One instance of this structure is instantiated for each real_dev associated
@@ -34,22 +32,41 @@ struct rmnet_endpoint {
  */
 struct rmnet_port {
 	struct net_device *dev;
-	struct rmnet_endpoint local_ep;
-	struct rmnet_endpoint muxed_ep[RMNET_MAX_LOGICAL_EP];
 	u32 ingress_data_format;
 	u32 egress_data_format;
-	struct net_device *rmnet_devices[RMNET_MAX_LOGICAL_EP];
 	u8 nr_rmnet_devs;
+	u8 rmnet_mode;
+	struct hlist_head muxed_ep[RMNET_MAX_LOGICAL_EP];
+	struct net_device *bridge_ep;
 };
 
 extern struct rtnl_link_ops rmnet_link_ops;
 
+struct rmnet_vnd_stats {
+	u64 rx_pkts;
+	u64 rx_bytes;
+	u64 tx_pkts;
+	u64 tx_bytes;
+	u32 tx_drops;
+};
+
+struct rmnet_pcpu_stats {
+	struct rmnet_vnd_stats stats;
+	struct u64_stats_sync syncp;
+};
+
 struct rmnet_priv {
-	struct rmnet_endpoint local_ep;
 	u8 mux_id;
 	struct net_device *real_dev;
+	struct rmnet_pcpu_stats __percpu *pcpu_stats;
+	struct gro_cells gro_cells;
 };
 
 struct rmnet_port *rmnet_get_port(struct net_device *real_dev);
-
+struct rmnet_endpoint *rmnet_get_endpoint(struct rmnet_port *port, u8 mux_id);
+int rmnet_add_bridge(struct net_device *rmnet_dev,
+		     struct net_device *slave_dev,
+		     struct netlink_ext_ack *extack);
+int rmnet_del_bridge(struct net_device *rmnet_dev,
+		     struct net_device *slave_dev);
 #endif /* _RMNET_CONFIG_H_ */
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
index 540c7622dcb1..29842ccc91a9 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -43,60 +43,23 @@ static void rmnet_set_skb_proto(struct sk_buff *skb)
 
 /* Generic handler */
 
-static rx_handler_result_t
-rmnet_bridge_handler(struct sk_buff *skb, struct rmnet_endpoint *ep)
+static void
+rmnet_deliver_skb(struct sk_buff *skb)
 {
-	if (!ep->egress_dev)
-		kfree_skb(skb);
-	else
-		rmnet_egress_handler(skb, ep);
-
-	return RX_HANDLER_CONSUMED;
-}
-
-static rx_handler_result_t
-rmnet_deliver_skb(struct sk_buff *skb, struct rmnet_endpoint *ep)
-{
-	switch (ep->rmnet_mode) {
-	case RMNET_EPMODE_NONE:
-		return RX_HANDLER_PASS;
-
-	case RMNET_EPMODE_BRIDGE:
-		return rmnet_bridge_handler(skb, ep);
-
-	case RMNET_EPMODE_VND:
-		skb_reset_transport_header(skb);
-		skb_reset_network_header(skb);
-		rmnet_vnd_rx_fixup(skb, skb->dev);
-
-		skb->pkt_type = PACKET_HOST;
-		skb_set_mac_header(skb, 0);
-		netif_receive_skb(skb);
-		return RX_HANDLER_CONSUMED;
-
-	default:
-		kfree_skb(skb);
-		return RX_HANDLER_CONSUMED;
-	}
-}
-
-static rx_handler_result_t
-rmnet_ingress_deliver_packet(struct sk_buff *skb,
-			     struct rmnet_port *port)
-{
-	if (!port) {
-		kfree_skb(skb);
-		return RX_HANDLER_CONSUMED;
-	}
+	struct rmnet_priv *priv = netdev_priv(skb->dev);
 
-	skb->dev = port->local_ep.egress_dev;
+	skb_reset_transport_header(skb);
+	skb_reset_network_header(skb);
+	rmnet_vnd_rx_fixup(skb, skb->dev);
 
-	return rmnet_deliver_skb(skb, &port->local_ep);
+	skb->pkt_type = PACKET_HOST;
+	skb_set_mac_header(skb, 0);
+	gro_cells_receive(&priv->gro_cells, skb);
 }
 
 /* MAP handler */
 
-static rx_handler_result_t
+static void
 __rmnet_map_ingress_handler(struct sk_buff *skb,
 			    struct rmnet_port *port)
 {
@@ -109,53 +72,50 @@ __rmnet_map_ingress_handler(struct sk_buff *skb,
 		    & RMNET_INGRESS_FORMAT_MAP_COMMANDS)
 			return rmnet_map_command(skb, port);
 
-		kfree_skb(skb);
-		return RX_HANDLER_CONSUMED;
+		goto free_skb;
 	}
 
 	mux_id = RMNET_MAP_GET_MUX_ID(skb);
 	len = RMNET_MAP_GET_LENGTH(skb) - RMNET_MAP_GET_PAD(skb);
 
-	if (mux_id >= RMNET_MAX_LOGICAL_EP) {
-		kfree_skb(skb);
-		return RX_HANDLER_CONSUMED;
-	}
+	if (mux_id >= RMNET_MAX_LOGICAL_EP)
+		goto free_skb;
 
-	ep = &port->muxed_ep[mux_id];
+	ep = rmnet_get_endpoint(port, mux_id);
+	if (!ep)
+		goto free_skb;
 
-	if (port->ingress_data_format & RMNET_INGRESS_FORMAT_DEMUXING)
-		skb->dev = ep->egress_dev;
+	skb->dev = ep->egress_dev;
 
 	/* Subtract MAP header */
 	skb_pull(skb, sizeof(struct rmnet_map_header));
 	skb_trim(skb, len);
 	rmnet_set_skb_proto(skb);
-	return rmnet_deliver_skb(skb, ep);
+	rmnet_deliver_skb(skb);
+	return;
+
+free_skb:
+	kfree_skb(skb);
 }
 
-static rx_handler_result_t
+static void
 rmnet_map_ingress_handler(struct sk_buff *skb,
 			  struct rmnet_port *port)
 {
 	struct sk_buff *skbn;
-	int rc;
 
 	if (port->ingress_data_format & RMNET_INGRESS_FORMAT_DEAGGREGATION) {
 		while ((skbn = rmnet_map_deaggregate(skb)) != NULL)
 			__rmnet_map_ingress_handler(skbn, port);
 
 		consume_skb(skb);
-		rc = RX_HANDLER_CONSUMED;
 	} else {
-		rc = __rmnet_map_ingress_handler(skb, port);
+		__rmnet_map_ingress_handler(skb, port);
 	}
-
-	return rc;
 }
 
 static int rmnet_map_egress_handler(struct sk_buff *skb,
-				    struct rmnet_port *port,
-				    struct rmnet_endpoint *ep,
+				    struct rmnet_port *port, u8 mux_id,
 				    struct net_device *orig_dev)
 {
 	int required_headroom, additional_header_len;
@@ -174,10 +134,10 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
 		return RMNET_MAP_CONSUMED;
 
 	if (port->egress_data_format & RMNET_EGRESS_FORMAT_MUXING) {
-		if (ep->mux_id == 0xff)
+		if (mux_id == 0xff)
 			map_header->mux_id = 0;
 		else
-			map_header->mux_id = ep->mux_id;
+			map_header->mux_id = mux_id;
 	}
 
 	skb->protocol = htons(ETH_P_MAP);
@@ -185,6 +145,15 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
 	return RMNET_MAP_SUCCESS;
 }
 
+static void
+rmnet_bridge_handler(struct sk_buff *skb, struct net_device *bridge_dev)
+{
+	if (bridge_dev) {
+		skb->dev = bridge_dev;
+		dev_queue_xmit(skb);
+	}
+}
+
 /* Ingress / Egress Entry Points */
 
 /* Processes packet as per ingress data format for receiving device. Logical
@@ -193,56 +162,45 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
  */
 rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb)
 {
-	struct rmnet_port *port;
 	struct sk_buff *skb = *pskb;
+	struct rmnet_port *port;
 	struct net_device *dev;
-	int rc;
 
 	if (!skb)
-		return RX_HANDLER_CONSUMED;
+		goto done;
 
 	dev = skb->dev;
 	port = rmnet_get_port(dev);
 
-	if (port->ingress_data_format & RMNET_INGRESS_FORMAT_MAP) {
-		rc = rmnet_map_ingress_handler(skb, port);
-	} else {
-		switch (ntohs(skb->protocol)) {
-		case ETH_P_MAP:
-			if (port->local_ep.rmnet_mode ==
-				RMNET_EPMODE_BRIDGE) {
-				rc = rmnet_ingress_deliver_packet(skb, port);
-			} else {
-				kfree_skb(skb);
-				rc = RX_HANDLER_CONSUMED;
-			}
-			break;
-
-		case ETH_P_IP:
-		case ETH_P_IPV6:
-			rc = rmnet_ingress_deliver_packet(skb, port);
-			break;
-
-		default:
-			rc = RX_HANDLER_PASS;
-		}
+	switch (port->rmnet_mode) {
+	case RMNET_EPMODE_VND:
+		if (port->ingress_data_format & RMNET_INGRESS_FORMAT_MAP)
+			rmnet_map_ingress_handler(skb, port);
+		break;
+	case RMNET_EPMODE_BRIDGE:
+		rmnet_bridge_handler(skb, port->bridge_ep);
+		break;
 	}
 
-	return rc;
+done:
+	return RX_HANDLER_CONSUMED;
 }
 
 /* Modifies packet as per logical endpoint configuration and egress data format
  * for egress device configured in logical endpoint. Packet is then transmitted
  * on the egress device.
  */
-void rmnet_egress_handler(struct sk_buff *skb,
-			  struct rmnet_endpoint *ep)
+void rmnet_egress_handler(struct sk_buff *skb)
 {
 	struct net_device *orig_dev;
 	struct rmnet_port *port;
+	struct rmnet_priv *priv;
+	u8 mux_id;
 
 	orig_dev = skb->dev;
-	skb->dev = ep->egress_dev;
+	priv = netdev_priv(orig_dev);
+	skb->dev = priv->real_dev;
+	mux_id = priv->mux_id;
 
 	port = rmnet_get_port(skb->dev);
 	if (!port) {
@@ -251,7 +209,7 @@ void rmnet_egress_handler(struct sk_buff *skb,
 	}
 
 	if (port->egress_data_format & RMNET_EGRESS_FORMAT_MAP) {
-		switch (rmnet_map_egress_handler(skb, port, ep, orig_dev)) {
+		switch (rmnet_map_egress_handler(skb, port, mux_id, orig_dev)) {
 		case RMNET_MAP_CONSUMED:
 			return;
 
@@ -264,8 +222,7 @@ void rmnet_egress_handler(struct sk_buff *skb,
 		}
 	}
 
-	if (ep->rmnet_mode == RMNET_EPMODE_VND)
-		rmnet_vnd_tx_fixup(skb, orig_dev);
+	rmnet_vnd_tx_fixup(skb, orig_dev);
 
 	dev_queue_xmit(skb);
 }
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h
index f2638cf5693c..3537e4ceedb3 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h
@@ -18,8 +18,7 @@
 
 #include "rmnet_config.h"
 
-void rmnet_egress_handler(struct sk_buff *skb,
-			  struct rmnet_endpoint *ep);
+void rmnet_egress_handler(struct sk_buff *skb);
 
 rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb);
 
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
index ce2302c25b12..3af3fe7b5457 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
@@ -80,7 +80,6 @@ u8 rmnet_map_demultiplex(struct sk_buff *skb);
 struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb);
 struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb,
 						  int hdrlen, int pad);
-rx_handler_result_t rmnet_map_command(struct sk_buff *skb,
-				      struct rmnet_port *port);
+void rmnet_map_command(struct sk_buff *skb, struct rmnet_port *port);
 
 #endif /* _RMNET_MAP_H_ */
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
index d1ea5e21b982..51e604923ac1 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
@@ -17,7 +17,7 @@
 #include "rmnet_vnd.h"
 
 static u8 rmnet_map_do_flow_control(struct sk_buff *skb,
-				    struct rmnet_port *rdinfo,
+				    struct rmnet_port *port,
 				    int enable)
 {
 	struct rmnet_map_control_command *cmd;
@@ -37,7 +37,7 @@ static u8 rmnet_map_do_flow_control(struct sk_buff *skb,
 		return RX_HANDLER_CONSUMED;
 	}
 
-	ep = &rdinfo->muxed_ep[mux_id];
+	ep = rmnet_get_endpoint(port, mux_id);
 	vnd = ep->egress_dev;
 
 	ip_family = cmd->flow_control.ip_family;
@@ -76,8 +76,7 @@ static void rmnet_map_send_ack(struct sk_buff *skb,
 /* Process MAP command frame and send N/ACK message as appropriate. Message cmd
  * name is decoded here and appropriate handler is called.
  */
-rx_handler_result_t rmnet_map_command(struct sk_buff *skb,
-				      struct rmnet_port *port)
+void rmnet_map_command(struct sk_buff *skb, struct rmnet_port *port)
 {
 	struct rmnet_map_control_command *cmd;
 	unsigned char command_name;
@@ -102,5 +101,4 @@ rx_handler_result_t rmnet_map_command(struct sk_buff *skb,
 	}
 	if (rc == RMNET_MAP_COMMAND_ACK)
 		rmnet_map_send_ack(skb, rc);
-	return RX_HANDLER_CONSUMED;
 }
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
index 7967198fdd90..49102f922b31 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
@@ -19,23 +19,15 @@
 #define RMNET_TX_QUEUE_LEN         1000
 
 /* Constants */
-#define RMNET_EGRESS_FORMAT__RESERVED__         BIT(0)
 #define RMNET_EGRESS_FORMAT_MAP                 BIT(1)
 #define RMNET_EGRESS_FORMAT_AGGREGATION         BIT(2)
 #define RMNET_EGRESS_FORMAT_MUXING              BIT(3)
-#define RMNET_EGRESS_FORMAT_MAP_CKSUMV3         BIT(4)
-#define RMNET_EGRESS_FORMAT_MAP_CKSUMV4         BIT(5)
 
-#define RMNET_INGRESS_FIX_ETHERNET              BIT(0)
 #define RMNET_INGRESS_FORMAT_MAP                BIT(1)
 #define RMNET_INGRESS_FORMAT_DEAGGREGATION      BIT(2)
 #define RMNET_INGRESS_FORMAT_DEMUXING           BIT(3)
 #define RMNET_INGRESS_FORMAT_MAP_COMMANDS       BIT(4)
-#define RMNET_INGRESS_FORMAT_MAP_CKSUMV3        BIT(5)
-#define RMNET_INGRESS_FORMAT_MAP_CKSUMV4        BIT(6)
 
-/* Pass the frame up the stack with no modifications to skb->dev */
-#define RMNET_EPMODE_NONE (0)
 /* Replace skb->dev to a virtual rmnet device and pass up the stack */
 #define RMNET_EPMODE_VND (1)
 /* Pass the frame directly to another device with dev_queue_xmit() */
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
index 7f90d5587653..9caa5e387450 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
@@ -27,14 +27,28 @@
 
 void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device *dev)
 {
-	dev->stats.rx_packets++;
-	dev->stats.rx_bytes += skb->len;
+	struct rmnet_priv *priv = netdev_priv(dev);
+	struct rmnet_pcpu_stats *pcpu_ptr;
+
+	pcpu_ptr = this_cpu_ptr(priv->pcpu_stats);
+
+	u64_stats_update_begin(&pcpu_ptr->syncp);
+	pcpu_ptr->stats.rx_pkts++;
+	pcpu_ptr->stats.rx_bytes += skb->len;
+	u64_stats_update_end(&pcpu_ptr->syncp);
 }
 
 void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev)
 {
-	dev->stats.tx_packets++;
-	dev->stats.tx_bytes += skb->len;
+	struct rmnet_priv *priv = netdev_priv(dev);
+	struct rmnet_pcpu_stats *pcpu_ptr;
+
+	pcpu_ptr = this_cpu_ptr(priv->pcpu_stats);
+
+	u64_stats_update_begin(&pcpu_ptr->syncp);
+	pcpu_ptr->stats.tx_pkts++;
+	pcpu_ptr->stats.tx_bytes += skb->len;
+	u64_stats_update_end(&pcpu_ptr->syncp);
 }
 
 /* Network Device Operations */
@@ -45,10 +59,10 @@ static netdev_tx_t rmnet_vnd_start_xmit(struct sk_buff *skb,
 	struct rmnet_priv *priv;
 
 	priv = netdev_priv(dev);
-	if (priv->local_ep.egress_dev) {
-		rmnet_egress_handler(skb, &priv->local_ep);
+	if (priv->real_dev) {
+		rmnet_egress_handler(skb);
 	} else {
-		dev->stats.tx_dropped++;
+		this_cpu_inc(priv->pcpu_stats->stats.tx_drops);
 		kfree_skb(skb);
 	}
 	return NETDEV_TX_OK;
@@ -70,10 +84,72 @@ static int rmnet_vnd_get_iflink(const struct net_device *dev)
 	return priv->real_dev->ifindex;
 }
 
+static int rmnet_vnd_init(struct net_device *dev)
+{
+	struct rmnet_priv *priv = netdev_priv(dev);
+	int err;
+
+	priv->pcpu_stats = alloc_percpu(struct rmnet_pcpu_stats);
+	if (!priv->pcpu_stats)
+		return -ENOMEM;
+
+	err = gro_cells_init(&priv->gro_cells, dev);
+	if (err) {
+		free_percpu(priv->pcpu_stats);
+		return err;
+	}
+
+	return 0;
+}
+
+static void rmnet_vnd_uninit(struct net_device *dev)
+{
+	struct rmnet_priv *priv = netdev_priv(dev);
+
+	gro_cells_destroy(&priv->gro_cells);
+	free_percpu(priv->pcpu_stats);
+}
+
+static void rmnet_get_stats64(struct net_device *dev,
+			      struct rtnl_link_stats64 *s)
+{
+	struct rmnet_priv *priv = netdev_priv(dev);
+	struct rmnet_vnd_stats total_stats;
+	struct rmnet_pcpu_stats *pcpu_ptr;
+	unsigned int cpu, start;
+
+	memset(&total_stats, 0, sizeof(struct rmnet_vnd_stats));
+
+	for_each_possible_cpu(cpu) {
+		pcpu_ptr = this_cpu_ptr(priv->pcpu_stats);
+
+		do {
+			start = u64_stats_fetch_begin_irq(&pcpu_ptr->syncp);
+			total_stats.rx_pkts += pcpu_ptr->stats.rx_pkts;
+			total_stats.rx_bytes += pcpu_ptr->stats.rx_bytes;
+			total_stats.tx_pkts += pcpu_ptr->stats.tx_pkts;
+			total_stats.tx_bytes += pcpu_ptr->stats.tx_bytes;
+		} while (u64_stats_fetch_retry_irq(&pcpu_ptr->syncp, start));
+
+		total_stats.tx_drops += pcpu_ptr->stats.tx_drops;
+	}
+
+	s->rx_packets = total_stats.rx_pkts;
+	s->rx_bytes = total_stats.rx_bytes;
+	s->tx_packets = total_stats.tx_pkts;
+	s->tx_bytes = total_stats.tx_bytes;
+	s->tx_dropped = total_stats.tx_drops;
+}
+
 static const struct net_device_ops rmnet_vnd_ops = {
 	.ndo_start_xmit = rmnet_vnd_start_xmit,
 	.ndo_change_mtu = rmnet_vnd_change_mtu,
 	.ndo_get_iflink = rmnet_vnd_get_iflink,
+	.ndo_add_slave  = rmnet_add_bridge,
+	.ndo_del_slave  = rmnet_del_bridge,
+	.ndo_init       = rmnet_vnd_init,
+	.ndo_uninit     = rmnet_vnd_uninit,
+	.ndo_get_stats64 = rmnet_get_stats64,
 };
 
 /* Called by kernel whenever a new rmnet<n> device is created. Sets MTU,
@@ -100,17 +176,19 @@ void rmnet_vnd_setup(struct net_device *rmnet_dev)
 
 int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev,
 		      struct rmnet_port *port,
-		      struct net_device *real_dev)
+		      struct net_device *real_dev,
+		      struct rmnet_endpoint *ep)
 {
 	struct rmnet_priv *priv;
 	int rc;
 
-	if (port->rmnet_devices[id])
+	if (ep->egress_dev)
 		return -EINVAL;
 
 	rc = register_netdevice(rmnet_dev);
 	if (!rc) {
-		port->rmnet_devices[id] = rmnet_dev;
+		ep->egress_dev = rmnet_dev;
+		ep->mux_id = id;
 		port->nr_rmnet_devs++;
 
 		rmnet_dev->rtnl_link_ops = &rmnet_link_ops;
@@ -125,12 +203,13 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev,
 	return rc;
 }
 
-int rmnet_vnd_dellink(u8 id, struct rmnet_port *port)
+int rmnet_vnd_dellink(u8 id, struct rmnet_port *port,
+		      struct rmnet_endpoint *ep)
 {
-	if (id >= RMNET_MAX_LOGICAL_EP || !port->rmnet_devices[id])
+	if (id >= RMNET_MAX_LOGICAL_EP || !ep->egress_dev)
 		return -EINVAL;
 
-	port->rmnet_devices[id] = NULL;
+	ep->egress_dev = NULL;
 	port->nr_rmnet_devs--;
 	return 0;
 }
@@ -143,21 +222,6 @@ u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev)
 	return priv->mux_id;
 }
 
-/* Gets the logical endpoint configuration for a RmNet virtual network device
- * node. Caller should confirm that devices is a RmNet VND before calling.
- */
-struct rmnet_endpoint *rmnet_vnd_get_endpoint(struct net_device *rmnet_dev)
-{
-	struct rmnet_priv *priv;
-
-	if (!rmnet_dev)
-		return NULL;
-
-	priv = netdev_priv(rmnet_dev);
-
-	return &priv->local_ep;
-}
-
 int rmnet_vnd_do_flow_control(struct net_device *rmnet_dev, int enable)
 {
 	netdev_dbg(rmnet_dev, "Setting VND TX queue state to %d\n", enable);
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
index 8a4042f0f6bf..71e4c3286951 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
@@ -17,11 +17,12 @@
 #define _RMNET_VND_H_
 
 int rmnet_vnd_do_flow_control(struct net_device *dev, int enable);
-struct rmnet_endpoint *rmnet_vnd_get_endpoint(struct net_device *dev);
 int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev,
 		      struct rmnet_port *port,
-		      struct net_device *real_dev);
-int rmnet_vnd_dellink(u8 id, struct rmnet_port *port);
+		      struct net_device *real_dev,
+		      struct rmnet_endpoint *ep);
+int rmnet_vnd_dellink(u8 id, struct rmnet_port *port,
+		      struct rmnet_endpoint *ep);
 void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device *dev);
 void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev);
 u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev);
diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c
index bed34684994f..7e011c1c1e6e 100644
--- a/drivers/net/ethernet/realtek/atp.c
+++ b/drivers/net/ethernet/realtek/atp.c
@@ -170,6 +170,7 @@ struct net_local {
     spinlock_t lock;
     struct net_device *next_module;
     struct timer_list timer;	/* Media selection timer. */
+    struct net_device *dev;	/* Timer dev. */
     unsigned long last_rx_time;	/* Last Rx, in jiffies, to handle Rx hang. */
     int saved_tx_size;
     unsigned int tx_unit_busy:1;
@@ -184,7 +185,7 @@ struct net_local {
 #define TIMED_CHECKER (HZ/4)
 #ifdef TIMED_CHECKER
 #include <linux/timer.h>
-static void atp_timed_checker(unsigned long ignored);
+static void atp_timed_checker(struct timer_list *t);
 #endif
 
 /* Index to functions, as function prototypes. */
@@ -438,10 +439,9 @@ static int net_open(struct net_device *dev)
 
 	hardware_init(dev);
 
-	init_timer(&lp->timer);
+	lp->dev = dev;
+	timer_setup(&lp->timer, atp_timed_checker, 0);
 	lp->timer.expires = jiffies + TIMED_CHECKER;
-	lp->timer.data = (unsigned long)dev;
-	lp->timer.function = atp_timed_checker;    /* timer handler */
 	add_timer(&lp->timer);
 
 	netif_start_queue(dev);
@@ -710,11 +710,11 @@ static irqreturn_t atp_interrupt(int irq, void *dev_instance)
 #ifdef TIMED_CHECKER
 /* This following code fixes a rare (and very difficult to track down)
    problem where the adapter forgets its ethernet address. */
-static void atp_timed_checker(unsigned long data)
+static void atp_timed_checker(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *)data;
+	struct net_local *lp = from_timer(lp, t, timer);
+	struct net_device *dev = lp->dev;
 	long ioaddr = dev->base_addr;
-	struct net_local *lp = netdev_priv(dev);
 	int tickssofar = jiffies - lp->last_rx_time;
 	int i;
 
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index a3c949ea7d1a..dcb8c39382e7 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -399,6 +399,12 @@ enum rtl_registers {
 	RxMaxSize	= 0xda,
 	CPlusCmd	= 0xe0,
 	IntrMitigate	= 0xe2,
+
+#define RTL_COALESCE_MASK	0x0f
+#define RTL_COALESCE_SHIFT	4
+#define RTL_COALESCE_T_MAX	(RTL_COALESCE_MASK)
+#define RTL_COALESCE_FRAME_MAX	(RTL_COALESCE_MASK << 2)
+
 	RxDescAddrLow	= 0xe4,
 	RxDescAddrHigh	= 0xe8,
 	EarlyTxThres	= 0xec,	/* 8169. Unit of 32 bytes. */
@@ -795,6 +801,7 @@ struct rtl8169_private {
 	u16 cp_cmd;
 
 	u16 event_slow;
+	const struct rtl_coalesce_info *coalesce_info;
 
 	struct mdio_ops {
 		void (*write)(struct rtl8169_private *, int, int);
@@ -1975,8 +1982,6 @@ static int rtl8169_set_speed_xmii(struct net_device *dev,
 		rtl_writephy(tp, MII_ADVERTISE, auto_nego);
 		rtl_writephy(tp, MII_CTRL1000, giga_ctrl);
 	} else {
-		giga_ctrl = 0;
-
 		if (speed == SPEED_10)
 			bmcr = 0;
 		else if (speed == SPEED_100)
@@ -2363,10 +2368,229 @@ static int rtl8169_nway_reset(struct net_device *dev)
 	return mii_nway_restart(&tp->mii);
 }
 
+/*
+ * Interrupt coalescing
+ *
+ * > 1 - the availability of the IntrMitigate (0xe2) register through the
+ * >     8169, 8168 and 810x line of chipsets
+ *
+ * 8169, 8168, and 8136(810x) serial chipsets support it.
+ *
+ * > 2 - the Tx timer unit at gigabit speed
+ *
+ * The unit of the timer depends on both the speed and the setting of CPlusCmd
+ * (0xe0) bit 1 and bit 0.
+ *
+ * For 8169
+ * bit[1:0] \ speed        1000M           100M            10M
+ * 0 0                     320ns           2.56us          40.96us
+ * 0 1                     2.56us          20.48us         327.7us
+ * 1 0                     5.12us          40.96us         655.4us
+ * 1 1                     10.24us         81.92us         1.31ms
+ *
+ * For the other
+ * bit[1:0] \ speed        1000M           100M            10M
+ * 0 0                     5us             2.56us          40.96us
+ * 0 1                     40us            20.48us         327.7us
+ * 1 0                     80us            40.96us         655.4us
+ * 1 1                     160us           81.92us         1.31ms
+ */
+
+/* rx/tx scale factors for one particular CPlusCmd[0:1] value */
+struct rtl_coalesce_scale {
+	/* Rx / Tx */
+	u32 nsecs[2];
+};
+
+/* rx/tx scale factors for all CPlusCmd[0:1] cases */
+struct rtl_coalesce_info {
+	u32 speed;
+	struct rtl_coalesce_scale scalev[4];	/* each CPlusCmd[0:1] case */
+};
+
+/* produce (r,t) pairs with each being in series of *1, *8, *8*2, *8*2*2 */
+#define rxtx_x1822(r, t) {		\
+	{{(r),		(t)}},		\
+	{{(r)*8,	(t)*8}},	\
+	{{(r)*8*2,	(t)*8*2}},	\
+	{{(r)*8*2*2,	(t)*8*2*2}},	\
+}
+static const struct rtl_coalesce_info rtl_coalesce_info_8169[] = {
+	/* speed	delays:     rx00   tx00	*/
+	{ SPEED_10,	rxtx_x1822(40960, 40960)	},
+	{ SPEED_100,	rxtx_x1822( 2560,  2560)	},
+	{ SPEED_1000,	rxtx_x1822(  320,   320)	},
+	{ 0 },
+};
+
+static const struct rtl_coalesce_info rtl_coalesce_info_8168_8136[] = {
+	/* speed	delays:     rx00   tx00	*/
+	{ SPEED_10,	rxtx_x1822(40960, 40960)	},
+	{ SPEED_100,	rxtx_x1822( 2560,  2560)	},
+	{ SPEED_1000,	rxtx_x1822( 5000,  5000)	},
+	{ 0 },
+};
+#undef rxtx_x1822
+
+/* get rx/tx scale vector corresponding to current speed */
+static const struct rtl_coalesce_info *rtl_coalesce_info(struct net_device *dev)
+{
+	struct rtl8169_private *tp = netdev_priv(dev);
+	struct ethtool_link_ksettings ecmd;
+	const struct rtl_coalesce_info *ci;
+	int rc;
+
+	rc = rtl8169_get_link_ksettings(dev, &ecmd);
+	if (rc < 0)
+		return ERR_PTR(rc);
+
+	for (ci = tp->coalesce_info; ci->speed != 0; ci++) {
+		if (ecmd.base.speed == ci->speed) {
+			return ci;
+		}
+	}
+
+	return ERR_PTR(-ELNRNG);
+}
+
+static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
+{
+	struct rtl8169_private *tp = netdev_priv(dev);
+	void __iomem *ioaddr = tp->mmio_addr;
+	const struct rtl_coalesce_info *ci;
+	const struct rtl_coalesce_scale *scale;
+	struct {
+		u32 *max_frames;
+		u32 *usecs;
+	} coal_settings [] = {
+		{ &ec->rx_max_coalesced_frames, &ec->rx_coalesce_usecs },
+		{ &ec->tx_max_coalesced_frames, &ec->tx_coalesce_usecs }
+	}, *p = coal_settings;
+	int i;
+	u16 w;
+
+	memset(ec, 0, sizeof(*ec));
+
+	/* get rx/tx scale corresponding to current speed and CPlusCmd[0:1] */
+	ci = rtl_coalesce_info(dev);
+	if (IS_ERR(ci))
+		return PTR_ERR(ci);
+
+	scale = &ci->scalev[RTL_R16(CPlusCmd) & 3];
+
+	/* read IntrMitigate and adjust according to scale */
+	for (w = RTL_R16(IntrMitigate); w; w >>= RTL_COALESCE_SHIFT, p++) {
+		*p->max_frames = (w & RTL_COALESCE_MASK) << 2;
+		w >>= RTL_COALESCE_SHIFT;
+		*p->usecs = w & RTL_COALESCE_MASK;
+	}
+
+	for (i = 0; i < 2; i++) {
+		p = coal_settings + i;
+		*p->usecs = (*p->usecs * scale->nsecs[i]) / 1000;
+
+		/*
+		 * ethtool_coalesce says it is illegal to set both usecs and
+		 * max_frames to 0.
+		 */
+		if (!*p->usecs && !*p->max_frames)
+			*p->max_frames = 1;
+	}
+
+	return 0;
+}
+
+/* choose appropriate scale factor and CPlusCmd[0:1] for (speed, nsec) */
+static const struct rtl_coalesce_scale *rtl_coalesce_choose_scale(
+			struct net_device *dev, u32 nsec, u16 *cp01)
+{
+	const struct rtl_coalesce_info *ci;
+	u16 i;
+
+	ci = rtl_coalesce_info(dev);
+	if (IS_ERR(ci))
+		return ERR_CAST(ci);
+
+	for (i = 0; i < 4; i++) {
+		u32 rxtx_maxscale = max(ci->scalev[i].nsecs[0],
+					ci->scalev[i].nsecs[1]);
+		if (nsec <= rxtx_maxscale * RTL_COALESCE_T_MAX) {
+			*cp01 = i;
+			return &ci->scalev[i];
+		}
+	}
+
+	return ERR_PTR(-EINVAL);
+}
+
+static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
+{
+	struct rtl8169_private *tp = netdev_priv(dev);
+	void __iomem *ioaddr = tp->mmio_addr;
+	const struct rtl_coalesce_scale *scale;
+	struct {
+		u32 frames;
+		u32 usecs;
+	} coal_settings [] = {
+		{ ec->rx_max_coalesced_frames, ec->rx_coalesce_usecs },
+		{ ec->tx_max_coalesced_frames, ec->tx_coalesce_usecs }
+	}, *p = coal_settings;
+	u16 w = 0, cp01;
+	int i;
+
+	scale = rtl_coalesce_choose_scale(dev,
+			max(p[0].usecs, p[1].usecs) * 1000, &cp01);
+	if (IS_ERR(scale))
+		return PTR_ERR(scale);
+
+	for (i = 0; i < 2; i++, p++) {
+		u32 units;
+
+		/*
+		 * accept max_frames=1 we returned in rtl_get_coalesce.
+		 * accept it not only when usecs=0 because of e.g. the following scenario:
+		 *
+		 * - both rx_usecs=0 & rx_frames=0 in hardware (no delay on RX)
+		 * - rtl_get_coalesce returns rx_usecs=0, rx_frames=1
+		 * - then user does `ethtool -C eth0 rx-usecs 100`
+		 *
+		 * since ethtool sends to kernel whole ethtool_coalesce
+		 * settings, if we do not handle rx_usecs=!0, rx_frames=1
+		 * we'll reject it below in `frames % 4 != 0`.
+		 */
+		if (p->frames == 1) {
+			p->frames = 0;
+		}
+
+		units = p->usecs * 1000 / scale->nsecs[i];
+		if (p->frames > RTL_COALESCE_FRAME_MAX || p->frames % 4)
+			return -EINVAL;
+
+		w <<= RTL_COALESCE_SHIFT;
+		w |= units;
+		w <<= RTL_COALESCE_SHIFT;
+		w |= p->frames >> 2;
+	}
+
+	rtl_lock_work(tp);
+
+	RTL_W16(IntrMitigate, swab16(w));
+
+	tp->cp_cmd = (tp->cp_cmd & ~3) | cp01;
+	RTL_W16(CPlusCmd, tp->cp_cmd);
+	RTL_R16(CPlusCmd);
+
+	rtl_unlock_work(tp);
+
+	return 0;
+}
+
 static const struct ethtool_ops rtl8169_ethtool_ops = {
 	.get_drvinfo		= rtl8169_get_drvinfo,
 	.get_regs_len		= rtl8169_get_regs_len,
 	.get_link		= ethtool_op_get_link,
+	.get_coalesce		= rtl_get_coalesce,
+	.set_coalesce		= rtl_set_coalesce,
 	.set_settings		= rtl8169_set_settings,
 	.get_msglevel		= rtl8169_get_msglevel,
 	.set_msglevel		= rtl8169_set_msglevel,
@@ -4401,10 +4625,9 @@ static void rtl_schedule_task(struct rtl8169_private *tp, enum rtl_flag flag)
 		schedule_work(&tp->wk.work);
 }
 
-static void rtl8169_phy_timer(unsigned long __opaque)
+static void rtl8169_phy_timer(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *)__opaque;
-	struct rtl8169_private *tp = netdev_priv(dev);
+	struct rtl8169_private *tp = from_timer(tp, t, timer);
 
 	rtl_schedule_task(tp, RTL_FLAG_TASK_PHY_PENDING);
 }
@@ -8062,6 +8285,7 @@ static const struct rtl_cfg_info {
 	unsigned int align;
 	u16 event_slow;
 	unsigned features;
+	const struct rtl_coalesce_info *coalesce_info;
 	u8 default_ver;
 } rtl_cfg_infos [] = {
 	[RTL_CFG_0] = {
@@ -8070,6 +8294,7 @@ static const struct rtl_cfg_info {
 		.align		= 0,
 		.event_slow	= SYSErr | LinkChg | RxOverflow | RxFIFOOver,
 		.features	= RTL_FEATURE_GMII,
+		.coalesce_info	= rtl_coalesce_info_8169,
 		.default_ver	= RTL_GIGA_MAC_VER_01,
 	},
 	[RTL_CFG_1] = {
@@ -8078,6 +8303,7 @@ static const struct rtl_cfg_info {
 		.align		= 8,
 		.event_slow	= SYSErr | LinkChg | RxOverflow,
 		.features	= RTL_FEATURE_GMII | RTL_FEATURE_MSI,
+		.coalesce_info	= rtl_coalesce_info_8168_8136,
 		.default_ver	= RTL_GIGA_MAC_VER_11,
 	},
 	[RTL_CFG_2] = {
@@ -8087,6 +8313,7 @@ static const struct rtl_cfg_info {
 		.event_slow	= SYSErr | LinkChg | RxOverflow | RxFIFOOver |
 				  PCSTimeout,
 		.features	= RTL_FEATURE_MSI,
+		.coalesce_info	= rtl_coalesce_info_8168_8136,
 		.default_ver	= RTL_GIGA_MAC_VER_13,
 	}
 };
@@ -8450,11 +8677,12 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	tp->hw_start = cfg->hw_start;
 	tp->event_slow = cfg->event_slow;
+	tp->coalesce_info = cfg->coalesce_info;
 
 	tp->opts1_mask = (tp->mac_version != RTL_GIGA_MAC_VER_01) ?
 		~(RxBOVF | RxFOVF) : ~0;
 
-	setup_timer(&tp->timer, rtl8169_phy_timer, (unsigned long)dev);
+	timer_setup(&tp->timer, rtl8169_phy_timer, 0);
 
 	tp->rtl_fw = RTL_FIRMWARE_UNKNOWN;
 
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index fdf30bfa403b..2b962d349f5f 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -403,8 +403,9 @@ static void ravb_emac_init(struct net_device *ndev)
 	/* Receive frame limit set register */
 	ravb_write(ndev, ndev->mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN, RFLR);
 
-	/* PAUSE prohibition */
+	/* EMAC Mode: PAUSE prohibition; Duplex; RX Checksum; TX; RX */
 	ravb_write(ndev, ECMR_ZPF | (priv->duplex ? ECMR_DM : 0) |
+		   (ndev->features & NETIF_F_RXCSUM ? ECMR_RCSC : 0) |
 		   ECMR_TE | ECMR_RE, ECMR);
 
 	ravb_set_rate(ndev);
@@ -520,6 +521,19 @@ static void ravb_get_tx_tstamp(struct net_device *ndev)
 	}
 }
 
+static void ravb_rx_csum(struct sk_buff *skb)
+{
+	u8 *hw_csum;
+
+	/* The hardware checksum is 2 bytes appended to packet data */
+	if (unlikely(skb->len < 2))
+		return;
+	hw_csum = skb_tail_pointer(skb) - 2;
+	skb->csum = csum_unfold((__force __sum16)get_unaligned_le16(hw_csum));
+	skb->ip_summed = CHECKSUM_COMPLETE;
+	skb_trim(skb, skb->len - 2);
+}
+
 /* Packet receive function for Ethernet AVB */
 static bool ravb_rx(struct net_device *ndev, int *quota, int q)
 {
@@ -587,8 +601,11 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q)
 				ts.tv_nsec = le32_to_cpu(desc->ts_n);
 				shhwtstamps->hwtstamp = timespec64_to_ktime(ts);
 			}
+
 			skb_put(skb, pkt_len);
 			skb->protocol = eth_type_trans(skb, ndev);
+			if (ndev->features & NETIF_F_RXCSUM)
+				ravb_rx_csum(skb);
 			napi_gro_receive(&priv->napi[q], skb);
 			stats->rx_packets++;
 			stats->rx_bytes += pkt_len;
@@ -1337,20 +1354,15 @@ static void ravb_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
 {
 	struct ravb_private *priv = netdev_priv(ndev);
 
-	wol->supported = 0;
-	wol->wolopts = 0;
-
-	if (priv->clk) {
-		wol->supported = WAKE_MAGIC;
-		wol->wolopts = priv->wol_enabled ? WAKE_MAGIC : 0;
-	}
+	wol->supported = WAKE_MAGIC;
+	wol->wolopts = priv->wol_enabled ? WAKE_MAGIC : 0;
 }
 
 static int ravb_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
 {
 	struct ravb_private *priv = netdev_priv(ndev);
 
-	if (!priv->clk || wol->wolopts & ~WAKE_MAGIC)
+	if (wol->wolopts & ~WAKE_MAGIC)
 		return -EOPNOTSUPP;
 
 	priv->wol_enabled = !!(wol->wolopts & WAKE_MAGIC);
@@ -1842,6 +1854,38 @@ static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd)
 	return phy_mii_ioctl(phydev, req, cmd);
 }
 
+static void ravb_set_rx_csum(struct net_device *ndev, bool enable)
+{
+	struct ravb_private *priv = netdev_priv(ndev);
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+
+	/* Disable TX and RX */
+	ravb_rcv_snd_disable(ndev);
+
+	/* Modify RX Checksum setting */
+	ravb_modify(ndev, ECMR, ECMR_RCSC, enable ? ECMR_RCSC : 0);
+
+	/* Enable TX and RX */
+	ravb_rcv_snd_enable(ndev);
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static int ravb_set_features(struct net_device *ndev,
+			     netdev_features_t features)
+{
+	netdev_features_t changed = ndev->features ^ features;
+
+	if (changed & NETIF_F_RXCSUM)
+		ravb_set_rx_csum(ndev, features & NETIF_F_RXCSUM);
+
+	ndev->features = features;
+
+	return 0;
+}
+
 static const struct net_device_ops ravb_netdev_ops = {
 	.ndo_open		= ravb_open,
 	.ndo_stop		= ravb_close,
@@ -1853,6 +1897,7 @@ static const struct net_device_ops ravb_netdev_ops = {
 	.ndo_do_ioctl		= ravb_do_ioctl,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_set_features	= ravb_set_features,
 };
 
 /* MDIO bus init function */
@@ -1912,22 +1957,12 @@ MODULE_DEVICE_TABLE(of, ravb_match_table);
 
 static int ravb_set_gti(struct net_device *ndev)
 {
-
+	struct ravb_private *priv = netdev_priv(ndev);
 	struct device *dev = ndev->dev.parent;
-	struct device_node *np = dev->of_node;
 	unsigned long rate;
-	struct clk *clk;
 	uint64_t inc;
 
-	clk = of_clk_get(np, 0);
-	if (IS_ERR(clk)) {
-		dev_err(dev, "could not get clock\n");
-		return PTR_ERR(clk);
-	}
-
-	rate = clk_get_rate(clk);
-	clk_put(clk);
-
+	rate = clk_get_rate(priv->clk);
 	if (!rate)
 		return -EINVAL;
 
@@ -2004,6 +2039,9 @@ static int ravb_probe(struct platform_device *pdev)
 	if (!ndev)
 		return -ENOMEM;
 
+	ndev->features = NETIF_F_RXCSUM;
+	ndev->hw_features = NETIF_F_RXCSUM;
+
 	pm_runtime_enable(&pdev->dev);
 	pm_runtime_get_sync(&pdev->dev);
 
@@ -2073,10 +2111,11 @@ static int ravb_probe(struct platform_device *pdev)
 
 	priv->chip_id = chip_id;
 
-	/* Get clock, if not found that's OK but Wake-On-Lan is unavailable */
 	priv->clk = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(priv->clk))
-		priv->clk = NULL;
+	if (IS_ERR(priv->clk)) {
+		error = PTR_ERR(priv->clk);
+		goto out_release;
+	}
 
 	/* Set function */
 	ndev->netdev_ops = &ravb_netdev_ops;
@@ -2144,8 +2183,7 @@ static int ravb_probe(struct platform_device *pdev)
 	if (error)
 		goto out_napi_del;
 
-	if (priv->clk)
-		device_set_wakeup_capable(&pdev->dev, 1);
+	device_set_wakeup_capable(&pdev->dev, 1);
 
 	/* Print device information */
 	netdev_info(ndev, "Base address at %#x, %pM, IRQ %d.\n",
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index d2e88a30f57b..7e060aa9fbed 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -594,7 +594,7 @@ static struct sh_eth_cpu_data r8a7740_data = {
 };
 
 /* There is CPU dependent code */
-static void sh_eth_set_rate_r8a777x(struct net_device *ndev)
+static void sh_eth_set_rate_rcar(struct net_device *ndev)
 {
 	struct sh_eth_private *mdp = netdev_priv(ndev);
 
@@ -608,10 +608,10 @@ static void sh_eth_set_rate_r8a777x(struct net_device *ndev)
 	}
 }
 
-/* R8A7778/9 */
-static struct sh_eth_cpu_data r8a777x_data = {
+/* R-Car Gen1 */
+static struct sh_eth_cpu_data rcar_gen1_data = {
 	.set_duplex	= sh_eth_set_duplex,
-	.set_rate	= sh_eth_set_rate_r8a777x,
+	.set_rate	= sh_eth_set_rate_rcar,
 
 	.register_type	= SH_ETH_REG_FAST_RCAR,
 
@@ -635,10 +635,10 @@ static struct sh_eth_cpu_data r8a777x_data = {
 	.hw_swap	= 1,
 };
 
-/* R8A7790/1 */
-static struct sh_eth_cpu_data r8a779x_data = {
+/* R-Car Gen2 and RZ/G1 */
+static struct sh_eth_cpu_data rcar_gen2_data = {
 	.set_duplex	= sh_eth_set_duplex,
-	.set_rate	= sh_eth_set_rate_r8a777x,
+	.set_rate	= sh_eth_set_rate_rcar,
 
 	.register_type	= SH_ETH_REG_FAST_RCAR,
 
@@ -3086,15 +3086,17 @@ static struct sh_eth_plat_data *sh_eth_parse_dt(struct device *dev)
 
 static const struct of_device_id sh_eth_match_table[] = {
 	{ .compatible = "renesas,gether-r8a7740", .data = &r8a7740_data },
-	{ .compatible = "renesas,ether-r8a7743", .data = &r8a779x_data },
-	{ .compatible = "renesas,ether-r8a7745", .data = &r8a779x_data },
-	{ .compatible = "renesas,ether-r8a7778", .data = &r8a777x_data },
-	{ .compatible = "renesas,ether-r8a7779", .data = &r8a777x_data },
-	{ .compatible = "renesas,ether-r8a7790", .data = &r8a779x_data },
-	{ .compatible = "renesas,ether-r8a7791", .data = &r8a779x_data },
-	{ .compatible = "renesas,ether-r8a7793", .data = &r8a779x_data },
-	{ .compatible = "renesas,ether-r8a7794", .data = &r8a779x_data },
+	{ .compatible = "renesas,ether-r8a7743", .data = &rcar_gen2_data },
+	{ .compatible = "renesas,ether-r8a7745", .data = &rcar_gen2_data },
+	{ .compatible = "renesas,ether-r8a7778", .data = &rcar_gen1_data },
+	{ .compatible = "renesas,ether-r8a7779", .data = &rcar_gen1_data },
+	{ .compatible = "renesas,ether-r8a7790", .data = &rcar_gen2_data },
+	{ .compatible = "renesas,ether-r8a7791", .data = &rcar_gen2_data },
+	{ .compatible = "renesas,ether-r8a7793", .data = &rcar_gen2_data },
+	{ .compatible = "renesas,ether-r8a7794", .data = &rcar_gen2_data },
 	{ .compatible = "renesas,ether-r7s72100", .data = &r7s72100_data },
+	{ .compatible = "renesas,rcar-gen1-ether", .data = &rcar_gen1_data },
+	{ .compatible = "renesas,rcar-gen2-ether", .data = &rcar_gen2_data },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, sh_eth_match_table);
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
index 89831adb8eb7..fd35d8004a78 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
@@ -105,9 +105,9 @@ void sxgbe_disable_eee_mode(struct sxgbe_priv_data * const priv)
  *  If there is no data transfer and if we are not in LPI state,
  *  then MAC Transmitter can be moved to LPI state.
  */
-static void sxgbe_eee_ctrl_timer(unsigned long arg)
+static void sxgbe_eee_ctrl_timer(struct timer_list *t)
 {
-	struct sxgbe_priv_data *priv = (struct sxgbe_priv_data *)arg;
+	struct sxgbe_priv_data *priv = from_timer(priv, t, eee_ctrl_timer);
 
 	sxgbe_enable_eee_mode(priv);
 	mod_timer(&priv->eee_ctrl_timer, SXGBE_LPI_TIMER(eee_timer));
@@ -134,8 +134,7 @@ bool sxgbe_eee_init(struct sxgbe_priv_data * const priv)
 			return false;
 
 		priv->eee_active = 1;
-		setup_timer(&priv->eee_ctrl_timer, sxgbe_eee_ctrl_timer,
-			    (unsigned long)priv);
+		timer_setup(&priv->eee_ctrl_timer, sxgbe_eee_ctrl_timer, 0);
 		priv->eee_ctrl_timer.expires = SXGBE_LPI_TIMER(eee_timer);
 		add_timer(&priv->eee_ctrl_timer);
 
@@ -1002,13 +1001,13 @@ static void sxgbe_disable_mtl_engine(struct sxgbe_priv_data *priv)
 
 /**
  * sxgbe_tx_timer: mitigation sw timer for tx.
- * @data: data pointer
+ * @t: timer pointer
  * Description:
  * This is the timer handler to directly invoke the sxgbe_tx_clean.
  */
-static void sxgbe_tx_timer(unsigned long data)
+static void sxgbe_tx_timer(struct timer_list *t)
 {
-	struct sxgbe_tx_queue *p = (struct sxgbe_tx_queue *)data;
+	struct sxgbe_tx_queue *p = from_timer(p, t, txtimer);
 	sxgbe_tx_queue_clean(p);
 }
 
@@ -1028,8 +1027,7 @@ static void sxgbe_tx_init_coalesce(struct sxgbe_priv_data *priv)
 		struct sxgbe_tx_queue *p = priv->txq[queue_num];
 		p->tx_coal_frames =  SXGBE_TX_FRAMES;
 		p->tx_coal_timer = SXGBE_COAL_TX_TIMER;
-		setup_timer(&p->txtimer, sxgbe_tx_timer,
-			    (unsigned long)&priv->txq[queue_num]);
+		timer_setup(&p->txtimer, sxgbe_tx_timer, 0);
 		p->txtimer.expires = SXGBE_COAL_TIMER(p->tx_coal_timer);
 		add_timer(&p->txtimer);
 	}
diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c
index 244c1e171017..c5bc124b41a9 100644
--- a/drivers/net/ethernet/seeq/ether3.c
+++ b/drivers/net/ethernet/seeq/ether3.c
@@ -170,9 +170,11 @@ ether3_setbuffer(struct net_device *dev, buffer_rw_t read, int start)
 /*
  * Switch LED off...
  */
-static void ether3_ledoff(unsigned long data)
+static void ether3_ledoff(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *)data;
+	struct dev_priv *private = from_timer(private, t, timer);
+	struct net_device *dev = private->dev;
+
 	ether3_outw(priv(dev)->regs.config2 |= CFG2_CTRLO, REG_CONFIG2);
 }
 
@@ -183,8 +185,6 @@ static inline void ether3_ledon(struct net_device *dev)
 {
 	del_timer(&priv(dev)->timer);
 	priv(dev)->timer.expires = jiffies + HZ / 50; /* leave on for 1/50th second */
-	priv(dev)->timer.data = (unsigned long)dev;
-	priv(dev)->timer.function = ether3_ledoff;
 	add_timer(&priv(dev)->timer);
 	if (priv(dev)->regs.config2 & CFG2_CTRLO)
 		ether3_outw(priv(dev)->regs.config2 &= ~CFG2_CTRLO, REG_CONFIG2);
@@ -783,7 +783,8 @@ ether3_probe(struct expansion_card *ec, const struct ecard_id *id)
 
 	ether3_addr(dev->dev_addr, ec);
 
-	init_timer(&priv(dev)->timer);
+	priv(dev)->dev = dev;
+	timer_setup(&priv(dev)->timer, ether3_ledoff, 0);
 
 	/* Reset card...
 	 */
diff --git a/drivers/net/ethernet/seeq/ether3.h b/drivers/net/ethernet/seeq/ether3.h
index 2db63b08bdf3..be19e5fa5cf2 100644
--- a/drivers/net/ethernet/seeq/ether3.h
+++ b/drivers/net/ethernet/seeq/ether3.h
@@ -165,6 +165,7 @@ struct dev_priv {
     unsigned char tx_tail;		/* buffer nr of transmitting packet	 */
     unsigned int rx_head;		/* address to fetch next packet from	 */
     struct timer_list timer;
+    struct net_device *dev;
     int broken;				/* 0 = ok, 1 = something went wrong	 */
 };
 
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index a95a46bcd339..e566dbb3343d 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -674,6 +674,10 @@ static int efx_ef10_probe(struct efx_nic *efx)
 	efx->rx_packet_len_offset =
 		ES_DZ_RX_PREFIX_PKTLEN_OFST - ES_DZ_RX_PREFIX_SIZE;
 
+	if (nic_data->datapath_caps &
+	    (1 << MC_CMD_GET_CAPABILITIES_OUT_RX_INCLUDE_FCS_LBN))
+		efx->net_dev->hw_features |= NETIF_F_RXFCS;
+
 	rc = efx_mcdi_port_get_number(efx);
 	if (rc < 0)
 		goto fail5;
@@ -3199,11 +3203,15 @@ static u16 efx_ef10_handle_rx_event_errors(struct efx_channel *channel,
 					   const efx_qword_t *event)
 {
 	struct efx_nic *efx = channel->efx;
+	bool handled = false;
 
 	if (EFX_QWORD_FIELD(*event, ESF_DZ_RX_ECRC_ERR)) {
-		if (!efx->loopback_selftest)
-			channel->n_rx_eth_crc_err += n_packets;
-		return EFX_RX_PKT_DISCARD;
+		if (!(efx->net_dev->features & NETIF_F_RXALL)) {
+			if (!efx->loopback_selftest)
+				channel->n_rx_eth_crc_err += n_packets;
+			return EFX_RX_PKT_DISCARD;
+		}
+		handled = true;
 	}
 	if (EFX_QWORD_FIELD(*event, ESF_DZ_RX_IPCKSUM_ERR)) {
 		if (unlikely(rx_encap_hdr != ESE_EZ_ENCAP_HDR_VXLAN &&
@@ -3274,7 +3282,7 @@ static u16 efx_ef10_handle_rx_event_errors(struct efx_channel *channel,
 		return 0;
 	}
 
-	WARN_ON(1); /* No error bits were recognised */
+	WARN_ON(!handled); /* No error bits were recognised */
 	return 0;
 }
 
@@ -5726,7 +5734,7 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx)
 		 * MCFW do not support VFs.
 		 */
 		rc = efx_ef10_vport_set_mac_address(efx);
-	} else {
+	} else if (rc) {
 		efx_mcdi_display_error(efx, MC_CMD_VADAPTOR_SET_MAC,
 				       sizeof(inbuf), NULL, 0, rc);
 	}
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 016616a63880..e3c492fcaff0 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -471,8 +471,7 @@ efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel)
 
 	rx_queue = &channel->rx_queue;
 	rx_queue->efx = efx;
-	setup_timer(&rx_queue->slow_fill, efx_rx_slow_fill,
-		    (unsigned long)rx_queue);
+	timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
 
 	return channel;
 }
@@ -511,8 +510,7 @@ efx_copy_channel(const struct efx_channel *old_channel)
 	rx_queue = &channel->rx_queue;
 	rx_queue->buffer = NULL;
 	memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
-	setup_timer(&rx_queue->slow_fill, efx_rx_slow_fill,
-		    (unsigned long)rx_queue);
+	timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
 
 	return channel;
 }
@@ -2317,8 +2315,11 @@ static int efx_set_features(struct net_device *net_dev, netdev_features_t data)
 			return rc;
 	}
 
-	/* If Rx VLAN filter is changed, update filters via mac_reconfigure */
-	if ((net_dev->features ^ data) & NETIF_F_HW_VLAN_CTAG_FILTER) {
+	/* If Rx VLAN filter is changed, update filters via mac_reconfigure.
+	 * If rx-fcs is changed, mac_reconfigure updates that too.
+	 */
+	if ((net_dev->features ^ data) & (NETIF_F_HW_VLAN_CTAG_FILTER |
+					  NETIF_F_RXFCS)) {
 		/* efx_set_rx_mode() will schedule MAC work to update filters
 		 * when a new features are finally set in net_dev.
 		 */
@@ -3244,7 +3245,7 @@ static int efx_pci_probe_post_io(struct efx_nic *efx)
 
 	/* Determine netdevice features */
 	net_dev->features |= (efx->type->offload_features | NETIF_F_SG |
-			      NETIF_F_TSO | NETIF_F_RXCSUM);
+			      NETIF_F_TSO | NETIF_F_RXCSUM | NETIF_F_RXALL);
 	if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
 		net_dev->features |= NETIF_F_TSO6;
 	/* Check whether device supports TSO */
@@ -3255,7 +3256,10 @@ static int efx_pci_probe_post_io(struct efx_nic *efx)
 				   NETIF_F_HIGHDMA | NETIF_F_ALL_TSO |
 				   NETIF_F_RXCSUM);
 
-	net_dev->hw_features = net_dev->features & ~efx->fixed_features;
+	net_dev->hw_features |= net_dev->features & ~efx->fixed_features;
+
+	/* Disable receiving frames with bad FCS, by default. */
+	net_dev->features &= ~NETIF_F_RXALL;
 
 	/* Disable VLAN filtering by default.  It may be enforced if
 	 * the feature is fixed (i.e. VLAN filters are required to
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index d407adf59610..52c84b782901 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -46,7 +46,7 @@ void efx_remove_rx_queue(struct efx_rx_queue *rx_queue);
 void efx_init_rx_queue(struct efx_rx_queue *rx_queue);
 void efx_fini_rx_queue(struct efx_rx_queue *rx_queue);
 void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic);
-void efx_rx_slow_fill(unsigned long context);
+void efx_rx_slow_fill(struct timer_list *t);
 void __efx_rx_packet(struct efx_channel *channel);
 void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
 		   unsigned int n_frags, unsigned int len, u16 flags);
diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c
index 7263275fde4a..3d6c91e96589 100644
--- a/drivers/net/ethernet/sfc/falcon/efx.c
+++ b/drivers/net/ethernet/sfc/falcon/efx.c
@@ -449,8 +449,7 @@ ef4_alloc_channel(struct ef4_nic *efx, int i, struct ef4_channel *old_channel)
 
 	rx_queue = &channel->rx_queue;
 	rx_queue->efx = efx;
-	setup_timer(&rx_queue->slow_fill, ef4_rx_slow_fill,
-		    (unsigned long)rx_queue);
+	timer_setup(&rx_queue->slow_fill, ef4_rx_slow_fill, 0);
 
 	return channel;
 }
@@ -489,8 +488,7 @@ ef4_copy_channel(const struct ef4_channel *old_channel)
 	rx_queue = &channel->rx_queue;
 	rx_queue->buffer = NULL;
 	memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
-	setup_timer(&rx_queue->slow_fill, ef4_rx_slow_fill,
-		    (unsigned long)rx_queue);
+	timer_setup(&rx_queue->slow_fill, ef4_rx_slow_fill, 0);
 
 	return channel;
 }
diff --git a/drivers/net/ethernet/sfc/falcon/efx.h b/drivers/net/ethernet/sfc/falcon/efx.h
index 4f3bb30661ea..a4e4d8ea4078 100644
--- a/drivers/net/ethernet/sfc/falcon/efx.h
+++ b/drivers/net/ethernet/sfc/falcon/efx.h
@@ -45,7 +45,7 @@ void ef4_remove_rx_queue(struct ef4_rx_queue *rx_queue);
 void ef4_init_rx_queue(struct ef4_rx_queue *rx_queue);
 void ef4_fini_rx_queue(struct ef4_rx_queue *rx_queue);
 void ef4_fast_push_rx_descriptors(struct ef4_rx_queue *rx_queue, bool atomic);
-void ef4_rx_slow_fill(unsigned long context);
+void ef4_rx_slow_fill(struct timer_list *t);
 void __ef4_rx_packet(struct ef4_channel *channel);
 void ef4_rx_packet(struct ef4_rx_queue *rx_queue, unsigned int index,
 		   unsigned int n_frags, unsigned int len, u16 flags);
diff --git a/drivers/net/ethernet/sfc/falcon/falcon.c b/drivers/net/ethernet/sfc/falcon/falcon.c
index cd8bb472d758..6520d7bc8d21 100644
--- a/drivers/net/ethernet/sfc/falcon/falcon.c
+++ b/drivers/net/ethernet/sfc/falcon/falcon.c
@@ -1454,10 +1454,11 @@ static void falcon_stats_complete(struct ef4_nic *efx)
 	}
 }
 
-static void falcon_stats_timer_func(unsigned long context)
+static void falcon_stats_timer_func(struct timer_list *t)
 {
-	struct ef4_nic *efx = (struct ef4_nic *)context;
-	struct falcon_nic_data *nic_data = efx->nic_data;
+	struct falcon_nic_data *nic_data = from_timer(nic_data, t,
+						      stats_timer);
+	struct ef4_nic *efx = nic_data->efx;
 
 	spin_lock(&efx->stats_lock);
 
@@ -2295,6 +2296,7 @@ static int falcon_probe_nic(struct ef4_nic *efx)
 	if (!nic_data)
 		return -ENOMEM;
 	efx->nic_data = nic_data;
+	nic_data->efx = efx;
 
 	rc = -ENODEV;
 
@@ -2402,8 +2404,7 @@ static int falcon_probe_nic(struct ef4_nic *efx)
 	}
 
 	nic_data->stats_disable_count = 1;
-	setup_timer(&nic_data->stats_timer, &falcon_stats_timer_func,
-		    (unsigned long)efx);
+	timer_setup(&nic_data->stats_timer, falcon_stats_timer_func, 0);
 
 	return 0;
 
diff --git a/drivers/net/ethernet/sfc/falcon/nic.h b/drivers/net/ethernet/sfc/falcon/nic.h
index 54ca457cdb15..07c62dc552cb 100644
--- a/drivers/net/ethernet/sfc/falcon/nic.h
+++ b/drivers/net/ethernet/sfc/falcon/nic.h
@@ -267,6 +267,7 @@ enum {
 /**
  * struct falcon_nic_data - Falcon NIC state
  * @pci_dev2: Secondary function of Falcon A
+ * @efx: ef4_nic pointer
  * @board: Board state and functions
  * @stats: Hardware statistics
  * @stats_disable_count: Nest count for disabling statistics fetches
@@ -280,6 +281,7 @@ enum {
  */
 struct falcon_nic_data {
 	struct pci_dev *pci_dev2;
+	struct ef4_nic *efx;
 	struct falcon_board board;
 	u64 stats[FALCON_STAT_COUNT];
 	unsigned int stats_disable_count;
diff --git a/drivers/net/ethernet/sfc/falcon/rx.c b/drivers/net/ethernet/sfc/falcon/rx.c
index 6a8406dc0c2b..02456ed13a7d 100644
--- a/drivers/net/ethernet/sfc/falcon/rx.c
+++ b/drivers/net/ethernet/sfc/falcon/rx.c
@@ -163,7 +163,7 @@ static int ef4_init_rx_buffers(struct ef4_rx_queue *rx_queue, bool atomic)
 	do {
 		page = ef4_reuse_page(rx_queue);
 		if (page == NULL) {
-			page = alloc_pages(__GFP_COLD | __GFP_COMP |
+			page = alloc_pages(__GFP_COMP |
 					   (atomic ? GFP_ATOMIC : GFP_KERNEL),
 					   efx->rx_buffer_order);
 			if (unlikely(page == NULL))
@@ -376,9 +376,9 @@ void ef4_fast_push_rx_descriptors(struct ef4_rx_queue *rx_queue, bool atomic)
 		ef4_nic_notify_rx_desc(rx_queue);
 }
 
-void ef4_rx_slow_fill(unsigned long context)
+void ef4_rx_slow_fill(struct timer_list *t)
 {
-	struct ef4_rx_queue *rx_queue = (struct ef4_rx_queue *)context;
+	struct ef4_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill);
 
 	/* Post an event to cause NAPI to run and refill the queue */
 	ef4_nic_generate_fill_event(rx_queue);
diff --git a/drivers/net/ethernet/sfc/falcon/tx.c b/drivers/net/ethernet/sfc/falcon/tx.c
index 6486814e97dc..3409bbf5b19f 100644
--- a/drivers/net/ethernet/sfc/falcon/tx.c
+++ b/drivers/net/ethernet/sfc/falcon/tx.c
@@ -435,7 +435,7 @@ int ef4_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
 	unsigned tc, num_tc;
 	int rc;
 
-	if (type != TC_SETUP_MQPRIO)
+	if (type != TC_SETUP_QDISC_MQPRIO)
 		return -EOPNOTSUPP;
 
 	num_tc = mqprio->num_tc;
diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index 86454d25a405..5334dc83d926 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -927,6 +927,10 @@ static u16 efx_farch_handle_rx_not_ok(struct efx_rx_queue *rx_queue,
 	}
 #endif
 
+	if (efx->net_dev->features & NETIF_F_RXALL)
+		/* don't discard frame for CRC error */
+		rx_ev_eth_crc_err = false;
+
 	/* The frame must be discarded if any of these are true. */
 	return (rx_ev_eth_crc_err | rx_ev_frm_trunc |
 		rx_ev_tobe_disc | rx_ev_pause_frm) ?
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
index 3df872f56289..9c2567b0d93e 100644
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -48,7 +48,7 @@ struct efx_mcdi_async_param {
 	/* followed by request/response buffer */
 };
 
-static void efx_mcdi_timeout_async(unsigned long context);
+static void efx_mcdi_timeout_async(struct timer_list *t);
 static int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating,
 			       bool *was_attached_out);
 static bool efx_mcdi_poll_once(struct efx_nic *efx);
@@ -87,8 +87,7 @@ int efx_mcdi_init(struct efx_nic *efx)
 	mcdi->mode = MCDI_MODE_POLL;
 	spin_lock_init(&mcdi->async_lock);
 	INIT_LIST_HEAD(&mcdi->async_list);
-	setup_timer(&mcdi->async_timer, efx_mcdi_timeout_async,
-		    (unsigned long)mcdi);
+	timer_setup(&mcdi->async_timer, efx_mcdi_timeout_async, 0);
 
 	(void) efx_mcdi_poll_reboot(efx);
 	mcdi->new_epoch = true;
@@ -608,9 +607,9 @@ static void efx_mcdi_ev_cpl(struct efx_nic *efx, unsigned int seqno,
 	}
 }
 
-static void efx_mcdi_timeout_async(unsigned long context)
+static void efx_mcdi_timeout_async(struct timer_list *t)
 {
-	struct efx_mcdi_iface *mcdi = (struct efx_mcdi_iface *)context;
+	struct efx_mcdi_iface *mcdi = from_timer(mcdi, t, async_timer);
 
 	efx_mcdi_complete_async(mcdi, true);
 }
diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c
index c7407d129c7d..6e1f282b2976 100644
--- a/drivers/net/ethernet/sfc/mcdi_port.c
+++ b/drivers/net/ethernet/sfc/mcdi_port.c
@@ -1029,6 +1029,10 @@ int efx_mcdi_set_mac(struct efx_nic *efx)
 	MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_REJECT,
 			      SET_MAC_IN_REJECT_UNCST, efx->unicast_filter);
 
+	MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_FLAGS,
+			      SET_MAC_IN_FLAG_INCLUDE_FCS,
+			      !!(efx->net_dev->features & NETIF_F_RXFCS));
+
 	switch (efx->wanted_fc) {
 	case EFX_FC_RX | EFX_FC_TX:
 		fcntl = MC_CMD_FCNTL_BIDIR;
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
index 56c2db398def..caa89bf7603e 100644
--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -648,11 +648,9 @@ static void efx_ptp_send_times(struct efx_nic *efx,
 	struct pps_event_time now;
 	struct timespec64 limit;
 	struct efx_ptp_data *ptp = efx->ptp_data;
-	struct timespec64 start;
 	int *mc_running = ptp->start.addr;
 
 	pps_get_ts(&now);
-	start = now.ts_real;
 	limit = now.ts_real;
 	timespec64_add_ns(&limit, SYNCHRONISE_PERIOD_NS);
 
diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 42443f434569..cfe76aad79ee 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -163,7 +163,7 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic)
 	do {
 		page = efx_reuse_page(rx_queue);
 		if (page == NULL) {
-			page = alloc_pages(__GFP_COLD | __GFP_COMP |
+			page = alloc_pages(__GFP_COMP |
 					   (atomic ? GFP_ATOMIC : GFP_KERNEL),
 					   efx->rx_buffer_order);
 			if (unlikely(page == NULL))
@@ -376,9 +376,9 @@ void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic)
 		efx_nic_notify_rx_desc(rx_queue);
 }
 
-void efx_rx_slow_fill(unsigned long context)
+void efx_rx_slow_fill(struct timer_list *t)
 {
-	struct efx_rx_queue *rx_queue = (struct efx_rx_queue *)context;
+	struct efx_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill);
 
 	/* Post an event to cause NAPI to run and refill the queue */
 	efx_nic_generate_fill_event(rx_queue);
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index efb66ea21f27..0ea7e16f2e6e 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -663,7 +663,7 @@ int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
 	unsigned tc, num_tc;
 	int rc;
 
-	if (type != TC_SETUP_MQPRIO)
+	if (type != TC_SETUP_QDISC_MQPRIO)
 		return -EOPNOTSUPP;
 
 	num_tc = mqprio->num_tc;
diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c
index 9c0488e0f08e..18d533fdf14c 100644
--- a/drivers/net/ethernet/sgi/ioc3-eth.c
+++ b/drivers/net/ethernet/sgi/ioc3-eth.c
@@ -764,9 +764,9 @@ static inline void ioc3_setup_duplex(struct ioc3_private *ip)
 	ioc3_w_emcr(ip->emcr);
 }
 
-static void ioc3_timer(unsigned long data)
+static void ioc3_timer(struct timer_list *t)
 {
-	struct ioc3_private *ip = (struct ioc3_private *) data;
+	struct ioc3_private *ip = from_timer(ip, t, ioc3_timer);
 
 	/* Print the link status if it has changed */
 	mii_check_media(&ip->mii, 1, 0);
@@ -818,8 +818,6 @@ out:
 static void ioc3_mii_start(struct ioc3_private *ip)
 {
 	ip->ioc3_timer.expires = jiffies + (12 * HZ)/10;  /* 1.2 sec. */
-	ip->ioc3_timer.data = (unsigned long) ip;
-	ip->ioc3_timer.function = ioc3_timer;
 	add_timer(&ip->ioc3_timer);
 }
 
@@ -1291,7 +1289,7 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 #endif
 
 	spin_lock_init(&ip->ioc3_lock);
-	init_timer(&ip->ioc3_timer);
+	timer_setup(&ip->ioc3_timer, ioc3_timer, 0);
 
 	ioc3_stop(ip);
 	ioc3_init(dev);
diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c
index 445109bd6910..c2c50522b96d 100644
--- a/drivers/net/ethernet/sis/sis190.c
+++ b/drivers/net/ethernet/sis/sis190.c
@@ -1018,10 +1018,10 @@ out_unlock:
 	rtnl_unlock();
 }
 
-static void sis190_phy_timer(unsigned long __opaque)
+static void sis190_phy_timer(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *)__opaque;
-	struct sis190_private *tp = netdev_priv(dev);
+	struct sis190_private *tp = from_timer(tp, t, timer);
+	struct net_device *dev = tp->dev;
 
 	if (likely(netif_running(dev)))
 		schedule_work(&tp->phy_task);
@@ -1039,10 +1039,8 @@ static inline void sis190_request_timer(struct net_device *dev)
 	struct sis190_private *tp = netdev_priv(dev);
 	struct timer_list *timer = &tp->timer;
 
-	init_timer(timer);
+	timer_setup(timer, sis190_phy_timer, 0);
 	timer->expires = jiffies + SIS190_PHY_TIMEOUT;
-	timer->data = (unsigned long)dev;
-	timer->function = sis190_phy_timer;
 	add_timer(timer);
 }
 
diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c
index 40bd88362e3d..4bb89f74742c 100644
--- a/drivers/net/ethernet/sis/sis900.c
+++ b/drivers/net/ethernet/sis/sis900.c
@@ -218,7 +218,7 @@ static void sis900_init_rxfilter (struct net_device * net_dev);
 static u16 read_eeprom(void __iomem *ioaddr, int location);
 static int mdio_read(struct net_device *net_dev, int phy_id, int location);
 static void mdio_write(struct net_device *net_dev, int phy_id, int location, int val);
-static void sis900_timer(unsigned long data);
+static void sis900_timer(struct timer_list *t);
 static void sis900_check_mode (struct net_device *net_dev, struct mii_phy *mii_phy);
 static void sis900_tx_timeout(struct net_device *net_dev);
 static void sis900_init_tx_ring(struct net_device *net_dev);
@@ -1065,10 +1065,8 @@ sis900_open(struct net_device *net_dev)
 
 	/* Set the timer to switch to check for link beat and perhaps switch
 	   to an alternate media type. */
-	init_timer(&sis_priv->timer);
+	timer_setup(&sis_priv->timer, sis900_timer, 0);
 	sis_priv->timer.expires = jiffies + HZ;
-	sis_priv->timer.data = (unsigned long)net_dev;
-	sis_priv->timer.function = sis900_timer;
 	add_timer(&sis_priv->timer);
 
 	return 0;
@@ -1302,10 +1300,10 @@ static void sis630_set_eq(struct net_device *net_dev, u8 revision)
  *	link status (ON/OFF) and link mode (10/100/Full/Half)
  */
 
-static void sis900_timer(unsigned long data)
+static void sis900_timer(struct timer_list *t)
 {
-	struct net_device *net_dev = (struct net_device *)data;
-	struct sis900_private *sis_priv = netdev_priv(net_dev);
+	struct sis900_private *sis_priv = from_timer(sis_priv, t, timer);
+	struct net_device *net_dev = sis_priv->mii_info.dev;
 	struct mii_phy *mii_phy = sis_priv->mii;
 	static const int next_tick = 5*HZ;
 	int speed = 0, duplex = 0;
diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c
index 6a0e1d4b597c..949aaef390b6 100644
--- a/drivers/net/ethernet/smsc/epic100.c
+++ b/drivers/net/ethernet/smsc/epic100.c
@@ -290,7 +290,7 @@ static int read_eeprom(struct epic_private *, int);
 static int mdio_read(struct net_device *dev, int phy_id, int location);
 static void mdio_write(struct net_device *dev, int phy_id, int loc, int val);
 static void epic_restart(struct net_device *dev);
-static void epic_timer(unsigned long data);
+static void epic_timer(struct timer_list *t);
 static void epic_tx_timeout(struct net_device *dev);
 static void epic_init_ring(struct net_device *dev);
 static netdev_tx_t epic_start_xmit(struct sk_buff *skb,
@@ -739,10 +739,8 @@ static int epic_open(struct net_device *dev)
 
 	/* Set the timer to switch to check for link beat and perhaps switch
 	   to an alternate media type. */
-	init_timer(&ep->timer);
+	timer_setup(&ep->timer, epic_timer, 0);
 	ep->timer.expires = jiffies + 3*HZ;
-	ep->timer.data = (unsigned long)dev;
-	ep->timer.function = epic_timer;				/* timer handler */
 	add_timer(&ep->timer);
 
 	return rc;
@@ -845,10 +843,10 @@ static void check_media(struct net_device *dev)
 	}
 }
 
-static void epic_timer(unsigned long data)
+static void epic_timer(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *)data;
-	struct epic_private *ep = netdev_priv(dev);
+	struct epic_private *ep = from_timer(ep, t, timer);
+	struct net_device *dev = ep->mii.dev;
 	void __iomem *ioaddr = ep->ioaddr;
 	int next_tick = 5*HZ;
 
diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c
index 92c927aec66d..a55f430f6a7b 100644
--- a/drivers/net/ethernet/smsc/smc91c92_cs.c
+++ b/drivers/net/ethernet/smsc/smc91c92_cs.c
@@ -280,7 +280,7 @@ static void set_rx_mode(struct net_device *dev);
 static int s9k_config(struct net_device *dev, struct ifmap *map);
 static void smc_set_xcvr(struct net_device *dev, int if_port);
 static void smc_reset(struct net_device *dev);
-static void media_check(u_long arg);
+static void media_check(struct timer_list *t);
 static void mdio_sync(unsigned int addr);
 static int mdio_read(struct net_device *dev, int phy_id, int loc);
 static void mdio_write(struct net_device *dev, int phy_id, int loc, int value);
@@ -1070,7 +1070,7 @@ static int smc_open(struct net_device *dev)
     smc->packets_waiting = 0;
 
     smc_reset(dev);
-    setup_timer(&smc->media, media_check, (u_long)dev);
+    timer_setup(&smc->media, media_check, 0);
     mod_timer(&smc->media, jiffies + HZ);
 
     return 0;
@@ -1708,10 +1708,10 @@ static void smc_reset(struct net_device *dev)
 
 ======================================================================*/
 
-static void media_check(u_long arg)
+static void media_check(struct timer_list *t)
 {
-    struct net_device *dev = (struct net_device *) arg;
-    struct smc_private *smc = netdev_priv(dev);
+    struct smc_private *smc = from_timer(smc, t, media);
+    struct net_device *dev = smc->mii_if.dev;
     unsigned int ioaddr = dev->base_addr;
     u_short i, media, saved_bank;
     u_short link;
diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index 97035766c291..e28c0d2c58e9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -159,6 +159,7 @@ config DWMAC_SUN8I
 	tristate "Allwinner sun8i GMAC support"
 	default ARCH_SUNXI
 	depends on OF && (ARCH_SUNXI || COMPILE_TEST)
+	select MDIO_BUS_MUX
 	---help---
 	  Support for Allwinner H3 A83T A64 EMAC ethernet controllers.
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
index 6a9c954492f2..8b50afcdb52d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
@@ -118,10 +118,9 @@ int tse_pcs_init(void __iomem *base, struct tse_pcs *pcs)
 	return ret;
 }
 
-static void pcs_link_timer_callback(unsigned long data)
+static void pcs_link_timer_callback(struct tse_pcs *pcs)
 {
 	u16 val = 0;
-	struct tse_pcs *pcs = (struct tse_pcs *)data;
 	void __iomem *tse_pcs_base = pcs->tse_pcs_base;
 	void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base;
 
@@ -138,12 +137,11 @@ static void pcs_link_timer_callback(unsigned long data)
 	}
 }
 
-static void auto_nego_timer_callback(unsigned long data)
+static void auto_nego_timer_callback(struct tse_pcs *pcs)
 {
 	u16 val = 0;
 	u16 speed = 0;
 	u16 duplex = 0;
-	struct tse_pcs *pcs = (struct tse_pcs *)data;
 	void __iomem *tse_pcs_base = pcs->tse_pcs_base;
 	void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base;
 
@@ -201,14 +199,14 @@ static void auto_nego_timer_callback(unsigned long data)
 	}
 }
 
-static void aneg_link_timer_callback(unsigned long data)
+static void aneg_link_timer_callback(struct timer_list *t)
 {
-	struct tse_pcs *pcs = (struct tse_pcs *)data;
+	struct tse_pcs *pcs = from_timer(pcs, t, aneg_link_timer);
 
 	if (pcs->autoneg == AUTONEG_ENABLE)
-		auto_nego_timer_callback(data);
+		auto_nego_timer_callback(pcs);
 	else if (pcs->autoneg == AUTONEG_DISABLE)
-		pcs_link_timer_callback(data);
+		pcs_link_timer_callback(pcs);
 }
 
 void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev,
@@ -237,8 +235,8 @@ void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev,
 
 		tse_pcs_reset(tse_pcs_base, pcs);
 
-		setup_timer(&pcs->aneg_link_timer,
-			    aneg_link_timer_callback, (unsigned long)pcs);
+		timer_setup(&pcs->aneg_link_timer, aneg_link_timer_callback,
+			    0);
 		mod_timer(&pcs->aneg_link_timer, jiffies +
 			  msecs_to_jiffies(AUTONEGO_LINK_TIMER));
 	} else if (phy_dev->autoneg == AUTONEG_DISABLE) {
@@ -270,8 +268,8 @@ void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev,
 
 		tse_pcs_reset(tse_pcs_base, pcs);
 
-		setup_timer(&pcs->aneg_link_timer,
-			    aneg_link_timer_callback, (unsigned long)pcs);
+		timer_setup(&pcs->aneg_link_timer, aneg_link_timer_callback,
+			    0);
 		mod_timer(&pcs->aneg_link_timer, jiffies +
 			  msecs_to_jiffies(AUTONEGO_LINK_TIMER));
 	}
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index e82b4b70b7be..e1e5ac053760 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -442,8 +442,9 @@ struct stmmac_dma_ops {
 	void (*dma_mode)(void __iomem *ioaddr, int txmode, int rxmode,
 			 int rxfifosz);
 	void (*dma_rx_mode)(void __iomem *ioaddr, int mode, u32 channel,
-			    int fifosz);
-	void (*dma_tx_mode)(void __iomem *ioaddr, int mode, u32 channel);
+			    int fifosz, u8 qmode);
+	void (*dma_tx_mode)(void __iomem *ioaddr, int mode, u32 channel,
+			    int fifosz, u8 qmode);
 	/* To track extra statistic (if supported) */
 	void (*dma_diagnostic_fr) (void *data, struct stmmac_extra_stats *x,
 				   void __iomem *ioaddr);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
index 866444b6c82f..2c6d7c69c8f7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
@@ -51,15 +51,11 @@
 #define NSS_COMMON_CLK_SRC_CTRL_RGMII(x)	1
 #define NSS_COMMON_CLK_SRC_CTRL_SGMII(x)	((x >= 2) ? 1 : 0)
 
-#define NSS_COMMON_MACSEC_CTL			0x28
-#define NSS_COMMON_MACSEC_CTL_EXT_BYPASS_EN(x)	(1 << x)
-
 #define NSS_COMMON_GMAC_CTL(x)			(0x30 + (x * 4))
 #define NSS_COMMON_GMAC_CTL_CSYS_REQ		BIT(19)
 #define NSS_COMMON_GMAC_CTL_PHY_IFACE_SEL	BIT(16)
 #define NSS_COMMON_GMAC_CTL_IFG_LIMIT_OFFSET	8
 #define NSS_COMMON_GMAC_CTL_IFG_OFFSET		0
-#define NSS_COMMON_GMAC_CTL_IFG_MASK		0x3f
 
 #define NSS_COMMON_CLK_DIV_RGMII_1000		1
 #define NSS_COMMON_CLK_DIV_RGMII_100		9
@@ -68,9 +64,6 @@
 #define NSS_COMMON_CLK_DIV_SGMII_100		4
 #define NSS_COMMON_CLK_DIV_SGMII_10		49
 
-#define QSGMII_PCS_MODE_CTL			0x68
-#define QSGMII_PCS_MODE_CTL_AUTONEG_EN(x)	BIT((x * 8) + 7)
-
 #define QSGMII_PCS_CAL_LCKDT_CTL		0x120
 #define QSGMII_PCS_CAL_LCKDT_CTL_RST		BIT(19)
 
@@ -83,15 +76,10 @@
 #define QSGMII_PHY_TX_DRIVER_EN			BIT(3)
 #define QSGMII_PHY_QSGMII_EN			BIT(7)
 #define QSGMII_PHY_PHASE_LOOP_GAIN_OFFSET	12
-#define QSGMII_PHY_PHASE_LOOP_GAIN_MASK		0x7
 #define QSGMII_PHY_RX_DC_BIAS_OFFSET		18
-#define QSGMII_PHY_RX_DC_BIAS_MASK		0x3
 #define QSGMII_PHY_RX_INPUT_EQU_OFFSET		20
-#define QSGMII_PHY_RX_INPUT_EQU_MASK		0x3
 #define QSGMII_PHY_CDR_PI_SLEW_OFFSET		22
-#define QSGMII_PHY_CDR_PI_SLEW_MASK		0x3
 #define QSGMII_PHY_TX_DRV_AMP_OFFSET		28
-#define QSGMII_PHY_TX_DRV_AMP_MASK		0xf
 
 struct ipq806x_gmac {
 	struct platform_device *pdev;
@@ -217,7 +205,7 @@ static int ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac)
 	 * code and keep it consistent with the Linux convention, we'll number
 	 * them from 0 to 3 here.
 	 */
-	if (gmac->id < 0 || gmac->id > 3) {
+	if (gmac->id > 3) {
 		dev_err(dev, "invalid gmac id\n");
 		return -EINVAL;
 	}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index 39c2122a4f26..e5ff734d4f9b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -17,6 +17,7 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/iopoll.h>
+#include <linux/mdio-mux.h>
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
@@ -41,14 +42,14 @@
  *				This value is used for disabling properly EMAC
  *				and used as a good starting value in case of the
  *				boot process(uboot) leave some stuff.
- * @internal_phy:		Does the MAC embed an internal PHY
+ * @soc_has_internal_phy:	Does the MAC embed an internal PHY
  * @support_mii:		Does the MAC handle MII
  * @support_rmii:		Does the MAC handle RMII
  * @support_rgmii:		Does the MAC handle RGMII
  */
 struct emac_variant {
 	u32 default_syscon_value;
-	int internal_phy;
+	bool soc_has_internal_phy;
 	bool support_mii;
 	bool support_rmii;
 	bool support_rgmii;
@@ -61,7 +62,8 @@ struct emac_variant {
  * @rst_ephy:	reference to the optional EPHY reset for the internal PHY
  * @variant:	reference to the current board variant
  * @regmap:	regmap for using the syscon
- * @use_internal_phy: Does the current PHY choice imply using the internal PHY
+ * @internal_phy_powered: Does the internal PHY is enabled
+ * @mux_handle:	Internal pointer used by mdio-mux lib
  */
 struct sunxi_priv_data {
 	struct clk *tx_clk;
@@ -70,12 +72,13 @@ struct sunxi_priv_data {
 	struct reset_control *rst_ephy;
 	const struct emac_variant *variant;
 	struct regmap *regmap;
-	bool use_internal_phy;
+	bool internal_phy_powered;
+	void *mux_handle;
 };
 
 static const struct emac_variant emac_variant_h3 = {
 	.default_syscon_value = 0x58000,
-	.internal_phy = PHY_INTERFACE_MODE_MII,
+	.soc_has_internal_phy = true,
 	.support_mii = true,
 	.support_rmii = true,
 	.support_rgmii = true
@@ -83,20 +86,20 @@ static const struct emac_variant emac_variant_h3 = {
 
 static const struct emac_variant emac_variant_v3s = {
 	.default_syscon_value = 0x38000,
-	.internal_phy = PHY_INTERFACE_MODE_MII,
+	.soc_has_internal_phy = true,
 	.support_mii = true
 };
 
 static const struct emac_variant emac_variant_a83t = {
 	.default_syscon_value = 0,
-	.internal_phy = 0,
+	.soc_has_internal_phy = false,
 	.support_mii = true,
 	.support_rgmii = true
 };
 
 static const struct emac_variant emac_variant_a64 = {
 	.default_syscon_value = 0,
-	.internal_phy = 0,
+	.soc_has_internal_phy = false,
 	.support_mii = true,
 	.support_rmii = true,
 	.support_rgmii = true
@@ -195,6 +198,9 @@ static const struct emac_variant emac_variant_a64 = {
 #define H3_EPHY_LED_POL		BIT(17) /* 1: active low, 0: active high */
 #define H3_EPHY_SHUTDOWN	BIT(16) /* 1: shutdown, 0: power up */
 #define H3_EPHY_SELECT		BIT(15) /* 1: internal PHY, 0: external PHY */
+#define H3_EPHY_MUX_MASK	(H3_EPHY_SHUTDOWN | H3_EPHY_SELECT)
+#define DWMAC_SUN8I_MDIO_MUX_INTERNAL_ID	1
+#define DWMAC_SUN8I_MDIO_MUX_EXTERNAL_ID	2
 
 /* H3/A64 specific bits */
 #define SYSCON_RMII_EN		BIT(13) /* 1: enable RMII (overrides EPIT) */
@@ -634,6 +640,159 @@ static int sun8i_dwmac_reset(struct stmmac_priv *priv)
 	return 0;
 }
 
+/* Search in mdio-mux node for internal PHY node and get its clk/reset */
+static int get_ephy_nodes(struct stmmac_priv *priv)
+{
+	struct sunxi_priv_data *gmac = priv->plat->bsp_priv;
+	struct device_node *mdio_mux, *iphynode;
+	struct device_node *mdio_internal;
+	int ret;
+
+	mdio_mux = of_get_child_by_name(priv->device->of_node, "mdio-mux");
+	if (!mdio_mux) {
+		dev_err(priv->device, "Cannot get mdio-mux node\n");
+		return -ENODEV;
+	}
+
+	mdio_internal = of_find_compatible_node(mdio_mux, NULL,
+						"allwinner,sun8i-h3-mdio-internal");
+	if (!mdio_internal) {
+		dev_err(priv->device, "Cannot get internal_mdio node\n");
+		return -ENODEV;
+	}
+
+	/* Seek for internal PHY */
+	for_each_child_of_node(mdio_internal, iphynode) {
+		gmac->ephy_clk = of_clk_get(iphynode, 0);
+		if (IS_ERR(gmac->ephy_clk))
+			continue;
+		gmac->rst_ephy = of_reset_control_get_exclusive(iphynode, NULL);
+		if (IS_ERR(gmac->rst_ephy)) {
+			ret = PTR_ERR(gmac->rst_ephy);
+			if (ret == -EPROBE_DEFER)
+				return ret;
+			continue;
+		}
+		dev_info(priv->device, "Found internal PHY node\n");
+		return 0;
+	}
+	return -ENODEV;
+}
+
+static int sun8i_dwmac_power_internal_phy(struct stmmac_priv *priv)
+{
+	struct sunxi_priv_data *gmac = priv->plat->bsp_priv;
+	int ret;
+
+	if (gmac->internal_phy_powered) {
+		dev_warn(priv->device, "Internal PHY already powered\n");
+		return 0;
+	}
+
+	dev_info(priv->device, "Powering internal PHY\n");
+	ret = clk_prepare_enable(gmac->ephy_clk);
+	if (ret) {
+		dev_err(priv->device, "Cannot enable internal PHY\n");
+		return ret;
+	}
+
+	/* Make sure the EPHY is properly reseted, as U-Boot may leave
+	 * it at deasserted state, and thus it may fail to reset EMAC.
+	 */
+	reset_control_assert(gmac->rst_ephy);
+
+	ret = reset_control_deassert(gmac->rst_ephy);
+	if (ret) {
+		dev_err(priv->device, "Cannot deassert internal phy\n");
+		clk_disable_unprepare(gmac->ephy_clk);
+		return ret;
+	}
+
+	gmac->internal_phy_powered = true;
+
+	return 0;
+}
+
+static int sun8i_dwmac_unpower_internal_phy(struct sunxi_priv_data *gmac)
+{
+	if (!gmac->internal_phy_powered)
+		return 0;
+
+	clk_disable_unprepare(gmac->ephy_clk);
+	reset_control_assert(gmac->rst_ephy);
+	gmac->internal_phy_powered = false;
+	return 0;
+}
+
+/* MDIO multiplexing switch function
+ * This function is called by the mdio-mux layer when it thinks the mdio bus
+ * multiplexer needs to switch.
+ * 'current_child' is the current value of the mux register
+ * 'desired_child' is the value of the 'reg' property of the target child MDIO
+ * node.
+ * The first time this function is called, current_child == -1.
+ * If current_child == desired_child, then the mux is already set to the
+ * correct bus.
+ */
+static int mdio_mux_syscon_switch_fn(int current_child, int desired_child,
+				     void *data)
+{
+	struct stmmac_priv *priv = data;
+	struct sunxi_priv_data *gmac = priv->plat->bsp_priv;
+	u32 reg, val;
+	int ret = 0;
+	bool need_power_ephy = false;
+
+	if (current_child ^ desired_child) {
+		regmap_read(gmac->regmap, SYSCON_EMAC_REG, &reg);
+		switch (desired_child) {
+		case DWMAC_SUN8I_MDIO_MUX_INTERNAL_ID:
+			dev_info(priv->device, "Switch mux to internal PHY");
+			val = (reg & ~H3_EPHY_MUX_MASK) | H3_EPHY_SELECT;
+
+			need_power_ephy = true;
+			break;
+		case DWMAC_SUN8I_MDIO_MUX_EXTERNAL_ID:
+			dev_info(priv->device, "Switch mux to external PHY");
+			val = (reg & ~H3_EPHY_MUX_MASK) | H3_EPHY_SHUTDOWN;
+			need_power_ephy = false;
+			break;
+		default:
+			dev_err(priv->device, "Invalid child ID %x\n",
+				desired_child);
+			return -EINVAL;
+		}
+		regmap_write(gmac->regmap, SYSCON_EMAC_REG, val);
+		if (need_power_ephy) {
+			ret = sun8i_dwmac_power_internal_phy(priv);
+			if (ret)
+				return ret;
+		} else {
+			sun8i_dwmac_unpower_internal_phy(gmac);
+		}
+		/* After changing syscon value, the MAC need reset or it will
+		 * use the last value (and so the last PHY set).
+		 */
+		ret = sun8i_dwmac_reset(priv);
+	}
+	return ret;
+}
+
+static int sun8i_dwmac_register_mdio_mux(struct stmmac_priv *priv)
+{
+	int ret;
+	struct device_node *mdio_mux;
+	struct sunxi_priv_data *gmac = priv->plat->bsp_priv;
+
+	mdio_mux = of_get_child_by_name(priv->device->of_node, "mdio-mux");
+	if (!mdio_mux)
+		return -ENODEV;
+
+	ret = mdio_mux_init(priv->device, mdio_mux, mdio_mux_syscon_switch_fn,
+			    &gmac->mux_handle, priv, priv->mii);
+	return ret;
+}
+
 static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
 {
 	struct sunxi_priv_data *gmac = priv->plat->bsp_priv;
@@ -648,35 +807,25 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
 			 "Current syscon value is not the default %x (expect %x)\n",
 			 val, reg);
 
-	if (gmac->variant->internal_phy) {
-		if (!gmac->use_internal_phy) {
-			/* switch to external PHY interface */
-			reg &= ~H3_EPHY_SELECT;
-		} else {
-			reg |= H3_EPHY_SELECT;
-			reg &= ~H3_EPHY_SHUTDOWN;
-			dev_dbg(priv->device, "Select internal_phy %x\n", reg);
-
-			if (of_property_read_bool(priv->plat->phy_node,
-						  "allwinner,leds-active-low"))
-				reg |= H3_EPHY_LED_POL;
-			else
-				reg &= ~H3_EPHY_LED_POL;
-
-			/* Force EPHY xtal frequency to 24MHz. */
-			reg |= H3_EPHY_CLK_SEL;
-
-			ret = of_mdio_parse_addr(priv->device,
-						 priv->plat->phy_node);
-			if (ret < 0) {
-				dev_err(priv->device, "Could not parse MDIO addr\n");
-				return ret;
-			}
-			/* of_mdio_parse_addr returns a valid (0 ~ 31) PHY
-			 * address. No need to mask it again.
-			 */
-			reg |= ret << H3_EPHY_ADDR_SHIFT;
+	if (gmac->variant->soc_has_internal_phy) {
+		if (of_property_read_bool(priv->plat->phy_node,
+					  "allwinner,leds-active-low"))
+			reg |= H3_EPHY_LED_POL;
+		else
+			reg &= ~H3_EPHY_LED_POL;
+
+		/* Force EPHY xtal frequency to 24MHz. */
+		reg |= H3_EPHY_CLK_SEL;
+
+		ret = of_mdio_parse_addr(priv->device, priv->plat->phy_node);
+		if (ret < 0) {
+			dev_err(priv->device, "Could not parse MDIO addr\n");
+			return ret;
 		}
+		/* of_mdio_parse_addr returns a valid (0 ~ 31) PHY
+		 * address. No need to mask it again.
+		 */
+		reg |= 1 << H3_EPHY_ADDR_SHIFT;
 	}
 
 	if (!of_property_read_u32(node, "allwinner,tx-delay-ps", &val)) {
@@ -746,81 +895,21 @@ static void sun8i_dwmac_unset_syscon(struct sunxi_priv_data *gmac)
 	regmap_write(gmac->regmap, SYSCON_EMAC_REG, reg);
 }
 
-static int sun8i_dwmac_power_internal_phy(struct stmmac_priv *priv)
+static void sun8i_dwmac_exit(struct platform_device *pdev, void *priv)
 {
-	struct sunxi_priv_data *gmac = priv->plat->bsp_priv;
-	int ret;
-
-	if (!gmac->use_internal_phy)
-		return 0;
-
-	ret = clk_prepare_enable(gmac->ephy_clk);
-	if (ret) {
-		dev_err(priv->device, "Cannot enable ephy\n");
-		return ret;
-	}
-
-	/* Make sure the EPHY is properly reseted, as U-Boot may leave
-	 * it at deasserted state, and thus it may fail to reset EMAC.
-	 */
-	reset_control_assert(gmac->rst_ephy);
+	struct sunxi_priv_data *gmac = priv;
 
-	ret = reset_control_deassert(gmac->rst_ephy);
-	if (ret) {
-		dev_err(priv->device, "Cannot deassert ephy\n");
-		clk_disable_unprepare(gmac->ephy_clk);
-		return ret;
+	if (gmac->variant->soc_has_internal_phy) {
+		/* sun8i_dwmac_exit could be called with mdiomux uninit */
+		if (gmac->mux_handle)
+			mdio_mux_uninit(gmac->mux_handle);
+		if (gmac->internal_phy_powered)
+			sun8i_dwmac_unpower_internal_phy(gmac);
 	}
 
-	return 0;
-}
-
-static int sun8i_dwmac_unpower_internal_phy(struct sunxi_priv_data *gmac)
-{
-	if (!gmac->use_internal_phy)
-		return 0;
-
-	clk_disable_unprepare(gmac->ephy_clk);
-	reset_control_assert(gmac->rst_ephy);
-	return 0;
-}
-
-/* sun8i_power_phy() - Activate the PHY:
- * In case of error, no need to call sun8i_unpower_phy(),
- * it will be called anyway by sun8i_dwmac_exit()
- */
-static int sun8i_power_phy(struct stmmac_priv *priv)
-{
-	int ret;
-
-	ret = sun8i_dwmac_power_internal_phy(priv);
-	if (ret)
-		return ret;
-
-	ret = sun8i_dwmac_set_syscon(priv);
-	if (ret)
-		return ret;
-
-	/* After changing syscon value, the MAC need reset or it will use
-	 * the last value (and so the last PHY set.
-	 */
-	ret = sun8i_dwmac_reset(priv);
-	if (ret)
-		return ret;
-	return 0;
-}
-
-static void sun8i_unpower_phy(struct sunxi_priv_data *gmac)
-{
 	sun8i_dwmac_unset_syscon(gmac);
-	sun8i_dwmac_unpower_internal_phy(gmac);
-}
-
-static void sun8i_dwmac_exit(struct platform_device *pdev, void *priv)
-{
-	struct sunxi_priv_data *gmac = priv;
 
-	sun8i_unpower_phy(gmac);
+	reset_control_put(gmac->rst_ephy);
 
 	clk_disable_unprepare(gmac->tx_clk);
 
@@ -849,7 +938,7 @@ static struct mac_device_info *sun8i_dwmac_setup(void *ppriv)
 	if (!mac)
 		return NULL;
 
-	ret = sun8i_power_phy(priv);
+	ret = sun8i_dwmac_set_syscon(priv);
 	if (ret)
 		return NULL;
 
@@ -889,6 +978,8 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
 	struct sunxi_priv_data *gmac;
 	struct device *dev = &pdev->dev;
 	int ret;
+	struct stmmac_priv *priv;
+	struct net_device *ndev;
 
 	ret = stmmac_get_platform_resources(pdev, &stmmac_res);
 	if (ret)
@@ -932,29 +1023,6 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
 	}
 
 	plat_dat->interface = of_get_phy_mode(dev->of_node);
-	if (plat_dat->interface == gmac->variant->internal_phy) {
-		dev_info(&pdev->dev, "Will use internal PHY\n");
-		gmac->use_internal_phy = true;
-		gmac->ephy_clk = of_clk_get(plat_dat->phy_node, 0);
-		if (IS_ERR(gmac->ephy_clk)) {
-			ret = PTR_ERR(gmac->ephy_clk);
-			dev_err(&pdev->dev, "Cannot get EPHY clock: %d\n", ret);
-			return -EINVAL;
-		}
-
-		gmac->rst_ephy = of_reset_control_get(plat_dat->phy_node, NULL);
-		if (IS_ERR(gmac->rst_ephy)) {
-			ret = PTR_ERR(gmac->rst_ephy);
-			if (ret == -EPROBE_DEFER)
-				return ret;
-			dev_err(&pdev->dev, "No EPHY reset control found %d\n",
-				ret);
-			return -EINVAL;
-		}
-	} else {
-		dev_info(&pdev->dev, "Will use external PHY\n");
-		gmac->use_internal_phy = false;
-	}
 
 	/* platform data specifying hardware features and callbacks.
 	 * hardware features were copied from Allwinner drivers.
@@ -973,12 +1041,45 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
 
 	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
 	if (ret)
-		sun8i_dwmac_exit(pdev, plat_dat->bsp_priv);
+		goto dwmac_exit;
+
+	ndev = dev_get_drvdata(&pdev->dev);
+	priv = netdev_priv(ndev);
+	/* The mux must be registered after parent MDIO
+	 * so after stmmac_dvr_probe()
+	 */
+	if (gmac->variant->soc_has_internal_phy) {
+		ret = get_ephy_nodes(priv);
+		if (ret)
+			goto dwmac_exit;
+		ret = sun8i_dwmac_register_mdio_mux(priv);
+		if (ret) {
+			dev_err(&pdev->dev, "Failed to register mux\n");
+			goto dwmac_mux;
+		}
+	} else {
+		ret = sun8i_dwmac_reset(priv);
+		if (ret)
+			goto dwmac_exit;
+	}
 
 	return ret;
+dwmac_mux:
+	sun8i_dwmac_unset_syscon(gmac);
+dwmac_exit:
+	sun8i_dwmac_exit(pdev, plat_dat->bsp_priv);
+return ret;
 }
 
 static const struct of_device_id sun8i_dwmac_match[] = {
+	{ .compatible = "allwinner,sun8i-h3-emac",
+		.data = &emac_variant_h3 },
+	{ .compatible = "allwinner,sun8i-v3s-emac",
+		.data = &emac_variant_v3s },
+	{ .compatible = "allwinner,sun8i-a83t-emac",
+		.data = &emac_variant_a83t },
+	{ .compatible = "allwinner,sun50i-a64-emac",
+		.data = &emac_variant_a64 },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, sun8i_dwmac_match);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
index d74cedf2a397..789dad8a07b5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
@@ -98,7 +98,7 @@
 #define	GMAC_PCS_IRQ_DEFAULT	(GMAC_INT_RGSMIIS | GMAC_INT_PCS_LINK |	\
 				 GMAC_INT_PCS_ANE)
 
-#define	GMAC_INT_DEFAULT_MASK	GMAC_INT_PMT_EN
+#define	GMAC_INT_DEFAULT_MASK	(GMAC_INT_PMT_EN | GMAC_INT_LPI_EN)
 
 enum dwmac4_irq_status {
 	time_stamp_irq = 0x00001000,
@@ -106,6 +106,7 @@ enum dwmac4_irq_status {
 	mmc_tx_irq = 0x00000400,
 	mmc_rx_irq = 0x00000200,
 	mmc_irq = 0x00000100,
+	lpi_irq = 0x00000020,
 	pmt_irq = 0x00000010,
 };
 
@@ -132,6 +133,10 @@ enum power_event {
 #define GMAC4_LPI_CTRL_STATUS_LPITXA	BIT(19)	/* Enable LPI TX Automate */
 #define GMAC4_LPI_CTRL_STATUS_PLS	BIT(17) /* PHY Link Status */
 #define GMAC4_LPI_CTRL_STATUS_LPIEN	BIT(16)	/* LPI Enable */
+#define GMAC4_LPI_CTRL_STATUS_RLPIEX	BIT(3) /* Receive LPI Exit */
+#define GMAC4_LPI_CTRL_STATUS_RLPIEN	BIT(2) /* Receive LPI Entry */
+#define GMAC4_LPI_CTRL_STATUS_TLPIEX	BIT(1) /* Transmit LPI Exit */
+#define GMAC4_LPI_CTRL_STATUS_TLPIEN	BIT(0) /* Transmit LPI Entry */
 
 /* MAC Debug bitmap */
 #define GMAC_DEBUG_TFCSTS_MASK		GENMASK(18, 17)
@@ -225,6 +230,8 @@ enum power_event {
 #define MTL_CHAN_RX_DEBUG(x)		(MTL_CHANX_BASE_ADDR(x) + 0x38)
 
 #define MTL_OP_MODE_RSF			BIT(5)
+#define MTL_OP_MODE_TXQEN_MASK		GENMASK(3, 2)
+#define MTL_OP_MODE_TXQEN_AV		BIT(2)
 #define MTL_OP_MODE_TXQEN		BIT(3)
 #define MTL_OP_MODE_TSF			BIT(1)
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index 2f7d7ec59962..f3ed8f7853eb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -580,6 +580,25 @@ static int dwmac4_irq_status(struct mac_device_info *hw,
 		x->irq_receive_pmt_irq_n++;
 	}
 
+	/* MAC tx/rx EEE LPI entry/exit interrupts */
+	if (intr_status & lpi_irq) {
+		/* Clear LPI interrupt by reading MAC_LPI_Control_Status */
+		u32 status = readl(ioaddr + GMAC4_LPI_CTRL_STATUS);
+
+		if (status & GMAC4_LPI_CTRL_STATUS_TLPIEN) {
+			ret |= CORE_IRQ_TX_PATH_IN_LPI_MODE;
+			x->irq_tx_path_in_lpi_mode_n++;
+		}
+		if (status & GMAC4_LPI_CTRL_STATUS_TLPIEX) {
+			ret |= CORE_IRQ_TX_PATH_EXIT_LPI_MODE;
+			x->irq_tx_path_exit_lpi_mode_n++;
+		}
+		if (status & GMAC4_LPI_CTRL_STATUS_RLPIEN)
+			x->irq_rx_path_in_lpi_mode_n++;
+		if (status & GMAC4_LPI_CTRL_STATUS_RLPIEX)
+			x->irq_rx_path_exit_lpi_mode_n++;
+	}
+
 	dwmac_pcs_isr(ioaddr, GMAC_PCS_BASE, intr_status, x);
 	if (intr_status & PCS_RGSMIIIS_IRQ)
 		dwmac4_phystatus(ioaddr, x);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
index e84831e1b63b..c110f6850ffa 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
@@ -191,7 +191,7 @@ static void dwmac4_rx_watchdog(void __iomem *ioaddr, u32 riwt, u32 number_chan)
 }
 
 static void dwmac4_dma_rx_chan_op_mode(void __iomem *ioaddr, int mode,
-				       u32 channel, int fifosz)
+				       u32 channel, int fifosz, u8 qmode)
 {
 	unsigned int rqs = fifosz / 256 - 1;
 	u32 mtl_rx_op, mtl_rx_int;
@@ -218,8 +218,10 @@ static void dwmac4_dma_rx_chan_op_mode(void __iomem *ioaddr, int mode,
 	mtl_rx_op &= ~MTL_OP_MODE_RQS_MASK;
 	mtl_rx_op |= rqs << MTL_OP_MODE_RQS_SHIFT;
 
-	/* enable flow control only if each channel gets 4 KiB or more FIFO */
-	if (fifosz >= 4096) {
+	/* Enable flow control only if each channel gets 4 KiB or more FIFO and
+	 * only if channel is not an AVB channel.
+	 */
+	if ((fifosz >= 4096) && (qmode != MTL_QUEUE_AVB)) {
 		unsigned int rfd, rfa;
 
 		mtl_rx_op |= MTL_OP_MODE_EHFC;
@@ -271,9 +273,10 @@ static void dwmac4_dma_rx_chan_op_mode(void __iomem *ioaddr, int mode,
 }
 
 static void dwmac4_dma_tx_chan_op_mode(void __iomem *ioaddr, int mode,
-				       u32 channel)
+				       u32 channel, int fifosz, u8 qmode)
 {
 	u32 mtl_tx_op = readl(ioaddr + MTL_CHAN_TX_OP_MODE(channel));
+	unsigned int tqs = fifosz / 256 - 1;
 
 	if (mode == SF_DMA_MODE) {
 		pr_debug("GMAC: enable TX store and forward mode\n");
@@ -306,12 +309,18 @@ static void dwmac4_dma_tx_chan_op_mode(void __iomem *ioaddr, int mode,
 	 * For an IP with DWC_EQOS_NUM_TXQ > 1, the fields TXQEN and TQS are R/W
 	 * with reset values: TXQEN off, TQS 256 bytes.
 	 *
-	 * Write the bits in both cases, since it will have no effect when RO.
-	 * For DWC_EQOS_NUM_TXQ > 1, the top bits in MTL_OP_MODE_TQS_MASK might
-	 * be RO, however, writing the whole TQS field will result in a value
-	 * equal to DWC_EQOS_TXFIFO_SIZE, just like for DWC_EQOS_NUM_TXQ == 1.
+	 * TXQEN must be written for multi-channel operation and TQS must
+	 * reflect the available fifo size per queue (total fifo size / number
+	 * of enabled queues).
 	 */
-	mtl_tx_op |= MTL_OP_MODE_TXQEN | MTL_OP_MODE_TQS_MASK;
+	mtl_tx_op &= ~MTL_OP_MODE_TXQEN_MASK;
+	if (qmode != MTL_QUEUE_AVB)
+		mtl_tx_op |= MTL_OP_MODE_TXQEN;
+	else
+		mtl_tx_op |= MTL_OP_MODE_TXQEN_AV;
+	mtl_tx_op &= ~MTL_OP_MODE_TQS_MASK;
+	mtl_tx_op |= tqs << MTL_OP_MODE_TQS_SHIFT;
+
 	writel(mtl_tx_op, ioaddr +  MTL_CHAN_TX_OP_MODE(channel));
 }
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 16bd50929084..ff4fb5eae1af 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1749,12 +1749,20 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
 	u32 rx_channels_count = priv->plat->rx_queues_to_use;
 	u32 tx_channels_count = priv->plat->tx_queues_to_use;
 	int rxfifosz = priv->plat->rx_fifo_size;
+	int txfifosz = priv->plat->tx_fifo_size;
 	u32 txmode = 0;
 	u32 rxmode = 0;
 	u32 chan = 0;
+	u8 qmode = 0;
 
 	if (rxfifosz == 0)
 		rxfifosz = priv->dma_cap.rx_fifo_size;
+	if (txfifosz == 0)
+		txfifosz = priv->dma_cap.tx_fifo_size;
+
+	/* Adjust for real per queue fifo size */
+	rxfifosz /= rx_channels_count;
+	txfifosz /= tx_channels_count;
 
 	if (priv->plat->force_thresh_dma_mode) {
 		txmode = tc;
@@ -1777,12 +1785,19 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
 
 	/* configure all channels */
 	if (priv->synopsys_id >= DWMAC_CORE_4_00) {
-		for (chan = 0; chan < rx_channels_count; chan++)
+		for (chan = 0; chan < rx_channels_count; chan++) {
+			qmode = priv->plat->rx_queues_cfg[chan].mode_to_use;
+
 			priv->hw->dma->dma_rx_mode(priv->ioaddr, rxmode, chan,
-						   rxfifosz);
+						   rxfifosz, qmode);
+		}
 
-		for (chan = 0; chan < tx_channels_count; chan++)
-			priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan);
+		for (chan = 0; chan < tx_channels_count; chan++) {
+			qmode = priv->plat->tx_queues_cfg[chan].mode_to_use;
+
+			priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan,
+						   txfifosz, qmode);
+		}
 	} else {
 		priv->hw->dma->dma_mode(priv->ioaddr, txmode, rxmode,
 					rxfifosz);
@@ -1946,15 +1961,27 @@ static void stmmac_tx_err(struct stmmac_priv *priv, u32 chan)
 static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode,
 					  u32 rxmode, u32 chan)
 {
+	u8 rxqmode = priv->plat->rx_queues_cfg[chan].mode_to_use;
+	u8 txqmode = priv->plat->tx_queues_cfg[chan].mode_to_use;
+	u32 rx_channels_count = priv->plat->rx_queues_to_use;
+	u32 tx_channels_count = priv->plat->tx_queues_to_use;
 	int rxfifosz = priv->plat->rx_fifo_size;
+	int txfifosz = priv->plat->tx_fifo_size;
 
 	if (rxfifosz == 0)
 		rxfifosz = priv->dma_cap.rx_fifo_size;
+	if (txfifosz == 0)
+		txfifosz = priv->dma_cap.tx_fifo_size;
+
+	/* Adjust for real per queue fifo size */
+	rxfifosz /= rx_channels_count;
+	txfifosz /= tx_channels_count;
 
 	if (priv->synopsys_id >= DWMAC_CORE_4_00) {
 		priv->hw->dma->dma_rx_mode(priv->ioaddr, rxmode, chan,
-					   rxfifosz);
-		priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan);
+					   rxfifosz, rxqmode);
+		priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan,
+					   txfifosz, txqmode);
 	} else {
 		priv->hw->dma->dma_mode(priv->ioaddr, txmode, rxmode,
 					rxfifosz);
@@ -2217,10 +2244,8 @@ static void stmmac_init_tx_coalesce(struct stmmac_priv *priv)
 {
 	priv->tx_coal_frames = STMMAC_TX_FRAMES;
 	priv->tx_coal_timer = STMMAC_COAL_TX_TIMER;
-	init_timer(&priv->txtimer);
+	setup_timer(&priv->txtimer, stmmac_tx_timer, (unsigned long)priv);
 	priv->txtimer.expires = STMMAC_COAL_TIMER(priv->tx_coal_timer);
-	priv->txtimer.data = (unsigned long)priv;
-	priv->txtimer.function = stmmac_tx_timer;
 	add_timer(&priv->txtimer);
 }
 
@@ -3724,6 +3749,20 @@ static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	return ret;
 }
 
+static int stmmac_set_mac_address(struct net_device *ndev, void *addr)
+{
+	struct stmmac_priv *priv = netdev_priv(ndev);
+	int ret = 0;
+
+	ret = eth_mac_addr(ndev, addr);
+	if (ret)
+		return ret;
+
+	priv->hw->mac->set_umac_addr(priv->hw, ndev->dev_addr, 0);
+
+	return ret;
+}
+
 #ifdef CONFIG_DEBUG_FS
 static struct dentry *stmmac_fs_dir;
 
@@ -3951,7 +3990,7 @@ static const struct net_device_ops stmmac_netdev_ops = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller = stmmac_poll_controller,
 #endif
-	.ndo_set_mac_address = eth_mac_addr,
+	.ndo_set_mac_address = stmmac_set_mac_address,
 };
 
 /**
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 195eb7e71473..05f122b8424a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -318,10 +318,6 @@ static int stmmac_dt_phy(struct plat_stmmacenet_data *plat,
 	bool mdio = true;
 	static const struct of_device_id need_mdio_ids[] = {
 		{ .compatible = "snps,dwc-qos-ethernet-4.10" },
-		{ .compatible = "allwinner,sun8i-a83t-emac" },
-		{ .compatible = "allwinner,sun8i-h3-emac" },
-		{ .compatible = "allwinner,sun8i-v3s-emac" },
-		{ .compatible = "allwinner,sun50i-a64-emac" },
 		{},
 	};
 
diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c
index 382993c1561c..113bd57e2ea0 100644
--- a/drivers/net/ethernet/sun/cassini.c
+++ b/drivers/net/ethernet/sun/cassini.c
@@ -4079,9 +4079,9 @@ done:
 #endif
 }
 
-static void cas_link_timer(unsigned long data)
+static void cas_link_timer(struct timer_list *t)
 {
-	struct cas *cp = (struct cas *) data;
+	struct cas *cp = from_timer(cp, t, link_timer);
 	int mask, pending = 0, reset = 0;
 	unsigned long flags;
 
@@ -5039,9 +5039,7 @@ static int cas_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	spin_lock_init(&cp->stat_lock[N_TX_RINGS]);
 	mutex_init(&cp->pm_mutex);
 
-	init_timer(&cp->link_timer);
-	cp->link_timer.function = cas_link_timer;
-	cp->link_timer.data = (unsigned long) cp;
+	timer_setup(&cp->link_timer, cas_link_timer, 0);
 
 #if 1
 	/* Just in case the implementation of atomic operations
diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c
index 5b56c24b6ed2..5ea037672e6f 100644
--- a/drivers/net/ethernet/sun/ldmvsw.c
+++ b/drivers/net/ethernet/sun/ldmvsw.c
@@ -307,7 +307,7 @@ static int vsw_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 
 	/* Get (or create) the vnet associated with this port */
 	vp = vsw_get_vnet(hp, vdev->mp, &handle);
-	if (unlikely(IS_ERR(vp))) {
+	if (IS_ERR(vp)) {
 		err = PTR_ERR(vp);
 		pr_err("Failed to get vnet for vsw-port\n");
 		mdesc_release(hp);
@@ -363,8 +363,7 @@ static int vsw_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	list_add_rcu(&port->list, &vp->port_list);
 	spin_unlock_irqrestore(&vp->lock, flags);
 
-	setup_timer(&port->clean_timer, sunvnet_clean_timer_expire_common,
-		    (unsigned long)port);
+	timer_setup(&port->clean_timer, sunvnet_clean_timer_expire_common, 0);
 
 	err = register_netdev(dev);
 	if (err) {
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 8ab0fb6892d5..06001bacbe0f 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -2221,9 +2221,9 @@ static int niu_link_status(struct niu *np, int *link_up_p)
 	return err;
 }
 
-static void niu_timer(unsigned long __opaque)
+static void niu_timer(struct timer_list *t)
 {
-	struct niu *np = (struct niu *) __opaque;
+	struct niu *np = from_timer(np, t, timer);
 	unsigned long off;
 	int err, link_up;
 
@@ -6123,10 +6123,8 @@ static int niu_open(struct net_device *dev)
 
 	err = niu_init_hw(np);
 	if (!err) {
-		init_timer(&np->timer);
+		timer_setup(&np->timer, niu_timer, 0);
 		np->timer.expires = jiffies + HZ;
-		np->timer.data = (unsigned long) np;
-		np->timer.function = niu_timer;
 
 		err = niu_enable_interrupts(np, 1);
 		if (err)
@@ -6775,10 +6773,8 @@ static int niu_change_mtu(struct net_device *dev, int new_mtu)
 
 	err = niu_init_hw(np);
 	if (!err) {
-		init_timer(&np->timer);
+		timer_setup(&np->timer, niu_timer, 0);
 		np->timer.expires = jiffies + HZ;
-		np->timer.data = (unsigned long) np;
-		np->timer.function = niu_timer;
 
 		err = niu_enable_interrupts(np, 1);
 		if (err)
diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c
index 3189722110c2..0b1f41f6bceb 100644
--- a/drivers/net/ethernet/sun/sunbmac.c
+++ b/drivers/net/ethernet/sun/sunbmac.c
@@ -523,9 +523,9 @@ static int try_next_permutation(struct bigmac *bp, void __iomem *tregs)
 	return -1;
 }
 
-static void bigmac_timer(unsigned long data)
+static void bigmac_timer(struct timer_list *t)
 {
-	struct bigmac *bp = (struct bigmac *) data;
+	struct bigmac *bp = from_timer(bp, t, bigmac_timer);
 	void __iomem *tregs = bp->tregs;
 	int restart_timer = 0;
 
@@ -613,8 +613,6 @@ static void bigmac_begin_auto_negotiation(struct bigmac *bp)
 	bp->timer_state = ltrywait;
 	bp->timer_ticks = 0;
 	bp->bigmac_timer.expires = jiffies + (12 * HZ) / 10;
-	bp->bigmac_timer.data = (unsigned long) bp;
-	bp->bigmac_timer.function = bigmac_timer;
 	add_timer(&bp->bigmac_timer);
 }
 
@@ -921,7 +919,7 @@ static int bigmac_open(struct net_device *dev)
 		printk(KERN_ERR "BIGMAC: Can't order irq %d to go.\n", dev->irq);
 		return ret;
 	}
-	init_timer(&bp->bigmac_timer);
+	timer_setup(&bp->bigmac_timer, bigmac_timer, 0);
 	ret = bigmac_init_hw(bp, 0);
 	if (ret)
 		free_irq(dev->irq, bp);
@@ -1172,7 +1170,7 @@ static int bigmac_ether_init(struct platform_device *op,
 					      "board-version", 1);
 
 	/* Init auto-negotiation timer state. */
-	init_timer(&bp->bigmac_timer);
+	timer_setup(&bp->bigmac_timer, bigmac_timer, 0);
 	bp->timer_state = asleep;
 	bp->timer_ticks = 0;
 
diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c
index fa607d062cb3..a7afcee3c5ae 100644
--- a/drivers/net/ethernet/sun/sungem.c
+++ b/drivers/net/ethernet/sun/sungem.c
@@ -1496,9 +1496,9 @@ static int gem_mdio_link_not_up(struct gem *gp)
 	}
 }
 
-static void gem_link_timer(unsigned long data)
+static void gem_link_timer(struct timer_list *t)
 {
-	struct gem *gp = (struct gem *) data;
+	struct gem *gp = from_timer(gp, t, link_timer);
 	struct net_device *dev = gp->dev;
 	int restart_aneg = 0;
 
@@ -2910,9 +2910,7 @@ static int gem_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	gp->msg_enable = DEFAULT_MSG;
 
-	init_timer(&gp->link_timer);
-	gp->link_timer.function = gem_link_timer;
-	gp->link_timer.data = (unsigned long) gp;
+	timer_setup(&gp->link_timer, gem_link_timer, 0);
 
 	INIT_WORK(&gp->reset_task, gem_reset_task);
 
diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c
index 9e983e1d8249..0431f1e5f511 100644
--- a/drivers/net/ethernet/sun/sunhme.c
+++ b/drivers/net/ethernet/sun/sunhme.c
@@ -685,9 +685,9 @@ static int is_lucent_phy(struct happy_meal *hp)
 	return ret;
 }
 
-static void happy_meal_timer(unsigned long data)
+static void happy_meal_timer(struct timer_list *t)
 {
-	struct happy_meal *hp = (struct happy_meal *) data;
+	struct happy_meal *hp = from_timer(hp, t, happy_timer);
 	void __iomem *tregs = hp->tcvregs;
 	int restart_timer = 0;
 
@@ -1413,8 +1413,6 @@ force_link:
 
 	hp->timer_ticks = 0;
 	hp->happy_timer.expires = jiffies + (12 * HZ)/10;  /* 1.2 sec. */
-	hp->happy_timer.data = (unsigned long) hp;
-	hp->happy_timer.function = happy_meal_timer;
 	add_timer(&hp->happy_timer);
 }
 
@@ -2819,7 +2817,7 @@ static int happy_meal_sbus_probe_one(struct platform_device *op, int is_qfe)
 	hp->timer_state = asleep;
 	hp->timer_ticks = 0;
 
-	init_timer(&hp->happy_timer);
+	timer_setup(&hp->happy_timer, happy_meal_timer, 0);
 
 	hp->dev = dev;
 	dev->netdev_ops = &hme_netdev_ops;
@@ -3133,7 +3131,7 @@ static int happy_meal_pci_probe(struct pci_dev *pdev,
 	hp->timer_state = asleep;
 	hp->timer_ticks = 0;
 
-	init_timer(&hp->happy_timer);
+	timer_setup(&hp->happy_timer, happy_meal_timer, 0);
 
 	hp->irq = pdev->irq;
 	hp->dev = dev;
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index 0b95105f7060..27fb22638885 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -492,8 +492,7 @@ static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	pr_info("%s: PORT ( remote-mac %pM%s )\n",
 		vp->dev->name, port->raddr, switch_port ? " switch-port" : "");
 
-	setup_timer(&port->clean_timer, sunvnet_clean_timer_expire_common,
-		    (unsigned long)port);
+	timer_setup(&port->clean_timer, sunvnet_clean_timer_expire_common, 0);
 
 	napi_enable(&port->napi);
 	vio_port_up(&port->vio);
diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c
index ecf456c7b6d1..8aa3ce46bb81 100644
--- a/drivers/net/ethernet/sun/sunvnet_common.c
+++ b/drivers/net/ethernet/sun/sunvnet_common.c
@@ -1040,9 +1040,9 @@ static inline void vnet_free_skbs(struct sk_buff *skb)
 	}
 }
 
-void sunvnet_clean_timer_expire_common(unsigned long port0)
+void sunvnet_clean_timer_expire_common(struct timer_list *t)
 {
-	struct vnet_port *port = (struct vnet_port *)port0;
+	struct vnet_port *port = from_timer(port, t, clean_timer);
 	struct sk_buff *freeskbs;
 	unsigned pending;
 
diff --git a/drivers/net/ethernet/sun/sunvnet_common.h b/drivers/net/ethernet/sun/sunvnet_common.h
index 6a4dd1fb19bf..1ea0b016580a 100644
--- a/drivers/net/ethernet/sun/sunvnet_common.h
+++ b/drivers/net/ethernet/sun/sunvnet_common.h
@@ -130,7 +130,7 @@ struct vnet {
 	((__port)->vsw ? (__port)->dev : (__port)->vp->dev)
 
 /* Common funcs */
-void sunvnet_clean_timer_expire_common(unsigned long port0);
+void sunvnet_clean_timer_expire_common(struct timer_list *t);
 int sunvnet_open_common(struct net_device *dev);
 int sunvnet_close_common(struct net_device *dev);
 void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp);
diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c
index e9672b1f9968..031cf9c3435a 100644
--- a/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c
+++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c
@@ -335,7 +335,7 @@ static int xlgmac_alloc_pages(struct xlgmac_pdata *pdata,
 	dma_addr_t pages_dma;
 
 	/* Try to obtain pages, decreasing order if necessary */
-	gfp |= __GFP_COLD | __GFP_COMP | __GFP_NOWARN;
+	gfp |= __GFP_COMP | __GFP_NOWARN;
 	while (order >= 0) {
 		pages = alloc_pages(gfp, order);
 		if (pages)
diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c
index ddd43e09111e..cd1185e66133 100644
--- a/drivers/net/ethernet/ti/cpsw_ale.c
+++ b/drivers/net/ethernet/ti/cpsw_ale.c
@@ -859,9 +859,7 @@ void cpsw_ale_start(struct cpsw_ale *ale)
 	cpsw_ale_control_set(ale, 0, ALE_ENABLE, 1);
 	cpsw_ale_control_set(ale, 0, ALE_CLEAR, 1);
 
-	init_timer(&ale->timer);
-	ale->timer.data	    = (unsigned long)ale;
-	ale->timer.function = cpsw_ale_timer;
+	setup_timer(&ale->timer, cpsw_ale_timer, (unsigned long)ale);
 	if (ale->ageout) {
 		ale->timer.expires = jiffies + ale->ageout;
 		add_timer(&ale->timer);
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index 437d36289786..ed58c746e4af 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -906,7 +906,7 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq)
 		sw_data[0] = (u32)bufptr;
 	} else {
 		/* Allocate a secondary receive queue entry */
-		page = alloc_page(GFP_ATOMIC | GFP_DMA | __GFP_COLD);
+		page = alloc_page(GFP_ATOMIC | GFP_DMA);
 		if (unlikely(!page)) {
 			dev_warn_ratelimited(netcp->ndev_dev, "Secondary page alloc failed\n");
 			goto fail;
@@ -1887,7 +1887,7 @@ static int netcp_setup_tc(struct net_device *dev, enum tc_setup_type type,
 	/* setup tc must be called under rtnl lock */
 	ASSERT_RTNL();
 
-	if (type != TC_SETUP_MQPRIO)
+	if (type != TC_SETUP_QDISC_MQPRIO)
 		return -EOPNOTSUPP;
 
 	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
index 28cb38af1a34..4ad821655e51 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -3616,9 +3616,8 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev,
 	}
 	spin_unlock_bh(&gbe_dev->hw_stats_lock);
 
-	init_timer(&gbe_dev->timer);
-	gbe_dev->timer.data	 = (unsigned long)gbe_dev;
-	gbe_dev->timer.function = netcp_ethss_timer;
+	setup_timer(&gbe_dev->timer, netcp_ethss_timer,
+		    (unsigned long)gbe_dev);
 	gbe_dev->timer.expires	 = jiffies + GBE_TIMER_INTERVAL;
 	add_timer(&gbe_dev->timer);
 	*inst_priv = gbe_dev;
diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c
index c8d53d8c83ee..8f53d762fbc4 100644
--- a/drivers/net/ethernet/ti/tlan.c
+++ b/drivers/net/ethernet/ti/tlan.c
@@ -172,7 +172,8 @@ static u32	tlan_handle_tx_eoc(struct net_device *, u16);
 static u32	tlan_handle_status_check(struct net_device *, u16);
 static u32	tlan_handle_rx_eoc(struct net_device *, u16);
 
-static void	tlan_timer(unsigned long);
+static void	tlan_timer(struct timer_list *t);
+static void	tlan_phy_monitor(struct timer_list *t);
 
 static void	tlan_reset_lists(struct net_device *);
 static void	tlan_free_lists(struct net_device *);
@@ -190,7 +191,6 @@ static void	tlan_phy_power_up(struct net_device *);
 static void	tlan_phy_reset(struct net_device *);
 static void	tlan_phy_start_link(struct net_device *);
 static void	tlan_phy_finish_auto_neg(struct net_device *);
-static void     tlan_phy_monitor(unsigned long);
 
 /*
   static int	tlan_phy_nop(struct net_device *);
@@ -254,11 +254,10 @@ tlan_set_timer(struct net_device *dev, u32 ticks, u32 type)
 			spin_unlock_irqrestore(&priv->lock, flags);
 		return;
 	}
-	priv->timer.function = tlan_timer;
+	priv->timer.function = (TIMER_FUNC_TYPE)tlan_timer;
 	if (!in_irq())
 		spin_unlock_irqrestore(&priv->lock, flags);
 
-	priv->timer.data = (unsigned long) dev;
 	priv->timer_set_at = jiffies;
 	priv->timer_type = type;
 	mod_timer(&priv->timer, jiffies + ticks);
@@ -926,8 +925,8 @@ static int tlan_open(struct net_device *dev)
 		return err;
 	}
 
-	init_timer(&priv->timer);
-	init_timer(&priv->media_timer);
+	timer_setup(&priv->timer, NULL, 0);
+	timer_setup(&priv->media_timer, tlan_phy_monitor, 0);
 
 	tlan_start(dev);
 
@@ -1426,8 +1425,7 @@ static u32 tlan_handle_tx_eof(struct net_device *dev, u16 host_int)
 		tlan_dio_write8(dev->base_addr,
 				TLAN_LED_REG, TLAN_LED_LINK | TLAN_LED_ACT);
 		if (priv->timer.function == NULL) {
-			priv->timer.function = tlan_timer;
-			priv->timer.data = (unsigned long) dev;
+			priv->timer.function = (TIMER_FUNC_TYPE)tlan_timer;
 			priv->timer.expires = jiffies + TLAN_TIMER_ACT_DELAY;
 			priv->timer_set_at = jiffies;
 			priv->timer_type = TLAN_TIMER_ACTIVITY;
@@ -1578,8 +1576,7 @@ drop_and_reuse:
 		tlan_dio_write8(dev->base_addr,
 				TLAN_LED_REG, TLAN_LED_LINK | TLAN_LED_ACT);
 		if (priv->timer.function == NULL)  {
-			priv->timer.function = tlan_timer;
-			priv->timer.data = (unsigned long) dev;
+			priv->timer.function = (TIMER_FUNC_TYPE)tlan_timer;
 			priv->timer.expires = jiffies + TLAN_TIMER_ACT_DELAY;
 			priv->timer_set_at = jiffies;
 			priv->timer_type = TLAN_TIMER_ACTIVITY;
@@ -1836,10 +1833,10 @@ ThunderLAN driver timer function
  *
  **************************************************************/
 
-static void tlan_timer(unsigned long data)
+static void tlan_timer(struct timer_list *t)
 {
-	struct net_device	*dev = (struct net_device *) data;
-	struct tlan_priv	*priv = netdev_priv(dev);
+	struct tlan_priv	*priv = from_timer(priv, t, timer);
+	struct net_device	*dev = priv->dev;
 	u32		elapsed;
 	unsigned long	flags = 0;
 
@@ -1872,7 +1869,6 @@ static void tlan_timer(unsigned long data)
 				tlan_dio_write8(dev->base_addr,
 						TLAN_LED_REG, TLAN_LED_LINK);
 			} else  {
-				priv->timer.function = tlan_timer;
 				priv->timer.expires = priv->timer_set_at
 					+ TLAN_TIMER_ACT_DELAY;
 				spin_unlock_irqrestore(&priv->lock, flags);
@@ -2317,8 +2313,6 @@ tlan_finish_reset(struct net_device *dev)
 			} else
 				netdev_info(dev, "Link active\n");
 			/* Enabling link beat monitoring */
-			priv->media_timer.function = tlan_phy_monitor;
-			priv->media_timer.data = (unsigned long) dev;
 			priv->media_timer.expires = jiffies + HZ;
 			add_timer(&priv->media_timer);
 		}
@@ -2763,10 +2757,10 @@ static void tlan_phy_finish_auto_neg(struct net_device *dev)
  *
  *******************************************************************/
 
-static void tlan_phy_monitor(unsigned long data)
+static void tlan_phy_monitor(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *) data;
-	struct tlan_priv *priv = netdev_priv(dev);
+	struct tlan_priv *priv = from_timer(priv, t, media_timer);
+	struct net_device *dev = priv->dev;
 	u16     phy;
 	u16     phy_status;
 
diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c
index cec9e70ab995..a913538d3213 100644
--- a/drivers/net/ethernet/toshiba/spider_net.c
+++ b/drivers/net/ethernet/toshiba/spider_net.c
@@ -2256,16 +2256,14 @@ spider_net_setup_netdev(struct spider_net_card *card)
 
 	pci_set_drvdata(card->pdev, netdev);
 
-	init_timer(&card->tx_timer);
-	card->tx_timer.function =
-		(void (*)(unsigned long)) spider_net_cleanup_tx_ring;
-	card->tx_timer.data = (unsigned long) card;
+	setup_timer(&card->tx_timer,
+		    (void(*)(unsigned long))spider_net_cleanup_tx_ring,
+		    (unsigned long)card);
 	netdev->irq = card->pdev->irq;
 
 	card->aneg_count = 0;
-	init_timer(&card->aneg_timer);
-	card->aneg_timer.function = spider_net_link_phy;
-	card->aneg_timer.data = (unsigned long) card;
+	setup_timer(&card->aneg_timer, spider_net_link_phy,
+		    (unsigned long)card);
 
 	netif_napi_add(netdev, &card->napi,
 		       spider_net_poll, SPIDER_NET_NAPI_WEIGHT);
diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c
index c2d15d9c0c33..0624b71ab5d4 100644
--- a/drivers/net/ethernet/tundra/tsi108_eth.c
+++ b/drivers/net/ethernet/tundra/tsi108_eth.c
@@ -164,7 +164,7 @@ static struct platform_driver tsi_eth_driver = {
 	},
 };
 
-static void tsi108_timed_checker(unsigned long dev_ptr);
+static void tsi108_timed_checker(struct timer_list *t);
 
 #ifdef DEBUG
 static void dump_eth_one(struct net_device *dev)
@@ -1370,7 +1370,7 @@ static int tsi108_open(struct net_device *dev)
 
 	napi_enable(&data->napi);
 
-	setup_timer(&data->timer, tsi108_timed_checker, (unsigned long)dev);
+	timer_setup(&data->timer, tsi108_timed_checker, 0);
 	mod_timer(&data->timer, jiffies + 1);
 
 	tsi108_restart_rx(data, dev);
@@ -1666,10 +1666,10 @@ regs_fail:
  * Thus, we have to do it using a timer.
  */
 
-static void tsi108_timed_checker(unsigned long dev_ptr)
+static void tsi108_timed_checker(struct timer_list *t)
 {
-	struct net_device *dev = (struct net_device *)dev_ptr;
-	struct tsi108_prv_data *data = netdev_priv(dev);
+	struct tsi108_prv_data *data = from_timer(data, t, timer);
+	struct net_device *dev = data->dev;
 
 	tsi108_check_phy(dev);
 	tsi108_check_rxring(dev);