Diffstat (limited to 'drivers/net/ethernet')
506 files changed, 37709 insertions, 19957 deletions
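Note on the recurring hunk pattern below: most of the mechanical one-line changes across drivers come from the ndo_tx_timeout callback gaining a TX-queue index argument. The following is a minimal sketch of a handler using the new prototype, added here only for orientation; the driver name, private struct and reset work item are hypothetical, and only the callback signature reflects the change carried by this diff.

#include <linux/netdevice.h>
#include <linux/workqueue.h>

struct foo_priv {
	struct work_struct reset_work;	/* hypothetical reset handler */
};

/* Sketch of a .ndo_tx_timeout handler with the new two-argument prototype */
static void foo_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
	struct foo_priv *priv = netdev_priv(dev);

	/* txqueue identifies which TX queue the stack observed stalling */
	netdev_err(dev, "TX timeout on queue %u, scheduling reset\n", txqueue);
	schedule_work(&priv->reset_work);
}

static const struct net_device_ops foo_netdev_ops = {
	.ndo_tx_timeout	= foo_tx_timeout,
};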
diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c index 3da97996bdf3..8cafd06ff0c4 100644 --- a/drivers/net/ethernet/3com/3c509.c +++ b/drivers/net/ethernet/3com/3c509.c @@ -196,7 +196,7 @@ static struct net_device_stats *el3_get_stats(struct net_device *dev); static int el3_rx(struct net_device *dev); static int el3_close(struct net_device *dev); static void set_multicast_list(struct net_device *dev); -static void el3_tx_timeout (struct net_device *dev); +static void el3_tx_timeout (struct net_device *dev, unsigned int txqueue); static void el3_down(struct net_device *dev); static void el3_up(struct net_device *dev); static const struct ethtool_ops ethtool_ops; @@ -689,7 +689,7 @@ el3_open(struct net_device *dev) } static void -el3_tx_timeout (struct net_device *dev) +el3_tx_timeout (struct net_device *dev, unsigned int txqueue) { int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c index b15752267c8d..1e233e2f0a5a 100644 --- a/drivers/net/ethernet/3com/3c515.c +++ b/drivers/net/ethernet/3com/3c515.c @@ -371,7 +371,7 @@ static void corkscrew_timer(struct timer_list *t); static netdev_tx_t corkscrew_start_xmit(struct sk_buff *skb, struct net_device *dev); static int corkscrew_rx(struct net_device *dev); -static void corkscrew_timeout(struct net_device *dev); +static void corkscrew_timeout(struct net_device *dev, unsigned int txqueue); static int boomerang_rx(struct net_device *dev); static irqreturn_t corkscrew_interrupt(int irq, void *dev_id); static int corkscrew_close(struct net_device *dev); @@ -961,7 +961,7 @@ static void corkscrew_timer(struct timer_list *t) #endif /* AUTOMEDIA */ } -static void corkscrew_timeout(struct net_device *dev) +static void corkscrew_timeout(struct net_device *dev, unsigned int txqueue) { int i; struct corkscrew_private *vp = netdev_priv(dev); diff --git a/drivers/net/ethernet/3com/3c574_cs.c b/drivers/net/ethernet/3com/3c574_cs.c index 3044a6f35f04..ef1c3151fbb2 100644 --- a/drivers/net/ethernet/3com/3c574_cs.c +++ b/drivers/net/ethernet/3com/3c574_cs.c @@ -234,7 +234,7 @@ static void update_stats(struct net_device *dev); static struct net_device_stats *el3_get_stats(struct net_device *dev); static int el3_rx(struct net_device *dev, int worklimit); static int el3_close(struct net_device *dev); -static void el3_tx_timeout(struct net_device *dev); +static void el3_tx_timeout(struct net_device *dev, unsigned int txqueue); static int el3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static void set_rx_mode(struct net_device *dev); static void set_multicast_list(struct net_device *dev); @@ -690,7 +690,7 @@ static int el3_open(struct net_device *dev) return 0; } -static void el3_tx_timeout(struct net_device *dev) +static void el3_tx_timeout(struct net_device *dev, unsigned int txqueue) { unsigned int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/3com/3c589_cs.c b/drivers/net/ethernet/3com/3c589_cs.c index 2b2695311bda..d47cde6c5f08 100644 --- a/drivers/net/ethernet/3com/3c589_cs.c +++ b/drivers/net/ethernet/3com/3c589_cs.c @@ -173,7 +173,7 @@ static void update_stats(struct net_device *dev); static struct net_device_stats *el3_get_stats(struct net_device *dev); static int el3_rx(struct net_device *dev); static int el3_close(struct net_device *dev); -static void el3_tx_timeout(struct net_device *dev); +static void el3_tx_timeout(struct net_device *dev, unsigned int txqueue); static void set_rx_mode(struct net_device *dev); static void 
set_multicast_list(struct net_device *dev); static const struct ethtool_ops netdev_ethtool_ops; @@ -526,7 +526,7 @@ static int el3_open(struct net_device *dev) return 0; } -static void el3_tx_timeout(struct net_device *dev) +static void el3_tx_timeout(struct net_device *dev, unsigned int txqueue) { unsigned int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 8785c2ff3825..a2b7f7ab8170 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -776,7 +776,7 @@ static void set_rx_mode(struct net_device *dev); #ifdef CONFIG_PCI static int vortex_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); #endif -static void vortex_tx_timeout(struct net_device *dev); +static void vortex_tx_timeout(struct net_device *dev, unsigned int txqueue); static void acpi_set_WOL(struct net_device *dev); static const struct ethtool_ops vortex_ethtool_ops; static void set_8021q_mode(struct net_device *dev, int enable); @@ -1548,7 +1548,7 @@ vortex_up(struct net_device *dev) struct vortex_private *vp = netdev_priv(dev); void __iomem *ioaddr = vp->ioaddr; unsigned int config; - int i, mii_reg1, mii_reg5, err = 0; + int i, mii_reg5, err = 0; if (VORTEX_PCI(vp)) { pci_set_power_state(VORTEX_PCI(vp), PCI_D0); /* Go active */ @@ -1605,7 +1605,7 @@ vortex_up(struct net_device *dev) window_write32(vp, config, 3, Wn3_Config); if (dev->if_port == XCVR_MII || dev->if_port == XCVR_NWAY) { - mii_reg1 = mdio_read(dev, vp->phys[0], MII_BMSR); + mdio_read(dev, vp->phys[0], MII_BMSR); mii_reg5 = mdio_read(dev, vp->phys[0], MII_LPA); vp->partner_flow_ctrl = ((mii_reg5 & 0x0400) != 0); vp->mii.full_duplex = vp->full_duplex; @@ -1877,7 +1877,7 @@ leave_media_alone: iowrite16(FakeIntr, ioaddr + EL3_CMD); } -static void vortex_tx_timeout(struct net_device *dev) +static void vortex_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct vortex_private *vp = netdev_priv(dev); void __iomem *ioaddr = vp->ioaddr; diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index be823c186517..14fce6658106 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -2013,7 +2013,7 @@ typhoon_stop_runtime(struct typhoon *tp, int wait_type) } static void -typhoon_tx_timeout(struct net_device *dev) +typhoon_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct typhoon *tp = netdev_priv(dev); diff --git a/drivers/net/ethernet/8390/8390.c b/drivers/net/ethernet/8390/8390.c index 78f3e532c600..0e0aa4016858 100644 --- a/drivers/net/ethernet/8390/8390.c +++ b/drivers/net/ethernet/8390/8390.c @@ -36,9 +36,9 @@ void ei_set_multicast_list(struct net_device *dev) } EXPORT_SYMBOL(ei_set_multicast_list); -void ei_tx_timeout(struct net_device *dev) +void ei_tx_timeout(struct net_device *dev, unsigned int txqueue) { - __ei_tx_timeout(dev); + __ei_tx_timeout(dev, txqueue); } EXPORT_SYMBOL(ei_tx_timeout); diff --git a/drivers/net/ethernet/8390/8390.h b/drivers/net/ethernet/8390/8390.h index 3e2f2c2e7b58..529c728f334a 100644 --- a/drivers/net/ethernet/8390/8390.h +++ b/drivers/net/ethernet/8390/8390.h @@ -32,7 +32,7 @@ void NS8390_init(struct net_device *dev, int startp); int ei_open(struct net_device *dev); int ei_close(struct net_device *dev); irqreturn_t ei_interrupt(int irq, void *dev_id); -void ei_tx_timeout(struct net_device *dev); +void ei_tx_timeout(struct net_device *dev, unsigned int txqueue); netdev_tx_t ei_start_xmit(struct sk_buff *skb, struct net_device *dev); void 
ei_set_multicast_list(struct net_device *dev); struct net_device_stats *ei_get_stats(struct net_device *dev); @@ -50,7 +50,7 @@ void NS8390p_init(struct net_device *dev, int startp); int eip_open(struct net_device *dev); int eip_close(struct net_device *dev); irqreturn_t eip_interrupt(int irq, void *dev_id); -void eip_tx_timeout(struct net_device *dev); +void eip_tx_timeout(struct net_device *dev, unsigned int txqueue); netdev_tx_t eip_start_xmit(struct sk_buff *skb, struct net_device *dev); void eip_set_multicast_list(struct net_device *dev); struct net_device_stats *eip_get_stats(struct net_device *dev); diff --git a/drivers/net/ethernet/8390/8390p.c b/drivers/net/ethernet/8390/8390p.c index 6cf36992a2c6..6834742057b3 100644 --- a/drivers/net/ethernet/8390/8390p.c +++ b/drivers/net/ethernet/8390/8390p.c @@ -41,9 +41,9 @@ void eip_set_multicast_list(struct net_device *dev) } EXPORT_SYMBOL(eip_set_multicast_list); -void eip_tx_timeout(struct net_device *dev) +void eip_tx_timeout(struct net_device *dev, unsigned int txqueue) { - __ei_tx_timeout(dev); + __ei_tx_timeout(dev, txqueue); } EXPORT_SYMBOL(eip_tx_timeout); diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c index 0b6bbf63f7ca..aeae7966a082 100644 --- a/drivers/net/ethernet/8390/axnet_cs.c +++ b/drivers/net/ethernet/8390/axnet_cs.c @@ -83,7 +83,7 @@ static netdev_tx_t axnet_start_xmit(struct sk_buff *skb, struct net_device *dev); static struct net_device_stats *get_stats(struct net_device *dev); static void set_multicast_list(struct net_device *dev); -static void axnet_tx_timeout(struct net_device *dev); +static void axnet_tx_timeout(struct net_device *dev, unsigned int txqueue); static irqreturn_t ei_irq_wrapper(int irq, void *dev_id); static void ei_watchdog(struct timer_list *t); static void axnet_reset_8390(struct net_device *dev); @@ -903,7 +903,7 @@ static int ax_close(struct net_device *dev) * completed (or failed) - i.e. never posted a Tx related interrupt. */ -static void axnet_tx_timeout(struct net_device *dev) +static void axnet_tx_timeout(struct net_device *dev, unsigned int txqueue) { long e8390_base = dev->base_addr; struct ei_device *ei_local = netdev_priv(dev); diff --git a/drivers/net/ethernet/8390/lib8390.c b/drivers/net/ethernet/8390/lib8390.c index c9c55c9eab9f..babc92e2692e 100644 --- a/drivers/net/ethernet/8390/lib8390.c +++ b/drivers/net/ethernet/8390/lib8390.c @@ -251,7 +251,7 @@ static int __ei_close(struct net_device *dev) * completed (or failed) - i.e. never posted a Tx related interrupt. 
*/ -static void __ei_tx_timeout(struct net_device *dev) +static void __ei_tx_timeout(struct net_device *dev, unsigned int txqueue) { unsigned long e8390_base = dev->base_addr; struct ei_device *ei_local = netdev_priv(dev); diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c index 816540e6beac..165d18405b0c 100644 --- a/drivers/net/ethernet/adaptec/starfire.c +++ b/drivers/net/ethernet/adaptec/starfire.c @@ -576,7 +576,7 @@ static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *dev, int phy_id, int location, int value); static int netdev_open(struct net_device *dev); static void check_duplex(struct net_device *dev); -static void tx_timeout(struct net_device *dev); +static void tx_timeout(struct net_device *dev, unsigned int txqueue); static void init_ring(struct net_device *dev); static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); static irqreturn_t intr_handler(int irq, void *dev_instance); @@ -1105,7 +1105,7 @@ static void check_duplex(struct net_device *dev) } -static void tx_timeout(struct net_device *dev) +static void tx_timeout(struct net_device *dev, unsigned int txqueue) { struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->base; diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c index 174344c450af..cb6a761d5c11 100644 --- a/drivers/net/ethernet/agere/et131x.c +++ b/drivers/net/ethernet/agere/et131x.c @@ -3651,15 +3651,6 @@ static int et131x_close(struct net_device *netdev) return del_timer_sync(&adapter->error_timer); } -static int et131x_ioctl(struct net_device *netdev, struct ifreq *reqbuf, - int cmd) -{ - if (!netdev->phydev) - return -EINVAL; - - return phy_mii_ioctl(netdev->phydev, reqbuf, cmd); -} - /* et131x_set_packet_filter - Configures the Rx Packet filtering */ static int et131x_set_packet_filter(struct et131x_adapter *adapter) { @@ -3811,7 +3802,7 @@ drop_err: * specified by the 'tx_timeo" element in the net_device structure (see * et131x_alloc_device() to see how this value is set). 
*/ -static void et131x_tx_timeout(struct net_device *netdev) +static void et131x_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct et131x_adapter *adapter = netdev_priv(netdev); struct tx_ring *tx_ring = &adapter->tx_ring; @@ -3899,7 +3890,7 @@ static const struct net_device_ops et131x_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_get_stats = et131x_stats, - .ndo_do_ioctl = et131x_ioctl, + .ndo_do_ioctl = phy_do_ioctl, }; static int et131x_pci_setup(struct pci_dev *pdev, diff --git a/drivers/net/ethernet/alacritech/slicoss.c b/drivers/net/ethernet/alacritech/slicoss.c index 80ef3e15bd22..9daef4c8feef 100644 --- a/drivers/net/ethernet/alacritech/slicoss.c +++ b/drivers/net/ethernet/alacritech/slicoss.c @@ -1791,7 +1791,7 @@ static int slic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) sdev->is_fiber = slic_is_fiber(pdev->subsystem_device); sdev->pdev = pdev; sdev->netdev = dev; - sdev->regs = ioremap_nocache(pci_resource_start(pdev, 0), + sdev->regs = ioremap(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0)); if (!sdev->regs) { dev_err(&pdev->dev, "failed to map registers\n"); diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c index 0537df06a9b5..22cadfbeedfb 100644 --- a/drivers/net/ethernet/allwinner/sun4i-emac.c +++ b/drivers/net/ethernet/allwinner/sun4i-emac.c @@ -207,19 +207,6 @@ static void emac_inblk_32bit(void __iomem *reg, void *data, int count) readsl(reg, data, round_up(count, 4) / 4); } -static int emac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) -{ - struct phy_device *phydev = dev->phydev; - - if (!netif_running(dev)) - return -EINVAL; - - if (!phydev) - return -ENODEV; - - return phy_mii_ioctl(phydev, rq, cmd); -} - /* ethtool ops */ static void emac_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) @@ -407,7 +394,7 @@ static void emac_init_device(struct net_device *dev) } /* Our watchdog timed out. 
Called by the networking layer */ -static void emac_timeout(struct net_device *dev) +static void emac_timeout(struct net_device *dev, unsigned int txqueue) { struct emac_board_info *db = netdev_priv(dev); unsigned long flags; @@ -791,7 +778,7 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_start_xmit = emac_start_xmit, .ndo_tx_timeout = emac_timeout, .ndo_set_rx_mode = emac_set_rx_mode, - .ndo_do_ioctl = emac_ioctl, + .ndo_do_ioctl = phy_do_ioctl_running, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = emac_set_mac_address, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c index 46b4207d3266..f366faf88eee 100644 --- a/drivers/net/ethernet/alteon/acenic.c +++ b/drivers/net/ethernet/alteon/acenic.c @@ -437,7 +437,7 @@ static const struct ethtool_ops ace_ethtool_ops = { .set_link_ksettings = ace_set_link_ksettings, }; -static void ace_watchdog(struct net_device *dev); +static void ace_watchdog(struct net_device *dev, unsigned int txqueue); static const struct net_device_ops ace_netdev_ops = { .ndo_open = ace_open, @@ -1542,7 +1542,7 @@ static void ace_set_rxtx_parms(struct net_device *dev, int jumbo) } -static void ace_watchdog(struct net_device *data) +static void ace_watchdog(struct net_device *data, unsigned int txqueue) { struct net_device *dev = data; struct ace_private *ap = netdev_priv(dev); diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 4cd53fc338b5..1671c1f36691 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -1332,10 +1332,10 @@ static int request_and_map(struct platform_device *pdev, const char *name, return -EBUSY; } - *ptr = devm_ioremap_nocache(device, region->start, + *ptr = devm_ioremap(device, region->start, resource_size(region)); if (*ptr == NULL) { - dev_err(device, "ioremap_nocache of %s failed!", name); + dev_err(device, "ioremap of %s failed!", name); return -ENOMEM; } diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index fc96c66b44cb..b4e891d49a94 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -740,7 +740,9 @@ static int ena_set_channels(struct net_device *netdev, struct ena_adapter *adapter = netdev_priv(netdev); u32 count = channels->combined_count; /* The check for max value is already done in ethtool */ - if (count < ENA_MIN_NUM_IO_QUEUES) + if (count < ENA_MIN_NUM_IO_QUEUES || + (ena_xdp_present(adapter) && + !ena_xdp_legal_queue_count(adapter, channels->combined_count))) return -EINVAL; return ena_update_queue_count(adapter, count); diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 948583fdcc28..894e8c1a8cf1 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -36,7 +36,6 @@ #include <linux/cpu_rmap.h> #endif /* CONFIG_RFS_ACCEL */ #include <linux/ethtool.h> -#include <linux/if_vlan.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/numa.h> @@ -47,6 +46,7 @@ #include <net/ip.h> #include "ena_netdev.h" +#include <linux/bpf_trace.h> #include "ena_pci_id_tbl.h" static char version[] = DEVICE_NAME " v" DRV_MODULE_VERSION "\n"; @@ -78,7 +78,37 @@ static void check_for_admin_com_state(struct ena_adapter *adapter); static void ena_destroy_device(struct ena_adapter *adapter, bool 
graceful); static int ena_restore_device(struct ena_adapter *adapter); -static void ena_tx_timeout(struct net_device *dev) +static void ena_init_io_rings(struct ena_adapter *adapter, + int first_index, int count); +static void ena_init_napi_in_range(struct ena_adapter *adapter, int first_index, + int count); +static void ena_del_napi_in_range(struct ena_adapter *adapter, int first_index, + int count); +static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid); +static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter, + int first_index, + int count); +static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid); +static void ena_free_tx_resources(struct ena_adapter *adapter, int qid); +static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget); +static void ena_destroy_all_tx_queues(struct ena_adapter *adapter); +static void ena_free_all_io_tx_resources(struct ena_adapter *adapter); +static void ena_napi_disable_in_range(struct ena_adapter *adapter, + int first_index, int count); +static void ena_napi_enable_in_range(struct ena_adapter *adapter, + int first_index, int count); +static int ena_up(struct ena_adapter *adapter); +static void ena_down(struct ena_adapter *adapter); +static void ena_unmask_interrupt(struct ena_ring *tx_ring, + struct ena_ring *rx_ring); +static void ena_update_ring_numa_node(struct ena_ring *tx_ring, + struct ena_ring *rx_ring); +static void ena_unmap_tx_buff(struct ena_ring *tx_ring, + struct ena_tx_buffer *tx_info); +static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter, + int first_index, int count); + +static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct ena_adapter *adapter = netdev_priv(dev); @@ -123,6 +153,448 @@ static int ena_change_mtu(struct net_device *dev, int new_mtu) return ret; } +static int ena_xmit_common(struct net_device *dev, + struct ena_ring *ring, + struct ena_tx_buffer *tx_info, + struct ena_com_tx_ctx *ena_tx_ctx, + u16 next_to_use, + u32 bytes) +{ + struct ena_adapter *adapter = netdev_priv(dev); + int rc, nb_hw_desc; + + if (unlikely(ena_com_is_doorbell_needed(ring->ena_com_io_sq, + ena_tx_ctx))) { + netif_dbg(adapter, tx_queued, dev, + "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n", + ring->qid); + ena_com_write_sq_doorbell(ring->ena_com_io_sq); + } + + /* prepare the packet's descriptors to dma engine */ + rc = ena_com_prepare_tx(ring->ena_com_io_sq, ena_tx_ctx, + &nb_hw_desc); + + /* In case there isn't enough space in the queue for the packet, + * we simply drop it. All other failure reasons of + * ena_com_prepare_tx() are fatal and therefore require a device reset. + */ + if (unlikely(rc)) { + netif_err(adapter, tx_queued, dev, + "failed to prepare tx bufs\n"); + u64_stats_update_begin(&ring->syncp); + ring->tx_stats.prepare_ctx_err++; + u64_stats_update_end(&ring->syncp); + if (rc != -ENOMEM) { + adapter->reset_reason = + ENA_REGS_RESET_DRIVER_INVALID_STATE; + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + } + return rc; + } + + u64_stats_update_begin(&ring->syncp); + ring->tx_stats.cnt++; + ring->tx_stats.bytes += bytes; + u64_stats_update_end(&ring->syncp); + + tx_info->tx_descs = nb_hw_desc; + tx_info->last_jiffies = jiffies; + tx_info->print_once = 0; + + ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use, + ring->ring_size); + return 0; +} + +/* This is the XDP napi callback. XDP queues use a separate napi callback + * than Rx/Tx queues. 
+ */ +static int ena_xdp_io_poll(struct napi_struct *napi, int budget) +{ + struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); + u32 xdp_work_done, xdp_budget; + struct ena_ring *xdp_ring; + int napi_comp_call = 0; + int ret; + + xdp_ring = ena_napi->xdp_ring; + xdp_ring->first_interrupt = ena_napi->first_interrupt; + + xdp_budget = budget; + + if (!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags) || + test_bit(ENA_FLAG_TRIGGER_RESET, &xdp_ring->adapter->flags)) { + napi_complete_done(napi, 0); + return 0; + } + + xdp_work_done = ena_clean_xdp_irq(xdp_ring, xdp_budget); + + /* If the device is about to reset or down, avoid unmask + * the interrupt and return 0 so NAPI won't reschedule + */ + if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags))) { + napi_complete_done(napi, 0); + ret = 0; + } else if (xdp_budget > xdp_work_done) { + napi_comp_call = 1; + if (napi_complete_done(napi, xdp_work_done)) + ena_unmask_interrupt(xdp_ring, NULL); + ena_update_ring_numa_node(xdp_ring, NULL); + ret = xdp_work_done; + } else { + ret = xdp_budget; + } + + u64_stats_update_begin(&xdp_ring->syncp); + xdp_ring->tx_stats.napi_comp += napi_comp_call; + xdp_ring->tx_stats.tx_poll++; + u64_stats_update_end(&xdp_ring->syncp); + + return ret; +} + +static int ena_xdp_tx_map_buff(struct ena_ring *xdp_ring, + struct ena_tx_buffer *tx_info, + struct xdp_buff *xdp, + void **push_hdr, + u32 *push_len) +{ + struct ena_adapter *adapter = xdp_ring->adapter; + struct ena_com_buf *ena_buf; + dma_addr_t dma = 0; + u32 size; + + tx_info->xdpf = convert_to_xdp_frame(xdp); + size = tx_info->xdpf->len; + ena_buf = tx_info->bufs; + + /* llq push buffer */ + *push_len = min_t(u32, size, xdp_ring->tx_max_header_size); + *push_hdr = tx_info->xdpf->data; + + if (size - *push_len > 0) { + dma = dma_map_single(xdp_ring->dev, + *push_hdr + *push_len, + size - *push_len, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(xdp_ring->dev, dma))) + goto error_report_dma_error; + + tx_info->map_linear_data = 1; + tx_info->num_of_bufs = 1; + } + + ena_buf->paddr = dma; + ena_buf->len = size; + + return 0; + +error_report_dma_error: + u64_stats_update_begin(&xdp_ring->syncp); + xdp_ring->tx_stats.dma_mapping_err++; + u64_stats_update_end(&xdp_ring->syncp); + netdev_warn(adapter->netdev, "failed to map xdp buff\n"); + + xdp_return_frame_rx_napi(tx_info->xdpf); + tx_info->xdpf = NULL; + tx_info->num_of_bufs = 0; + + return -EINVAL; +} + +static int ena_xdp_xmit_buff(struct net_device *dev, + struct xdp_buff *xdp, + int qid, + struct ena_rx_buffer *rx_info) +{ + struct ena_adapter *adapter = netdev_priv(dev); + struct ena_com_tx_ctx ena_tx_ctx = {0}; + struct ena_tx_buffer *tx_info; + struct ena_ring *xdp_ring; + u16 next_to_use, req_id; + int rc; + void *push_hdr; + u32 push_len; + + xdp_ring = &adapter->tx_ring[qid]; + next_to_use = xdp_ring->next_to_use; + req_id = xdp_ring->free_ids[next_to_use]; + tx_info = &xdp_ring->tx_buffer_info[req_id]; + tx_info->num_of_bufs = 0; + page_ref_inc(rx_info->page); + tx_info->xdp_rx_page = rx_info->page; + + rc = ena_xdp_tx_map_buff(xdp_ring, tx_info, xdp, &push_hdr, &push_len); + if (unlikely(rc)) + goto error_drop_packet; + + ena_tx_ctx.ena_bufs = tx_info->bufs; + ena_tx_ctx.push_header = push_hdr; + ena_tx_ctx.num_bufs = tx_info->num_of_bufs; + ena_tx_ctx.req_id = req_id; + ena_tx_ctx.header_len = push_len; + + rc = ena_xmit_common(dev, + xdp_ring, + tx_info, + &ena_tx_ctx, + next_to_use, + xdp->data_end - xdp->data); + if (rc) + goto error_unmap_dma; + /* trigger 
the dma engine. ena_com_write_sq_doorbell() + * has a mb + */ + ena_com_write_sq_doorbell(xdp_ring->ena_com_io_sq); + u64_stats_update_begin(&xdp_ring->syncp); + xdp_ring->tx_stats.doorbells++; + u64_stats_update_end(&xdp_ring->syncp); + + return NETDEV_TX_OK; + +error_unmap_dma: + ena_unmap_tx_buff(xdp_ring, tx_info); + tx_info->xdpf = NULL; +error_drop_packet: + + return NETDEV_TX_OK; +} + +static int ena_xdp_execute(struct ena_ring *rx_ring, + struct xdp_buff *xdp, + struct ena_rx_buffer *rx_info) +{ + struct bpf_prog *xdp_prog; + u32 verdict = XDP_PASS; + + rcu_read_lock(); + xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog); + + if (!xdp_prog) + goto out; + + verdict = bpf_prog_run_xdp(xdp_prog, xdp); + + if (verdict == XDP_TX) + ena_xdp_xmit_buff(rx_ring->netdev, + xdp, + rx_ring->qid + rx_ring->adapter->num_io_queues, + rx_info); + else if (unlikely(verdict == XDP_ABORTED)) + trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); + else if (unlikely(verdict > XDP_TX)) + bpf_warn_invalid_xdp_action(verdict); +out: + rcu_read_unlock(); + return verdict; +} + +static void ena_init_all_xdp_queues(struct ena_adapter *adapter) +{ + adapter->xdp_first_ring = adapter->num_io_queues; + adapter->xdp_num_queues = adapter->num_io_queues; + + ena_init_io_rings(adapter, + adapter->xdp_first_ring, + adapter->xdp_num_queues); +} + +static int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter) +{ + int rc = 0; + + rc = ena_setup_tx_resources_in_range(adapter, adapter->xdp_first_ring, + adapter->xdp_num_queues); + if (rc) + goto setup_err; + + rc = ena_create_io_tx_queues_in_range(adapter, + adapter->xdp_first_ring, + adapter->xdp_num_queues); + if (rc) + goto create_err; + + return 0; + +create_err: + ena_free_all_io_tx_resources(adapter); +setup_err: + return rc; +} + +/* Provides a way for both kernel and bpf-prog to know + * more about the RX-queue a given XDP frame arrived on. + */ +static int ena_xdp_register_rxq_info(struct ena_ring *rx_ring) +{ + int rc; + + rc = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, rx_ring->qid); + + if (rc) { + netif_err(rx_ring->adapter, ifup, rx_ring->netdev, + "Failed to register xdp rx queue info. RX queue num %d rc: %d\n", + rx_ring->qid, rc); + goto err; + } + + rc = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, MEM_TYPE_PAGE_SHARED, + NULL); + + if (rc) { + netif_err(rx_ring->adapter, ifup, rx_ring->netdev, + "Failed to register xdp rx queue info memory model. 
RX queue num %d rc: %d\n", + rx_ring->qid, rc); + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + } + +err: + return rc; +} + +static void ena_xdp_unregister_rxq_info(struct ena_ring *rx_ring) +{ + xdp_rxq_info_unreg_mem_model(&rx_ring->xdp_rxq); + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); +} + +void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter, + struct bpf_prog *prog, + int first, + int count) +{ + struct ena_ring *rx_ring; + int i = 0; + + for (i = first; i < count; i++) { + rx_ring = &adapter->rx_ring[i]; + xchg(&rx_ring->xdp_bpf_prog, prog); + if (prog) { + ena_xdp_register_rxq_info(rx_ring); + rx_ring->rx_headroom = XDP_PACKET_HEADROOM; + } else { + ena_xdp_unregister_rxq_info(rx_ring); + rx_ring->rx_headroom = 0; + } + } +} + +void ena_xdp_exchange_program(struct ena_adapter *adapter, + struct bpf_prog *prog) +{ + struct bpf_prog *old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog); + + ena_xdp_exchange_program_rx_in_range(adapter, + prog, + 0, + adapter->num_io_queues); + + if (old_bpf_prog) + bpf_prog_put(old_bpf_prog); +} + +static int ena_destroy_and_free_all_xdp_queues(struct ena_adapter *adapter) +{ + bool was_up; + int rc; + + was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); + + if (was_up) + ena_down(adapter); + + adapter->xdp_first_ring = 0; + adapter->xdp_num_queues = 0; + ena_xdp_exchange_program(adapter, NULL); + if (was_up) { + rc = ena_up(adapter); + if (rc) + return rc; + } + return 0; +} + +static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + struct bpf_prog *prog = bpf->prog; + struct bpf_prog *old_bpf_prog; + int rc, prev_mtu; + bool is_up; + + is_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); + rc = ena_xdp_allowed(adapter); + if (rc == ENA_XDP_ALLOWED) { + old_bpf_prog = adapter->xdp_bpf_prog; + if (prog) { + if (!is_up) { + ena_init_all_xdp_queues(adapter); + } else if (!old_bpf_prog) { + ena_down(adapter); + ena_init_all_xdp_queues(adapter); + } + ena_xdp_exchange_program(adapter, prog); + + if (is_up && !old_bpf_prog) { + rc = ena_up(adapter); + if (rc) + return rc; + } + } else if (old_bpf_prog) { + rc = ena_destroy_and_free_all_xdp_queues(adapter); + if (rc) + return rc; + } + + prev_mtu = netdev->max_mtu; + netdev->max_mtu = prog ? ENA_XDP_MAX_MTU : adapter->max_mtu; + + if (!old_bpf_prog) + netif_info(adapter, drv, adapter->netdev, + "xdp program set, changing the max_mtu from %d to %d", + prev_mtu, netdev->max_mtu); + + } else if (rc == ENA_XDP_CURRENT_MTU_TOO_LARGE) { + netif_err(adapter, drv, adapter->netdev, + "Failed to set xdp program, the current MTU (%d) is larger than the maximum allowed MTU (%lu) while xdp is on", + netdev->mtu, ENA_XDP_MAX_MTU); + NL_SET_ERR_MSG_MOD(bpf->extack, + "Failed to set xdp program, the current MTU is larger than the maximum allowed MTU. Check the dmesg for more info"); + return -EINVAL; + } else if (rc == ENA_XDP_NO_ENOUGH_QUEUES) { + netif_err(adapter, drv, adapter->netdev, + "Failed to set xdp program, the Rx/Tx channel count should be at most half of the maximum allowed channel count. The current queue count (%d), the maximal queue count (%d)\n", + adapter->num_io_queues, adapter->max_num_io_queues); + NL_SET_ERR_MSG_MOD(bpf->extack, + "Failed to set xdp program, there is no enough space for allocating XDP queues, Check the dmesg for more info"); + return -EINVAL; + } + + return 0; +} + +/* This is the main xdp callback, it's used by the kernel to set/unset the xdp + * program as well as to query the current xdp program id. 
+ */ +static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + switch (bpf->command) { + case XDP_SETUP_PROG: + return ena_xdp_set(netdev, bpf); + case XDP_QUERY_PROG: + bpf->prog_id = adapter->xdp_bpf_prog ? + adapter->xdp_bpf_prog->aux->id : 0; + break; + default: + return -EINVAL; + } + return 0; +} + static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter) { #ifdef CONFIG_RFS_ACCEL @@ -164,7 +636,8 @@ static void ena_init_io_rings_common(struct ena_adapter *adapter, u64_stats_init(&ring->syncp); } -static void ena_init_io_rings(struct ena_adapter *adapter) +static void ena_init_io_rings(struct ena_adapter *adapter, + int first_index, int count) { struct ena_com_dev *ena_dev; struct ena_ring *txr, *rxr; @@ -172,13 +645,12 @@ static void ena_init_io_rings(struct ena_adapter *adapter) ena_dev = adapter->ena_dev; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = first_index; i < first_index + count; i++) { txr = &adapter->tx_ring[i]; rxr = &adapter->rx_ring[i]; - /* TX/RX common ring state */ + /* TX common ring state */ ena_init_io_rings_common(adapter, txr, i); - ena_init_io_rings_common(adapter, rxr, i); /* TX specific ring state */ txr->ring_size = adapter->requested_tx_ring_size; @@ -188,14 +660,20 @@ static void ena_init_io_rings(struct ena_adapter *adapter) txr->smoothed_interval = ena_com_get_nonadaptive_moderation_interval_tx(ena_dev); - /* RX specific ring state */ - rxr->ring_size = adapter->requested_rx_ring_size; - rxr->rx_copybreak = adapter->rx_copybreak; - rxr->sgl_size = adapter->max_rx_sgl_size; - rxr->smoothed_interval = - ena_com_get_nonadaptive_moderation_interval_rx(ena_dev); - rxr->empty_rx_queue = 0; - adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + /* Don't init RX queues for xdp queues */ + if (!ENA_IS_XDP_INDEX(adapter, i)) { + /* RX common ring state */ + ena_init_io_rings_common(adapter, rxr, i); + + /* RX specific ring state */ + rxr->ring_size = adapter->requested_rx_ring_size; + rxr->rx_copybreak = adapter->rx_copybreak; + rxr->sgl_size = adapter->max_rx_sgl_size; + rxr->smoothed_interval = + ena_com_get_nonadaptive_moderation_interval_rx(ena_dev); + rxr->empty_rx_queue = 0; + adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + } } } @@ -285,16 +763,13 @@ static void ena_free_tx_resources(struct ena_adapter *adapter, int qid) tx_ring->push_buf_intermediate_buf = NULL; } -/* ena_setup_all_tx_resources - allocate I/O Tx queues resources for All queues - * @adapter: private structure - * - * Return 0 on success, negative on failure - */ -static int ena_setup_all_tx_resources(struct ena_adapter *adapter) +static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter, + int first_index, + int count) { int i, rc = 0; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = first_index; i < first_index + count; i++) { rc = ena_setup_tx_resources(adapter, i); if (rc) goto err_setup_tx; @@ -308,11 +783,20 @@ err_setup_tx: "Tx queue %d: allocation failed\n", i); /* rewind the index freeing the rings as we go */ - while (i--) + while (first_index < i--) ena_free_tx_resources(adapter, i); return rc; } +static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter, + int first_index, int count) +{ + int i; + + for (i = first_index; i < first_index + count; i++) + ena_free_tx_resources(adapter, i); +} + /* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues * @adapter: board private structure * @@ -320,10 
+804,10 @@ err_setup_tx: */ static void ena_free_all_io_tx_resources(struct ena_adapter *adapter) { - int i; - - for (i = 0; i < adapter->num_io_queues; i++) - ena_free_tx_resources(adapter, i); + ena_free_all_io_tx_resources_in_range(adapter, + 0, + adapter->xdp_num_queues + + adapter->num_io_queues); } static int validate_rx_req_id(struct ena_ring *rx_ring, u16 req_id) @@ -495,8 +979,8 @@ static int ena_alloc_rx_page(struct ena_ring *rx_ring, rx_info->page = page; rx_info->page_offset = 0; ena_buf = &rx_info->ena_buf; - ena_buf->paddr = dma; - ena_buf->len = ENA_PAGE_SIZE; + ena_buf->paddr = dma + rx_ring->rx_headroom; + ena_buf->len = ENA_PAGE_SIZE - rx_ring->rx_headroom; return 0; } @@ -513,7 +997,9 @@ static void ena_free_rx_page(struct ena_ring *rx_ring, return; } - dma_unmap_page(rx_ring->dev, ena_buf->paddr, ENA_PAGE_SIZE, + dma_unmap_page(rx_ring->dev, + ena_buf->paddr - rx_ring->rx_headroom, + ENA_PAGE_SIZE, DMA_FROM_DEVICE); __free_page(page); @@ -620,8 +1106,8 @@ static void ena_free_all_rx_bufs(struct ena_adapter *adapter) ena_free_rx_bufs(adapter, i); } -static void ena_unmap_tx_skb(struct ena_ring *tx_ring, - struct ena_tx_buffer *tx_info) +static void ena_unmap_tx_buff(struct ena_ring *tx_ring, + struct ena_tx_buffer *tx_info) { struct ena_com_buf *ena_buf; u32 cnt; @@ -675,7 +1161,7 @@ static void ena_free_tx_bufs(struct ena_ring *tx_ring) tx_ring->qid, i); } - ena_unmap_tx_skb(tx_ring, tx_info); + ena_unmap_tx_buff(tx_ring, tx_info); dev_kfree_skb_any(tx_info->skb); } @@ -688,7 +1174,7 @@ static void ena_free_all_tx_bufs(struct ena_adapter *adapter) struct ena_ring *tx_ring; int i; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) { tx_ring = &adapter->tx_ring[i]; ena_free_tx_bufs(tx_ring); } @@ -699,7 +1185,7 @@ static void ena_destroy_all_tx_queues(struct ena_adapter *adapter) u16 ena_qid; int i; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) { ena_qid = ENA_IO_TXQ_IDX(i); ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); } @@ -723,6 +1209,32 @@ static void ena_destroy_all_io_queues(struct ena_adapter *adapter) ena_destroy_all_rx_queues(adapter); } +static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id, + struct ena_tx_buffer *tx_info, bool is_xdp) +{ + if (tx_info) + netif_err(ring->adapter, + tx_done, + ring->netdev, + "tx_info doesn't have valid %s", + is_xdp ? 
"xdp frame" : "skb"); + else + netif_err(ring->adapter, + tx_done, + ring->netdev, + "Invalid req_id: %hu\n", + req_id); + + u64_stats_update_begin(&ring->syncp); + ring->tx_stats.bad_req_id++; + u64_stats_update_end(&ring->syncp); + + /* Trigger device reset */ + ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID; + set_bit(ENA_FLAG_TRIGGER_RESET, &ring->adapter->flags); + return -EFAULT; +} + static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) { struct ena_tx_buffer *tx_info = NULL; @@ -733,21 +1245,20 @@ static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) return 0; } - if (tx_info) - netif_err(tx_ring->adapter, tx_done, tx_ring->netdev, - "tx_info doesn't have valid skb\n"); - else - netif_err(tx_ring->adapter, tx_done, tx_ring->netdev, - "Invalid req_id: %hu\n", req_id); + return handle_invalid_req_id(tx_ring, req_id, tx_info, false); +} - u64_stats_update_begin(&tx_ring->syncp); - tx_ring->tx_stats.bad_req_id++; - u64_stats_update_end(&tx_ring->syncp); +static int validate_xdp_req_id(struct ena_ring *xdp_ring, u16 req_id) +{ + struct ena_tx_buffer *tx_info = NULL; - /* Trigger device reset */ - tx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID; - set_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags); - return -EFAULT; + if (likely(req_id < xdp_ring->ring_size)) { + tx_info = &xdp_ring->tx_buffer_info[req_id]; + if (likely(tx_info->xdpf)) + return 0; + } + + return handle_invalid_req_id(xdp_ring, req_id, tx_info, true); } static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) @@ -786,7 +1297,7 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) tx_info->skb = NULL; tx_info->last_jiffies = 0; - ena_unmap_tx_skb(tx_ring, tx_info); + ena_unmap_tx_buff(tx_ring, tx_info); netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev, "tx_poll: q %d skb %p completed\n", tx_ring->qid, @@ -1037,6 +1548,33 @@ static void ena_set_rx_hash(struct ena_ring *rx_ring, } } +int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp) +{ + struct ena_rx_buffer *rx_info; + int ret; + + rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]; + xdp->data = page_address(rx_info->page) + + rx_info->page_offset + rx_ring->rx_headroom; + xdp_set_data_meta_invalid(xdp); + xdp->data_hard_start = page_address(rx_info->page); + xdp->data_end = xdp->data + rx_ring->ena_bufs[0].len; + /* If for some reason we received a bigger packet than + * we expect, then we simply drop it + */ + if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU)) + return XDP_DROP; + + ret = ena_xdp_execute(rx_ring, xdp, rx_info); + + /* The xdp program might expand the headers */ + if (ret == XDP_PASS) { + rx_info->page_offset = xdp->data - xdp->data_hard_start; + rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data; + } + + return ret; +} /* ena_clean_rx_irq - Cleanup RX irq * @rx_ring: RX ring to clean * @napi: napi handler @@ -1048,23 +1586,27 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, u32 budget) { u16 next_to_clean = rx_ring->next_to_clean; - u32 res_budget, work_done; - struct ena_com_rx_ctx ena_rx_ctx; struct ena_adapter *adapter; + u32 res_budget, work_done; + int rx_copybreak_pkt = 0; + int refill_threshold; struct sk_buff *skb; int refill_required; - int refill_threshold; - int rc = 0; + struct xdp_buff xdp; int total_len = 0; - int rx_copybreak_pkt = 0; + int xdp_verdict; + int rc = 0; int i; netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, "%s qid %d\n", __func__, rx_ring->qid); 
res_budget = budget; + xdp.rxq = &rx_ring->xdp_rxq; do { + xdp_verdict = XDP_PASS; + skb = NULL; ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; ena_rx_ctx.max_bufs = rx_ring->sgl_size; ena_rx_ctx.descs = 0; @@ -1082,12 +1624,22 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto, ena_rx_ctx.l4_proto, ena_rx_ctx.hash); + if (ena_xdp_present_ring(rx_ring)) + xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp); + /* allocate skb and fill it */ - skb = ena_rx_skb(rx_ring, rx_ring->ena_bufs, ena_rx_ctx.descs, - &next_to_clean); + if (xdp_verdict == XDP_PASS) + skb = ena_rx_skb(rx_ring, + rx_ring->ena_bufs, + ena_rx_ctx.descs, + &next_to_clean); - /* exit if we failed to retrieve a buffer */ if (unlikely(!skb)) { + if (xdp_verdict == XDP_TX) { + ena_free_rx_page(rx_ring, + &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]); + res_budget--; + } for (i = 0; i < ena_rx_ctx.descs; i++) { rx_ring->free_ids[next_to_clean] = rx_ring->ena_bufs[i].req_id; @@ -1095,6 +1647,8 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, ENA_RX_RING_IDX_NEXT(next_to_clean, rx_ring->ring_size); } + if (xdp_verdict == XDP_TX || xdp_verdict == XDP_DROP) + continue; break; } @@ -1188,9 +1742,14 @@ static void ena_unmask_interrupt(struct ena_ring *tx_ring, struct ena_ring *rx_ring) { struct ena_eth_io_intr_reg intr_reg; - u32 rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ? - rx_ring->smoothed_interval : - ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev); + u32 rx_interval = 0; + /* Rx ring can be NULL when for XDP tx queues which don't have an + * accompanying rx_ring pair. + */ + if (rx_ring) + rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ? + rx_ring->smoothed_interval : + ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev); /* Update intr register: rx intr delay, * tx intr delay and interrupt unmask @@ -1203,8 +1762,9 @@ static void ena_unmask_interrupt(struct ena_ring *tx_ring, /* It is a shared MSI-X. * Tx and Rx CQ have pointer to it. 
* So we use one of them to reach the intr reg + * The Tx ring is used because the rx_ring is NULL for XDP queues */ - ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg); + ena_com_unmask_intr(tx_ring->ena_com_io_cq, &intr_reg); } static void ena_update_ring_numa_node(struct ena_ring *tx_ring, @@ -1222,22 +1782,82 @@ static void ena_update_ring_numa_node(struct ena_ring *tx_ring, if (numa_node != NUMA_NO_NODE) { ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node); - ena_com_update_numa_node(rx_ring->ena_com_io_cq, numa_node); + if (rx_ring) + ena_com_update_numa_node(rx_ring->ena_com_io_cq, + numa_node); } tx_ring->cpu = cpu; - rx_ring->cpu = cpu; + if (rx_ring) + rx_ring->cpu = cpu; return; out: put_cpu(); } +static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget) +{ + u32 total_done = 0; + u16 next_to_clean; + u32 tx_bytes = 0; + int tx_pkts = 0; + u16 req_id; + int rc; + + if (unlikely(!xdp_ring)) + return 0; + next_to_clean = xdp_ring->next_to_clean; + + while (tx_pkts < budget) { + struct ena_tx_buffer *tx_info; + struct xdp_frame *xdpf; + + rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq, + &req_id); + if (rc) + break; + + rc = validate_xdp_req_id(xdp_ring, req_id); + if (rc) + break; + + tx_info = &xdp_ring->tx_buffer_info[req_id]; + xdpf = tx_info->xdpf; + + tx_info->xdpf = NULL; + tx_info->last_jiffies = 0; + ena_unmap_tx_buff(xdp_ring, tx_info); + + netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev, + "tx_poll: q %d skb %p completed\n", xdp_ring->qid, + xdpf); + + tx_bytes += xdpf->len; + tx_pkts++; + total_done += tx_info->tx_descs; + + __free_page(tx_info->xdp_rx_page); + xdp_ring->free_ids[next_to_clean] = req_id; + next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean, + xdp_ring->ring_size); + } + + xdp_ring->next_to_clean = next_to_clean; + ena_com_comp_ack(xdp_ring->ena_com_io_sq, total_done); + ena_com_update_dev_comp_head(xdp_ring->ena_com_io_cq); + + netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev, + "tx_poll: q %d done. 
total pkts: %d\n", + xdp_ring->qid, tx_pkts); + + return tx_pkts; +} + static int ena_io_poll(struct napi_struct *napi, int budget) { struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); struct ena_ring *tx_ring, *rx_ring; - int tx_work_done; int rx_work_done = 0; int tx_budget; @@ -1247,6 +1867,9 @@ static int ena_io_poll(struct napi_struct *napi, int budget) tx_ring = ena_napi->tx_ring; rx_ring = ena_napi->rx_ring; + tx_ring->first_interrupt = ena_napi->first_interrupt; + rx_ring->first_interrupt = ena_napi->first_interrupt; + tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER; if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) || @@ -1322,8 +1945,7 @@ static irqreturn_t ena_intr_msix_io(int irq, void *data) { struct ena_napi *ena_napi = data; - ena_napi->tx_ring->first_interrupt = true; - ena_napi->rx_ring->first_interrupt = true; + ena_napi->first_interrupt = true; napi_schedule_irqoff(&ena_napi->napi); @@ -1398,10 +2020,12 @@ static void ena_setup_io_intr(struct ena_adapter *adapter) { struct net_device *netdev; int irq_idx, i, cpu; + int io_queue_count; netdev = adapter->netdev; + io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = 0; i < io_queue_count; i++) { irq_idx = ENA_IO_IRQ_IDX(i); cpu = i % num_online_cpus(); @@ -1529,45 +2153,64 @@ static void ena_disable_io_intr_sync(struct ena_adapter *adapter) synchronize_irq(adapter->irq_tbl[i].vector); } -static void ena_del_napi(struct ena_adapter *adapter) +static void ena_del_napi_in_range(struct ena_adapter *adapter, + int first_index, + int count) { int i; - for (i = 0; i < adapter->num_io_queues; i++) - netif_napi_del(&adapter->ena_napi[i].napi); + for (i = first_index; i < first_index + count; i++) { + /* Check if napi was initialized before */ + if (!ENA_IS_XDP_INDEX(adapter, i) || + adapter->ena_napi[i].xdp_ring) + netif_napi_del(&adapter->ena_napi[i].napi); + else + WARN_ON(ENA_IS_XDP_INDEX(adapter, i) && + adapter->ena_napi[i].xdp_ring); + } } -static void ena_init_napi(struct ena_adapter *adapter) +static void ena_init_napi_in_range(struct ena_adapter *adapter, + int first_index, int count) { - struct ena_napi *napi; + struct ena_napi *napi = {0}; int i; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = first_index; i < first_index + count; i++) { napi = &adapter->ena_napi[i]; netif_napi_add(adapter->netdev, &adapter->ena_napi[i].napi, - ena_io_poll, + ENA_IS_XDP_INDEX(adapter, i) ? 
ena_xdp_io_poll : ena_io_poll, ENA_NAPI_BUDGET); - napi->rx_ring = &adapter->rx_ring[i]; - napi->tx_ring = &adapter->tx_ring[i]; + + if (!ENA_IS_XDP_INDEX(adapter, i)) { + napi->rx_ring = &adapter->rx_ring[i]; + napi->tx_ring = &adapter->tx_ring[i]; + } else { + napi->xdp_ring = &adapter->tx_ring[i]; + } napi->qid = i; } } -static void ena_napi_disable_all(struct ena_adapter *adapter) +static void ena_napi_disable_in_range(struct ena_adapter *adapter, + int first_index, + int count) { int i; - for (i = 0; i < adapter->num_io_queues; i++) + for (i = first_index; i < first_index + count; i++) napi_disable(&adapter->ena_napi[i].napi); } -static void ena_napi_enable_all(struct ena_adapter *adapter) +static void ena_napi_enable_in_range(struct ena_adapter *adapter, + int first_index, + int count) { int i; - for (i = 0; i < adapter->num_io_queues; i++) + for (i = first_index; i < first_index + count; i++) napi_enable(&adapter->ena_napi[i].napi); } @@ -1582,7 +2225,7 @@ static int ena_rss_configure(struct ena_adapter *adapter) rc = ena_rss_init_default(adapter); if (rc && (rc != -EOPNOTSUPP)) { netif_err(adapter, ifup, adapter->netdev, - "Failed to init RSS rc: %d\n", rc); + "Failed to init RSS rc: %d\n", rc); return rc; } } @@ -1620,7 +2263,9 @@ static int ena_up_complete(struct ena_adapter *adapter) /* enable transmits */ netif_tx_start_all_queues(adapter->netdev); - ena_napi_enable_all(adapter); + ena_napi_enable_in_range(adapter, + 0, + adapter->xdp_num_queues + adapter->num_io_queues); return 0; } @@ -1653,7 +2298,7 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) if (rc) { netif_err(adapter, ifup, adapter->netdev, "Failed to create I/O TX queue num %d rc: %d\n", - qid, rc); + qid, rc); return rc; } @@ -1672,12 +2317,13 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) return rc; } -static int ena_create_all_io_tx_queues(struct ena_adapter *adapter) +static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter, + int first_index, int count) { struct ena_com_dev *ena_dev = adapter->ena_dev; int rc, i; - for (i = 0; i < adapter->num_io_queues; i++) { + for (i = first_index; i < first_index + count; i++) { rc = ena_create_io_tx_queue(adapter, i); if (rc) goto create_err; @@ -1686,7 +2332,7 @@ static int ena_create_all_io_tx_queues(struct ena_adapter *adapter) return 0; create_err: - while (i--) + while (i-- > first_index) ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i)); return rc; @@ -1731,13 +2377,15 @@ static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid) netif_err(adapter, ifup, adapter->netdev, "Failed to get RX queue handlers. RX queue num %d rc: %d\n", qid, rc); - ena_com_destroy_io_queue(ena_dev, ena_qid); - return rc; + goto err; } ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node); return rc; +err: + ena_com_destroy_io_queue(ena_dev, ena_qid); + return rc; } static int ena_create_all_io_rx_queues(struct ena_adapter *adapter) @@ -1764,7 +2412,8 @@ create_err: } static void set_io_rings_size(struct ena_adapter *adapter, - int new_tx_size, int new_rx_size) + int new_tx_size, + int new_rx_size) { int i; @@ -1798,14 +2447,24 @@ static int create_queues_with_size_backoff(struct ena_adapter *adapter) * ones due to past queue allocation failures. 
*/ set_io_rings_size(adapter, adapter->requested_tx_ring_size, - adapter->requested_rx_ring_size); + adapter->requested_rx_ring_size); while (1) { - rc = ena_setup_all_tx_resources(adapter); + if (ena_xdp_present(adapter)) { + rc = ena_setup_and_create_all_xdp_queues(adapter); + + if (rc) + goto err_setup_tx; + } + rc = ena_setup_tx_resources_in_range(adapter, + 0, + adapter->num_io_queues); if (rc) goto err_setup_tx; - rc = ena_create_all_io_tx_queues(adapter); + rc = ena_create_io_tx_queues_in_range(adapter, + 0, + adapter->num_io_queues); if (rc) goto err_create_tx_queues; @@ -1829,7 +2488,7 @@ err_setup_tx: if (rc != -ENOMEM) { netif_err(adapter, ifup, adapter->netdev, "Queue creation failed with error code %d\n", - rc); + rc); return rc; } @@ -1852,7 +2511,7 @@ err_setup_tx: new_rx_ring_size = cur_rx_ring_size / 2; if (new_tx_ring_size < ENA_MIN_RING_SIZE || - new_rx_ring_size < ENA_MIN_RING_SIZE) { + new_rx_ring_size < ENA_MIN_RING_SIZE) { netif_err(adapter, ifup, adapter->netdev, "Queue creation failed with the smallest possible queue size of %d for both queues. Not retrying with smaller queues\n", ENA_MIN_RING_SIZE); @@ -1871,10 +2530,11 @@ err_setup_tx: static int ena_up(struct ena_adapter *adapter) { - int rc, i; + int io_queue_count, rc, i; netdev_dbg(adapter->netdev, "%s\n", __func__); + io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; ena_setup_io_intr(adapter); /* napi poll functions should be initialized before running @@ -1882,7 +2542,7 @@ static int ena_up(struct ena_adapter *adapter) * interrupt, causing the ISR to fire immediately while the poll * function wasn't set yet, causing a null dereference */ - ena_init_napi(adapter); + ena_init_napi_in_range(adapter, 0, io_queue_count); rc = ena_request_io_irq(adapter); if (rc) @@ -1913,7 +2573,7 @@ static int ena_up(struct ena_adapter *adapter) /* schedule napi in case we had pending packets * from the last time we disable napi */ - for (i = 0; i < adapter->num_io_queues; i++) + for (i = 0; i < io_queue_count; i++) napi_schedule(&adapter->ena_napi[i].napi); return rc; @@ -1926,13 +2586,15 @@ err_up: err_create_queues_with_backoff: ena_free_io_irq(adapter); err_req_irq: - ena_del_napi(adapter); + ena_del_napi_in_range(adapter, 0, io_queue_count); return rc; } static void ena_down(struct ena_adapter *adapter) { + int io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; + netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__); clear_bit(ENA_FLAG_DEV_UP, &adapter->flags); @@ -1945,7 +2607,7 @@ static void ena_down(struct ena_adapter *adapter) netif_tx_disable(adapter->netdev); /* After this point the napi handler won't enable the tx queue */ - ena_napi_disable_all(adapter); + ena_napi_disable_in_range(adapter, 0, io_queue_count); /* After destroy the queue there won't be any new interrupts */ @@ -1963,7 +2625,7 @@ static void ena_down(struct ena_adapter *adapter) ena_disable_io_intr_sync(adapter); ena_free_io_irq(adapter); - ena_del_napi(adapter); + ena_del_napi_in_range(adapter, 0, io_queue_count); ena_free_all_tx_bufs(adapter); ena_free_all_rx_bufs(adapter); @@ -2053,23 +2715,47 @@ int ena_update_queue_sizes(struct ena_adapter *adapter, ena_close(adapter->netdev); adapter->requested_tx_ring_size = new_tx_size; adapter->requested_rx_ring_size = new_rx_size; - ena_init_io_rings(adapter); + ena_init_io_rings(adapter, + 0, + adapter->xdp_num_queues + + adapter->num_io_queues); return dev_was_up ? 
ena_up(adapter) : 0; } int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count) { struct ena_com_dev *ena_dev = adapter->ena_dev; + int prev_channel_count; bool dev_was_up; dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); ena_close(adapter->netdev); + prev_channel_count = adapter->num_io_queues; adapter->num_io_queues = new_channel_count; + if (ena_xdp_present(adapter) && + ena_xdp_allowed(adapter) == ENA_XDP_ALLOWED) { + adapter->xdp_first_ring = new_channel_count; + adapter->xdp_num_queues = new_channel_count; + if (prev_channel_count > new_channel_count) + ena_xdp_exchange_program_rx_in_range(adapter, + NULL, + new_channel_count, + prev_channel_count); + else + ena_xdp_exchange_program_rx_in_range(adapter, + adapter->xdp_bpf_prog, + prev_channel_count, + new_channel_count); + } + /* We need to destroy the rss table so that the indirection * table will be reinitialized by ena_up() */ ena_com_rss_destroy(ena_dev); - ena_init_io_rings(adapter); + ena_init_io_rings(adapter, + 0, + adapter->xdp_num_queues + + adapter->num_io_queues); return dev_was_up ? ena_open(adapter->netdev) : 0; } @@ -2253,7 +2939,7 @@ error_report_dma_error: tx_info->skb = NULL; tx_info->num_of_bufs += i; - ena_unmap_tx_skb(tx_ring, tx_info); + ena_unmap_tx_buff(tx_ring, tx_info); return -EINVAL; } @@ -2268,7 +2954,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) struct netdev_queue *txq; void *push_hdr; u16 next_to_use, req_id, header_len; - int qid, rc, nb_hw_desc; + int qid, rc; netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb); /* Determine which tx ring we will be placed on */ @@ -2303,50 +2989,17 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) /* set flags and meta data */ ena_tx_csum(&ena_tx_ctx, skb); - if (unlikely(ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx))) { - netif_dbg(adapter, tx_queued, dev, - "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n", - qid); - ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); - } - - /* prepare the packet's descriptors to dma engine */ - rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, - &nb_hw_desc); - - /* ena_com_prepare_tx() can't fail due to overflow of tx queue, - * since the number of free descriptors in the queue is checked - * after sending the previous packet. In case there isn't enough - * space in the queue for the next packet, it is stopped - * until there is again enough available space in the queue. - * All other failure reasons of ena_com_prepare_tx() are fatal - * and therefore require a device reset. 
- */ - if (unlikely(rc)) { - netif_err(adapter, tx_queued, dev, - "failed to prepare tx bufs\n"); - u64_stats_update_begin(&tx_ring->syncp); - tx_ring->tx_stats.prepare_ctx_err++; - u64_stats_update_end(&tx_ring->syncp); - adapter->reset_reason = ENA_REGS_RESET_DRIVER_INVALID_STATE; - set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + rc = ena_xmit_common(dev, + tx_ring, + tx_info, + &ena_tx_ctx, + next_to_use, + skb->len); + if (rc) goto error_unmap_dma; - } netdev_tx_sent_queue(txq, skb->len); - u64_stats_update_begin(&tx_ring->syncp); - tx_ring->tx_stats.cnt++; - tx_ring->tx_stats.bytes += skb->len; - u64_stats_update_end(&tx_ring->syncp); - - tx_info->tx_descs = nb_hw_desc; - tx_info->last_jiffies = jiffies; - tx_info->print_once = 0; - - tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use, - tx_ring->ring_size); - /* stop the queue when no more space available, the packet can have up * to sgl_size + 2. one for the meta descriptor and one for header * (if the header is larger than tx_max_header_size). @@ -2393,7 +3046,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; error_unmap_dma: - ena_unmap_tx_skb(tx_ring, tx_info); + ena_unmap_tx_buff(tx_ring, tx_info); tx_info->skb = NULL; error_drop_packet: @@ -2572,6 +3225,7 @@ static const struct net_device_ops ena_netdev_ops = { .ndo_change_mtu = ena_change_mtu, .ndo_set_mac_address = NULL, .ndo_validate_addr = eth_validate_addr, + .ndo_bpf = ena_xdp, }; static int ena_device_validate_params(struct ena_adapter *adapter, @@ -2951,7 +3605,9 @@ static void check_for_missing_completions(struct ena_adapter *adapter) struct ena_ring *tx_ring; struct ena_ring *rx_ring; int i, budget, rc; + int io_queue_count; + io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues; /* Make sure the driver doesn't turn the device in other process */ smp_rmb(); @@ -2966,7 +3622,7 @@ static void check_for_missing_completions(struct ena_adapter *adapter) budget = ENA_MONITORED_TX_QUEUES; - for (i = adapter->last_monitored_tx_qid; i < adapter->num_io_queues; i++) { + for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) { tx_ring = &adapter->tx_ring[i]; rx_ring = &adapter->rx_ring[i]; @@ -2974,7 +3630,8 @@ static void check_for_missing_completions(struct ena_adapter *adapter) if (unlikely(rc)) return; - rc = check_for_rx_interrupt_queue(adapter, rx_ring); + rc = !ENA_IS_XDP_INDEX(adapter, i) ? 
+ check_for_rx_interrupt_queue(adapter, rx_ring) : 0; if (unlikely(rc)) return; @@ -2983,7 +3640,7 @@ static void check_for_missing_completions(struct ena_adapter *adapter) break; } - adapter->last_monitored_tx_qid = i % adapter->num_io_queues; + adapter->last_monitored_tx_qid = i % io_queue_count; } /* trigger napi schedule after 2 consecutive detections */ @@ -3560,6 +4217,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->num_io_queues = max_num_io_queues; adapter->max_num_io_queues = max_num_io_queues; + adapter->xdp_first_ring = 0; + adapter->xdp_num_queues = 0; + adapter->last_monitored_tx_qid = 0; adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK; @@ -3573,7 +4233,10 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) "Failed to query interrupt moderation feature\n"); goto err_netdev_destroy; } - ena_init_io_rings(adapter); + ena_init_io_rings(adapter, + 0, + adapter->xdp_num_queues + + adapter->num_io_queues); netdev->netdev_ops = &ena_netdev_ops; netdev->watchdog_timeo = TX_TIMEOUT; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index bffd778f2ce3..094324fd0edc 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -36,6 +36,7 @@ #include <linux/bitops.h> #include <linux/dim.h> #include <linux/etherdevice.h> +#include <linux/if_vlan.h> #include <linux/inetdevice.h> #include <linux/interrupt.h> #include <linux/netdevice.h> @@ -142,6 +143,18 @@ #define ENA_MMIO_DISABLE_REG_READ BIT(0) +/* The max MTU size is configured to be the ethernet frame size without + * the overhead of the ethernet header, which can have a VLAN header, and + * a frame check sequence (FCS). + * The buffer size we share with the device is defined to be ENA_PAGE_SIZE + */ + +#define ENA_XDP_MAX_MTU (ENA_PAGE_SIZE - ETH_HLEN - ETH_FCS_LEN - \ + VLAN_HLEN - XDP_PACKET_HEADROOM) + +#define ENA_IS_XDP_INDEX(adapter, index) (((index) >= (adapter)->xdp_first_ring) && \ + ((index) < (adapter)->xdp_first_ring + (adapter)->xdp_num_queues)) + struct ena_irq { irq_handler_t handler; void *data; @@ -155,6 +168,8 @@ struct ena_napi { struct napi_struct napi ____cacheline_aligned; struct ena_ring *tx_ring; struct ena_ring *rx_ring; + struct ena_ring *xdp_ring; + bool first_interrupt; u32 qid; struct dim dim; }; @@ -180,6 +195,17 @@ struct ena_tx_buffer { /* num of buffers used by this skb */ u32 num_of_bufs; + /* XDP buffer structure which is used for sending packets in + * the xdp queues + */ + struct xdp_frame *xdpf; + /* The rx page for the rx buffer that was received in rx and + * re transmitted on xdp tx queues as a result of XDP_TX action. + * We need to free the page once we finished cleaning the buffer in + * clean_xdp_irq() + */ + struct page *xdp_rx_page; + /* Indicate if bufs[0] map the linear data of the skb. 
*/ u8 map_linear_data; @@ -258,10 +284,13 @@ struct ena_ring { struct ena_adapter *adapter; struct ena_com_io_cq *ena_com_io_cq; struct ena_com_io_sq *ena_com_io_sq; + struct bpf_prog *xdp_bpf_prog; + struct xdp_rxq_info xdp_rxq; u16 next_to_use; u16 next_to_clean; u16 rx_copybreak; + u16 rx_headroom; u16 qid; u16 mtu; u16 sgl_size; @@ -379,6 +408,10 @@ struct ena_adapter { u32 last_monitored_tx_qid; enum ena_regs_reset_reason_types reset_reason; + + struct bpf_prog *xdp_bpf_prog; + u32 xdp_first_ring; + u32 xdp_num_queues; }; void ena_set_ethtool_ops(struct net_device *netdev); @@ -390,8 +423,48 @@ void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf); int ena_update_queue_sizes(struct ena_adapter *adapter, u32 new_tx_size, u32 new_rx_size); + int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count); int ena_get_sset_count(struct net_device *netdev, int sset); +enum ena_xdp_errors_t { + ENA_XDP_ALLOWED = 0, + ENA_XDP_CURRENT_MTU_TOO_LARGE, + ENA_XDP_NO_ENOUGH_QUEUES, +}; + +static inline bool ena_xdp_queues_present(struct ena_adapter *adapter) +{ + return adapter->xdp_first_ring != 0; +} + +static inline bool ena_xdp_present(struct ena_adapter *adapter) +{ + return !!adapter->xdp_bpf_prog; +} + +static inline bool ena_xdp_present_ring(struct ena_ring *ring) +{ + return !!ring->xdp_bpf_prog; +} + +static inline int ena_xdp_legal_queue_count(struct ena_adapter *adapter, + u32 queues) +{ + return 2 * queues <= adapter->max_num_io_queues; +} + +static inline enum ena_xdp_errors_t ena_xdp_allowed(struct ena_adapter *adapter) +{ + enum ena_xdp_errors_t rc = ENA_XDP_ALLOWED; + + if (adapter->netdev->mtu > ENA_XDP_MAX_MTU) + rc = ENA_XDP_CURRENT_MTU_TOO_LARGE; + else if (!ena_xdp_legal_queue_count(adapter, adapter->num_io_queues)) + rc = ENA_XDP_NO_ENOUGH_QUEUES; + + return rc; +} + #endif /* !(ENA_H) */ diff --git a/drivers/net/ethernet/amd/7990.c b/drivers/net/ethernet/amd/7990.c index ab30761003da..cf3562e82ca9 100644 --- a/drivers/net/ethernet/amd/7990.c +++ b/drivers/net/ethernet/amd/7990.c @@ -527,7 +527,7 @@ int lance_close(struct net_device *dev) } EXPORT_SYMBOL_GPL(lance_close); -void lance_tx_timeout(struct net_device *dev) +void lance_tx_timeout(struct net_device *dev, unsigned int txqueue) { printk("lance_tx_timeout\n"); lance_reset(dev); diff --git a/drivers/net/ethernet/amd/7990.h b/drivers/net/ethernet/amd/7990.h index 741cdc392c6b..8266b3c1fefc 100644 --- a/drivers/net/ethernet/amd/7990.h +++ b/drivers/net/ethernet/amd/7990.h @@ -243,7 +243,7 @@ int lance_open(struct net_device *dev); int lance_close(struct net_device *dev); int lance_start_xmit(struct sk_buff *skb, struct net_device *dev); void lance_set_multicast(struct net_device *dev); -void lance_tx_timeout(struct net_device *dev); +void lance_tx_timeout(struct net_device *dev, unsigned int txqueue); #ifdef CONFIG_NET_POLL_CONTROLLER void lance_poll(struct net_device *dev); #endif diff --git a/drivers/net/ethernet/amd/a2065.c b/drivers/net/ethernet/amd/a2065.c index 212fe72a190b..2f808dbc8b0e 100644 --- a/drivers/net/ethernet/amd/a2065.c +++ b/drivers/net/ethernet/amd/a2065.c @@ -118,10 +118,6 @@ struct lance_private { int auto_select; /* cable-selection by carrier */ unsigned short busmaster_regval; -#ifdef CONFIG_SUNLANCE - struct Linux_SBus_DMA *ledma; /* if set this points to ledma and arch=4m */ - int burst_sizes; /* ledma SBus burst sizes */ -#endif struct timer_list multicast_timer; struct net_device *dev; }; @@ -522,7 +518,7 @@ static inline int lance_reset(struct net_device *dev) 
return status; } -static void lance_tx_timeout(struct net_device *dev) +static void lance_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct lance_private *lp = netdev_priv(dev); volatile struct lance_regs *ll = lp->ll; @@ -551,11 +547,10 @@ static netdev_tx_t lance_start_xmit(struct sk_buff *skb, if (!lance_tx_buffs_avail(lp)) goto out_free; -#ifdef DEBUG /* dump the packet */ - print_hex_dump(KERN_DEBUG, "skb->data: ", DUMP_PREFIX_NONE, - 16, 1, skb->data, 64, true); -#endif + print_hex_dump_debug("skb->data: ", DUMP_PREFIX_NONE, 16, 1, skb->data, + 64, true); + entry = lp->tx_new & lp->tx_ring_mod_mask; ib->btx_ring[entry].length = (-skblen) | 0xf000; ib->btx_ring[entry].misc = 0; diff --git a/drivers/net/ethernet/amd/am79c961a.c b/drivers/net/ethernet/amd/am79c961a.c index 0842da492a64..1c53408f5d47 100644 --- a/drivers/net/ethernet/amd/am79c961a.c +++ b/drivers/net/ethernet/amd/am79c961a.c @@ -422,7 +422,7 @@ static void am79c961_setmulticastlist (struct net_device *dev) spin_unlock_irqrestore(&priv->chip_lock, flags); } -static void am79c961_timeout(struct net_device *dev) +static void am79c961_timeout(struct net_device *dev, unsigned int txqueue) { printk(KERN_WARNING "%s: transmit timed out, network cable problem?\n", dev->name); diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 573e88fc8ede..0f3b743425e8 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -1569,7 +1569,7 @@ static int amd8111e_enable_link_change(struct amd8111e_priv *lp) * failed or the interface is locked up. This function will reinitialize * the hardware. */ -static void amd8111e_tx_timeout(struct net_device *dev) +static void amd8111e_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct amd8111e_priv *lp = netdev_priv(dev); int err; diff --git a/drivers/net/ethernet/amd/ariadne.c b/drivers/net/ethernet/amd/ariadne.c index 4b6a5cb85dd2..5e0f645f5bde 100644 --- a/drivers/net/ethernet/amd/ariadne.c +++ b/drivers/net/ethernet/amd/ariadne.c @@ -530,7 +530,7 @@ static inline void ariadne_reset(struct net_device *dev) netif_start_queue(dev); } -static void ariadne_tx_timeout(struct net_device *dev) +static void ariadne_tx_timeout(struct net_device *dev, unsigned int txqueue) { volatile struct Am79C960 *lance = (struct Am79C960 *)dev->base_addr; diff --git a/drivers/net/ethernet/amd/atarilance.c b/drivers/net/ethernet/amd/atarilance.c index d3d44e07afbc..4e36122609a3 100644 --- a/drivers/net/ethernet/amd/atarilance.c +++ b/drivers/net/ethernet/amd/atarilance.c @@ -346,7 +346,7 @@ static int lance_rx( struct net_device *dev ); static int lance_close( struct net_device *dev ); static void set_multicast_list( struct net_device *dev ); static int lance_set_mac_address( struct net_device *dev, void *addr ); -static void lance_tx_timeout (struct net_device *dev); +static void lance_tx_timeout (struct net_device *dev, unsigned int txqueue); /************************* End of Prototypes **************************/ @@ -727,7 +727,7 @@ static void lance_init_ring( struct net_device *dev ) /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ -static void lance_tx_timeout (struct net_device *dev) +static void lance_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct lance_private *lp = netdev_priv(dev); struct lance_ioreg *IO = lp->iobase; diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index 1793950f0582..089a4fbc61a0 100644 --- 
a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -1014,7 +1014,7 @@ static netdev_tx_t au1000_tx(struct sk_buff *skb, struct net_device *dev) * The Tx ring has been full longer than the watchdog timeout * value. The transmitter must be hung? */ -static void au1000_tx_timeout(struct net_device *dev) +static void au1000_tx_timeout(struct net_device *dev, unsigned int txqueue) { netdev_err(dev, "au1000_tx_timeout: dev=%p\n", dev); au1000_reset_mac(dev); @@ -1053,23 +1053,12 @@ static void au1000_multicast_list(struct net_device *dev) writel(reg, &aup->mac->control); } -static int au1000_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) -{ - if (!netif_running(dev)) - return -EINVAL; - - if (!dev->phydev) - return -EINVAL; /* PHY not controllable */ - - return phy_mii_ioctl(dev->phydev, rq, cmd); -} - static const struct net_device_ops au1000_netdev_ops = { .ndo_open = au1000_open, .ndo_stop = au1000_close, .ndo_start_xmit = au1000_tx, .ndo_set_rx_mode = au1000_multicast_list, - .ndo_do_ioctl = au1000_ioctl, + .ndo_do_ioctl = phy_do_ioctl_running, .ndo_tx_timeout = au1000_tx_timeout, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, @@ -1161,7 +1150,7 @@ static int au1000_probe(struct platform_device *pdev) /* aup->mac is the base address of the MAC's registers */ aup->mac = (struct mac_reg *) - ioremap_nocache(base->start, resource_size(base)); + ioremap(base->start, resource_size(base)); if (!aup->mac) { dev_err(&pdev->dev, "failed to ioremap MAC registers\n"); err = -ENXIO; @@ -1169,7 +1158,7 @@ static int au1000_probe(struct platform_device *pdev) } /* Setup some variables for quick register address access */ - aup->enable = (u32 *)ioremap_nocache(macen->start, + aup->enable = (u32 *)ioremap(macen->start, resource_size(macen)); if (!aup->enable) { dev_err(&pdev->dev, "failed to ioremap MAC enable register\n"); @@ -1178,7 +1167,7 @@ static int au1000_probe(struct platform_device *pdev) } aup->mac_id = pdev->id; - aup->macdma = ioremap_nocache(macdma->start, resource_size(macdma)); + aup->macdma = ioremap(macdma->start, resource_size(macdma)); if (!aup->macdma) { dev_err(&pdev->dev, "failed to ioremap MACDMA registers\n"); err = -ENXIO; diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index dac4a2fcad6a..7282ce55ffb8 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -608,7 +608,7 @@ static int lance_rx(struct net_device *dev) len = (*rds_ptr(rd, mblength, lp->type) & 0xfff) - 4; skb = netdev_alloc_skb(dev, len + 2); - if (skb == 0) { + if (!skb) { dev->stats.rx_dropped++; *rds_ptr(rd, mblength, lp->type) = 0; *rds_ptr(rd, rmd1, lp->type) = @@ -884,7 +884,7 @@ static inline int lance_reset(struct net_device *dev) return status; } -static void lance_tx_timeout(struct net_device *dev) +static void lance_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct lance_private *lp = netdev_priv(dev); volatile struct lance_regs *ll = lp->ll; diff --git a/drivers/net/ethernet/amd/lance.c b/drivers/net/ethernet/amd/lance.c index f90b454b1642..aff44241988c 100644 --- a/drivers/net/ethernet/amd/lance.c +++ b/drivers/net/ethernet/amd/lance.c @@ -306,7 +306,7 @@ static irqreturn_t lance_interrupt(int irq, void *dev_id); static int lance_close(struct net_device *dev); static struct net_device_stats *lance_get_stats(struct net_device *dev); static void set_multicast_list(struct net_device *dev); -static void lance_tx_timeout (struct net_device 
*dev); +static void lance_tx_timeout (struct net_device *dev, unsigned int txqueue); @@ -913,7 +913,7 @@ lance_restart(struct net_device *dev, unsigned int csr0_bits, int must_reinit) } -static void lance_tx_timeout (struct net_device *dev) +static void lance_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct lance_private *lp = (struct lance_private *) dev->ml_priv; int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/amd/ni65.c b/drivers/net/ethernet/amd/ni65.c index c6c2a54c1121..c38edf6f03a3 100644 --- a/drivers/net/ethernet/amd/ni65.c +++ b/drivers/net/ethernet/amd/ni65.c @@ -254,7 +254,7 @@ static int ni65_lance_reinit(struct net_device *dev); static void ni65_init_lance(struct priv *p,unsigned char*,int,int); static netdev_tx_t ni65_send_packet(struct sk_buff *skb, struct net_device *dev); -static void ni65_timeout(struct net_device *dev); +static void ni65_timeout(struct net_device *dev, unsigned int txqueue); static int ni65_close(struct net_device *dev); static int ni65_alloc_buffer(struct net_device *dev); static void ni65_free_buffer(struct priv *p); @@ -1133,7 +1133,7 @@ static void ni65_recv_intr(struct net_device *dev,int csr0) * kick xmitter .. */ -static void ni65_timeout(struct net_device *dev) +static void ni65_timeout(struct net_device *dev, unsigned int txqueue) { int i; struct priv *p = dev->ml_priv; diff --git a/drivers/net/ethernet/amd/nmclan_cs.c b/drivers/net/ethernet/amd/nmclan_cs.c index 9c152d85840d..023aecf6ab30 100644 --- a/drivers/net/ethernet/amd/nmclan_cs.c +++ b/drivers/net/ethernet/amd/nmclan_cs.c @@ -407,7 +407,7 @@ static int mace_open(struct net_device *dev); static int mace_close(struct net_device *dev); static netdev_tx_t mace_start_xmit(struct sk_buff *skb, struct net_device *dev); -static void mace_tx_timeout(struct net_device *dev); +static void mace_tx_timeout(struct net_device *dev, unsigned int txqueue); static irqreturn_t mace_interrupt(int irq, void *dev_id); static struct net_device_stats *mace_get_stats(struct net_device *dev); static int mace_rx(struct net_device *dev, unsigned char RxCnt); @@ -837,7 +837,7 @@ mace_start_xmit failed, put skb back into a list." 
---------------------------------------------------------------------------- */ -static void mace_tx_timeout(struct net_device *dev) +static void mace_tx_timeout(struct net_device *dev, unsigned int txqueue) { mace_private *lp = netdev_priv(dev); struct pcmcia_device *link = lp->p_dev; diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index f5ad12c10934..dc7d88227e76 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -314,7 +314,7 @@ static int pcnet32_open(struct net_device *); static int pcnet32_init_ring(struct net_device *); static netdev_tx_t pcnet32_start_xmit(struct sk_buff *, struct net_device *); -static void pcnet32_tx_timeout(struct net_device *dev); +static void pcnet32_tx_timeout(struct net_device *dev, unsigned int txqueue); static irqreturn_t pcnet32_interrupt(int, void *); static int pcnet32_close(struct net_device *); static struct net_device_stats *pcnet32_get_stats(struct net_device *); @@ -2455,7 +2455,7 @@ static void pcnet32_restart(struct net_device *dev, unsigned int csr0_bits) lp->a->write_csr(ioaddr, CSR0, csr0_bits); } -static void pcnet32_tx_timeout(struct net_device *dev) +static void pcnet32_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct pcnet32_private *lp = netdev_priv(dev); unsigned long ioaddr = dev->base_addr, flags; diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c index ebcbf8ca4829..b00e00881253 100644 --- a/drivers/net/ethernet/amd/sunlance.c +++ b/drivers/net/ethernet/amd/sunlance.c @@ -1097,7 +1097,7 @@ static void lance_piozero(void __iomem *dest, int len) sbus_writeb(0, piobuf); } -static void lance_tx_timeout(struct net_device *dev) +static void lance_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct lance_private *lp = netdev_priv(dev); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 98f8f2033154..b71f9b04a51e 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -2152,7 +2152,7 @@ static int xgbe_change_mtu(struct net_device *netdev, int mtu) return 0; } -static void xgbe_tx_timeout(struct net_device *netdev) +static void xgbe_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct xgbe_prv_data *pdata = netdev_priv(netdev); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index 128cd648ba99..46c3c1ca38d6 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -1227,7 +1227,7 @@ static bool xgbe_phy_sfp_verify_eeprom(u8 cc_in, u8 *buf, unsigned int len) for (cc = 0; len; buf++, len--) cc += *buf; - return (cc == cc_in) ? 
true : false; + return cc == cc_in; } static int xgbe_phy_sfp_read_eeprom(struct xgbe_prv_data *pdata) diff --git a/drivers/net/ethernet/apm/xgene-v2/main.c b/drivers/net/ethernet/apm/xgene-v2/main.c index 02b4f3af02b5..c48f60996761 100644 --- a/drivers/net/ethernet/apm/xgene-v2/main.c +++ b/drivers/net/ethernet/apm/xgene-v2/main.c @@ -575,7 +575,7 @@ static void xge_free_pending_skb(struct net_device *ndev) } } -static void xge_timeout(struct net_device *ndev) +static void xge_timeout(struct net_device *ndev, unsigned int txqueue) { struct xge_pdata *pdata = netdev_priv(ndev); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index d8612131c55e..6aee2f0fc0db 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -859,7 +859,7 @@ static int xgene_enet_napi(struct napi_struct *napi, const int budget) return processed; } -static void xgene_enet_timeout(struct net_device *ndev) +static void xgene_enet_timeout(struct net_device *ndev, unsigned int txqueue) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); struct netdev_queue *txq; @@ -2020,7 +2020,7 @@ static int xgene_enet_probe(struct platform_device *pdev) int ret; ndev = alloc_etherdev_mqs(sizeof(struct xgene_enet_pdata), - XGENE_NUM_RX_RING, XGENE_NUM_TX_RING); + XGENE_NUM_TX_RING, XGENE_NUM_RX_RING); if (!ndev) return -ENOMEM; diff --git a/drivers/net/ethernet/apple/macmace.c b/drivers/net/ethernet/apple/macmace.c index 8d03578d5e8c..95d3061c61be 100644 --- a/drivers/net/ethernet/apple/macmace.c +++ b/drivers/net/ethernet/apple/macmace.c @@ -91,7 +91,7 @@ static int mace_set_address(struct net_device *dev, void *addr); static void mace_reset(struct net_device *dev); static irqreturn_t mace_interrupt(int irq, void *dev_id); static irqreturn_t mace_dma_intr(int irq, void *dev_id); -static void mace_tx_timeout(struct net_device *dev); +static void mace_tx_timeout(struct net_device *dev, unsigned int txqueue); static void __mace_set_address(struct net_device *dev, void *addr); /* @@ -600,7 +600,7 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static void mace_tx_timeout(struct net_device *dev) +static void mace_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mace_data *mp = netdev_priv(dev); volatile struct mace *mb = mp->mace; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 2bb329606794..6b27af0db499 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -253,7 +253,7 @@ static int aq_pci_probe(struct pci_dev *pdev, goto err_free_aq_hw; } - self->aq_hw->mmio = ioremap_nocache(mmio_pa, reg_sz); + self->aq_hw->mmio = ioremap(mmio_pa, reg_sz); if (!self->aq_hw->mmio) { err = -EIO; goto err_free_aq_hw; diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index 6f2c867785fe..17bda4e8cc45 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -781,18 +781,6 @@ static int arc_emac_set_address(struct net_device *ndev, void *p) return 0; } -static int arc_emac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) -{ - if (!netif_running(dev)) - return -EINVAL; - - if (!dev->phydev) - return -ENODEV; - - return phy_mii_ioctl(dev->phydev, rq, cmd); -} - - /** * arc_emac_restart - Restart EMAC * @ndev: Pointer to net_device structure. 
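[Editor's note] Several drivers in this series (au1000 above, arc_emac here, and ag71xx, nb8800, bgmac and bcmgenet further down) delete near-identical ndo_do_ioctl wrappers and point the op at the shared phylib helpers phy_do_ioctl / phy_do_ioctl_running instead. As a rough sketch of what those wrappers collapse into -- reconstructed from the bodies being removed in this diff, so the exact in-tree helpers and their error codes may differ -- the common pattern is:

#include <linux/netdevice.h>
#include <linux/phy.h>

/* Sketch only: the _sketch suffix marks these as reconstructions of the
 * per-driver wrappers deleted above, not the phylib implementation itself.
 */
static int phy_do_ioctl_sketch(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	if (!dev->phydev)
		return -ENODEV;	/* some removed wrappers returned -EINVAL here */

	return phy_mii_ioctl(dev->phydev, ifr, cmd);
}

static int phy_do_ioctl_running_sketch(struct net_device *dev, struct ifreq *ifr,
					int cmd)
{
	/* the "_running" variant additionally refuses ioctls while the
	 * interface is down
	 */
	if (!netif_running(dev))
		return -EINVAL;

	return phy_do_ioctl_sketch(dev, ifr, cmd);
}

Drivers whose old wrapper also rejected ioctls on a downed interface (au1000, bgmac, bcmgenet) switch to the "_running" variant; the others (ag71xx, nb8800) take the plain one.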
@@ -857,7 +845,7 @@ static const struct net_device_ops arc_emac_netdev_ops = { .ndo_set_mac_address = arc_emac_set_address, .ndo_get_stats = arc_emac_stats, .ndo_set_rx_mode = arc_emac_set_rx_mode, - .ndo_do_ioctl = arc_emac_ioctl, + .ndo_do_ioctl = phy_do_ioctl_running, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = arc_emac_poll_controller, #endif diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index 61a334d1b5e6..e95687a780fb 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -1394,14 +1394,6 @@ err_drop: return NETDEV_TX_OK; } -static int ag71xx_do_ioctl(struct net_device *ndev, struct ifreq *ifr, int cmd) -{ - if (!ndev->phydev) - return -EINVAL; - - return phy_mii_ioctl(ndev->phydev, ifr, cmd); -} - static void ag71xx_oom_timer_handler(struct timer_list *t) { struct ag71xx *ag = from_timer(ag, t, oom_timer); @@ -1409,7 +1401,7 @@ static void ag71xx_oom_timer_handler(struct timer_list *t) napi_schedule(&ag->napi); } -static void ag71xx_tx_timeout(struct net_device *ndev) +static void ag71xx_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct ag71xx *ag = netdev_priv(ndev); @@ -1618,7 +1610,7 @@ static const struct net_device_ops ag71xx_netdev_ops = { .ndo_open = ag71xx_open, .ndo_stop = ag71xx_stop, .ndo_start_xmit = ag71xx_hard_start_xmit, - .ndo_do_ioctl = ag71xx_do_ioctl, + .ndo_do_ioctl = phy_do_ioctl, .ndo_tx_timeout = ag71xx_tx_timeout, .ndo_change_mtu = ag71xx_change_mtu, .ndo_set_mac_address = eth_mac_addr, @@ -1687,8 +1679,7 @@ static int ag71xx_probe(struct platform_device *pdev) goto err_free; } - ag->mac_base = devm_ioremap_nocache(&pdev->dev, res->start, - resource_size(res)); + ag->mac_base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); if (!ag->mac_base) { err = -ENOMEM; goto err_free; diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index d4bbcdfd691a..1dcbc486eca9 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1553,7 +1553,7 @@ static netdev_tx_t alx_start_xmit(struct sk_buff *skb, return alx_start_xmit_ring(skb, alx_tx_queue_mapping(alx, skb)); } -static void alx_tx_timeout(struct net_device *dev) +static void alx_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct alx_priv *alx = netdev_priv(dev); diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 2b239ecea05f..4c0b1f8551dd 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -350,7 +350,7 @@ static void atl1c_del_timer(struct atl1c_adapter *adapter) * atl1c_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure */ -static void atl1c_tx_timeout(struct net_device *netdev) +static void atl1c_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct atl1c_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 4f7b65825c15..e0d89942d537 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -251,7 +251,7 @@ static void atl1e_cancel_work(struct atl1e_adapter *adapter) * atl1e_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure */ -static void atl1e_tx_timeout(struct net_device *netdev) +static void atl1e_tx_timeout(struct 
net_device *netdev, unsigned int txqueue) { struct atl1e_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index 3aba38322717..b81a4e0c5b57 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -1001,7 +1001,7 @@ static int atl2_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) * atl2_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure */ -static void atl2_tx_timeout(struct net_device *netdev) +static void atl2_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct atl2_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/atheros/atlx/atlx.c b/drivers/net/ethernet/atheros/atlx/atlx.c index 505a22c703f7..0941d07d0833 100644 --- a/drivers/net/ethernet/atheros/atlx/atlx.c +++ b/drivers/net/ethernet/atheros/atlx/atlx.c @@ -183,7 +183,7 @@ static void atlx_clear_phy_int(struct atlx_adapter *adapter) * atlx_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure */ -static void atlx_tx_timeout(struct net_device *netdev) +static void atlx_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct atlx_adapter *adapter = netdev_priv(netdev); /* Do the reset outside of interrupt context */ diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c index 30b455013bf3..bc273e0db7ff 100644 --- a/drivers/net/ethernet/aurora/nb8800.c +++ b/drivers/net/ethernet/aurora/nb8800.c @@ -1005,18 +1005,13 @@ static int nb8800_stop(struct net_device *dev) return 0; } -static int nb8800_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) -{ - return phy_mii_ioctl(dev->phydev, rq, cmd); -} - static const struct net_device_ops nb8800_netdev_ops = { .ndo_open = nb8800_open, .ndo_stop = nb8800_stop, .ndo_start_xmit = nb8800_xmit, .ndo_set_mac_address = nb8800_set_mac_address, .ndo_set_rx_mode = nb8800_set_rx_mode, - .ndo_do_ioctl = nb8800_ioctl, + .ndo_do_ioctl = phy_do_ioctl, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index ec25fd81985d..a780b7215021 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -948,7 +948,7 @@ irq_ack: return IRQ_RETVAL(handled); } -static void b44_tx_timeout(struct net_device *dev) +static void b44_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct b44 *bp = netdev_priv(dev); diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index d6b1a153f9df..f07ac0e0af59 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1354,7 +1354,7 @@ out: return ret; } -static void bcm_sysport_tx_timeout(struct net_device *dev) +static void bcm_sysport_tx_timeout(struct net_device *dev, unsigned int txqueue) { netdev_warn(dev, "transmit timeout!\n"); @@ -2428,6 +2428,14 @@ static int bcm_sysport_probe(struct platform_device *pdev) if (!of_id || !of_id->data) return -EINVAL; + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); + if (ret) + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (ret) { + dev_err(&pdev->dev, "unable to set DMA mask: %d\n", ret); + return ret; + } + /* Fairly quickly we need to know the type of adapter we have */ params = of_id->data; diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 
148734b166f0..1bb07a5d82c9 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1248,14 +1248,6 @@ static int bgmac_set_mac_address(struct net_device *net_dev, void *addr) return 0; } -static int bgmac_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd) -{ - if (!netif_running(net_dev)) - return -EINVAL; - - return phy_mii_ioctl(net_dev->phydev, ifr, cmd); -} - static const struct net_device_ops bgmac_netdev_ops = { .ndo_open = bgmac_open, .ndo_stop = bgmac_stop, @@ -1263,7 +1255,7 @@ static const struct net_device_ops bgmac_netdev_ops = { .ndo_set_rx_mode = bgmac_set_rx_mode, .ndo_set_mac_address = bgmac_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = bgmac_ioctl, + .ndo_do_ioctl = phy_do_ioctl_running, }; /************************************************** diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index fbc196b480b6..dbb7874607ca 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -6575,7 +6575,7 @@ bnx2_dump_state(struct bnx2 *bp) } static void -bnx2_tx_timeout(struct net_device *dev) +bnx2_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct bnx2 *bp = netdev_priv(dev); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 5e037a305b83..ee9e9290f112 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -4970,7 +4970,7 @@ int bnx2x_set_features(struct net_device *dev, netdev_features_t features) return 0; } -void bnx2x_tx_timeout(struct net_device *dev) +void bnx2x_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct bnx2x *bp = netdev_priv(dev); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index 3f63ffd7561b..6f1352d51cb2 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -617,7 +617,7 @@ int bnx2x_set_features(struct net_device *dev, netdev_features_t features); * * @dev: net device */ -void bnx2x_tx_timeout(struct net_device *dev); +void bnx2x_tx_timeout(struct net_device *dev, unsigned int txqueue); /** bnx2x_get_c2s_mapping - read inner-to-outer vlan configuration * c2s_map should have BNX2X_MAX_PRIORITY entries. 
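[Editor's note] The other change repeated across nearly every driver in this section is the new .ndo_tx_timeout signature: the bnx2x prototype just above, like all the others, now receives the index of the TX queue whose watchdog fired, so a multi-queue driver no longer has to guess which ring stalled. A minimal, hypothetical handler using the new parameter could look like the sketch below; the foo_ names are illustrative and not taken from any driver in this diff, while netdev_get_tx_queue() and netif_tx_queue_stopped() are existing core helpers:

#include <linux/netdevice.h>
#include <linux/workqueue.h>

struct foo_priv {
	struct work_struct reset_work;	/* hypothetical per-device reset worker */
};

static void foo_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
	struct foo_priv *priv = netdev_priv(dev);
	struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue);

	/* The watchdog now tells us exactly which queue stalled. */
	netdev_warn(dev, "TX queue %u timed out (stopped=%d)\n",
		    txqueue, netif_tx_queue_stopped(txq));

	/* Recovery stays driver specific; most drivers in this diff still
	 * reset the whole device from process context.
	 */
	schedule_work(&priv->reset_work);
}

Drivers that do not care which queue stalled, such as the legacy LANCE parts converted earlier in this diff, simply grow the extra parameter and leave their bodies unchanged.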
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index cff64e43bdd8..1c26fa962233 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -14053,7 +14053,7 @@ static int bnx2x_init_one(struct pci_dev *pdev, rc = -ENOMEM; goto init_one_freemem; } - bp->doorbells = ioremap_nocache(pci_resource_start(pdev, 2), + bp->doorbells = ioremap(pci_resource_start(pdev, 2), doorbell_size); } if (!bp->doorbells) { @@ -15410,6 +15410,7 @@ int bnx2x_configure_ptp_filters(struct bnx2x *bp) REG_WR(bp, rule, BNX2X_PTP_TX_ON_RULE_MASK); break; case HWTSTAMP_TX_ONESTEP_SYNC: + case HWTSTAMP_TX_ONESTEP_P2P: BNX2X_ERR("One-step timestamping is not supported\n"); return -ERANGE; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index e6f18f6070ef..483935b001c8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -944,6 +944,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp, dma_addr -= bp->rx_dma_offset; dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir, DMA_ATTR_WEAK_ORDERING); + page_pool_release_page(rxr->page_pool, page); if (unlikely(!payload)) payload = eth_get_headlen(bp->dev, data_ptr, len); @@ -6997,7 +6998,6 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) pf->fw_fid = le16_to_cpu(resp->fid); pf->port_id = le16_to_cpu(resp->port_id); - bp->dev->dev_port = pf->port_id; memcpy(pf->mac_addr, resp->mac_address, ETH_ALEN); pf->first_vf_id = le16_to_cpu(resp->first_vf_id); pf->max_vfs = le16_to_cpu(resp->max_vfs); @@ -7288,6 +7288,7 @@ static int bnxt_hwrm_ver_get(struct bnxt *bp) bp->hwrm_max_ext_req_len = HWRM_MAX_REQ_LEN; bp->chip_num = le16_to_cpu(resp->chip_num); + bp->chip_rev = resp->chip_rev; if (bp->chip_num == CHIP_NUM_58700 && !resp->chip_rev && !resp->chip_metal) bp->flags |= BNXT_FLAG_CHIP_NITRO_A0; @@ -9063,7 +9064,7 @@ static int bnxt_update_phy_setting(struct bnxt *bp) /* The last close may have shutdown the link, so need to call * PHY_CFG to bring it back up. 
*/ - if (!netif_carrier_ok(bp->dev)) + if (!bp->link_info.link_up) update_link = true; if (!bnxt_eee_config_ok(bp)) @@ -9975,7 +9976,7 @@ static void bnxt_reset_task(struct bnxt *bp, bool silent) } } -static void bnxt_tx_timeout(struct net_device *dev) +static void bnxt_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct bnxt *bp = netdev_priv(dev); @@ -10040,6 +10041,13 @@ static void bnxt_timer(struct timer_list *t) bnxt_queue_sp_work(bp); } +#ifdef CONFIG_RFS_ACCEL + if ((bp->flags & BNXT_FLAG_RFS) && bp->ntp_fltr_count) { + set_bit(BNXT_RX_NTP_FLTR_SP_EVENT, &bp->sp_event); + bnxt_queue_sp_work(bp); + } +#endif /*CONFIG_RFS_ACCEL*/ + if (bp->link_info.phy_retry) { if (time_after(jiffies, bp->link_info.phy_retry_expires)) { bp->link_info.phy_retry = false; @@ -10050,7 +10058,8 @@ static void bnxt_timer(struct timer_list *t) } } - if ((bp->flags & BNXT_FLAG_CHIP_P5) && netif_carrier_ok(dev)) { + if ((bp->flags & BNXT_FLAG_CHIP_P5) && !bp->chip_rev && + netif_carrier_ok(dev)) { set_bit(BNXT_RING_COAL_NOW_SP_EVENT, &bp->sp_event); bnxt_queue_sp_work(bp); } @@ -10568,7 +10577,7 @@ static void bnxt_set_dflt_rss_hash_type(struct bnxt *bp) VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4 | VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 | VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6; - if (BNXT_CHIP_P4(bp) && bp->hwrm_spec_code >= 0x10501) { + if (BNXT_CHIP_P4_PLUS(bp) && bp->hwrm_spec_code >= 0x10501) { bp->flags |= BNXT_FLAG_UDP_RSS_CAP; bp->rss_hash_cfg |= VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4 | VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6; @@ -10822,6 +10831,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) smp_mb__before_atomic(); clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); bnxt_ulp_start(bp, rc); + bnxt_dl_health_recovery_done(bp); bnxt_dl_health_status_update(bp, true); rtnl_unlock(); break; @@ -11099,6 +11109,7 @@ static int bnxt_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, struct ethhdr *eth = (struct ethhdr *)skb_mac_header(skb); int rc = 0, idx, bit_id, l2_idx = 0; struct hlist_head *head; + u32 flags; if (!ether_addr_equal(dev->dev_addr, eth->h_dest)) { struct bnxt_vnic_info *vnic = &bp->vnic_info[0]; @@ -11138,8 +11149,9 @@ static int bnxt_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, rc = -EPROTONOSUPPORT; goto err_free; } - if ((fkeys->control.flags & FLOW_DIS_ENCAPSULATION) && - bp->hwrm_spec_code < 0x10601) { + flags = fkeys->control.flags; + if (((flags & FLOW_DIS_ENCAPSULATION) && + bp->hwrm_spec_code < 0x10601) || (flags & FLOW_DIS_IS_FRAGMENT)) { rc = -EPROTONOSUPPORT; goto err_free; } @@ -11376,8 +11388,8 @@ int bnxt_get_port_parent_id(struct net_device *dev, if (!BNXT_PF(bp) || !(bp->flags & BNXT_FLAG_DSN_VALID)) return -EOPNOTSUPP; - ppid->id_len = sizeof(bp->switch_id); - memcpy(ppid->id, bp->switch_id, ppid->id_len); + ppid->id_len = sizeof(bp->dsn); + memcpy(ppid->id, bp->dsn, ppid->id_len); return 0; } @@ -11433,9 +11445,9 @@ static void bnxt_remove_one(struct pci_dev *pdev) bnxt_sriov_disable(bp); bnxt_dl_fw_reporters_destroy(bp, true); - bnxt_dl_unregister(bp); pci_disable_pcie_error_reporting(pdev); unregister_netdev(dev); + bnxt_dl_unregister(bp); bnxt_shutdown_tc(bp); bnxt_cancel_sp_work(bp); bp->sp_event = 0; @@ -11469,6 +11481,9 @@ static int bnxt_probe_phy(struct bnxt *bp, bool fw_dflt) rc); return rc; } + if (!fw_dflt) + return 0; + rc = bnxt_update_link(bp, false); if (rc) { netdev_err(bp->dev, "Probe phy can't update link (rc: %x)\n", @@ -11482,9 +11497,6 @@ static int bnxt_probe_phy(struct bnxt *bp, bool fw_dflt) if 
(link_info->auto_link_speeds && !link_info->support_auto_speeds) link_info->support_auto_speeds = link_info->support_speeds; - if (!fw_dflt) - return 0; - bnxt_init_ethtool_link_settings(bp); return 0; } @@ -11858,7 +11870,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (BNXT_PF(bp)) { /* Read the adapter's DSN to use as the eswitch switch_id */ - bnxt_pcie_dsn_get(bp, bp->switch_id); + rc = bnxt_pcie_dsn_get(bp, bp->dsn); } /* MTU range: 60 - FW defined max */ @@ -11905,11 +11917,14 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) bnxt_init_tc(bp); } + bnxt_dl_register(bp); + rc = register_netdev(dev); if (rc) - goto init_err_cleanup_tc; + goto init_err_cleanup; - bnxt_dl_register(bp); + if (BNXT_PF(bp)) + devlink_port_type_eth_set(&bp->dl_port, bp->dev); bnxt_dl_fw_reporters_create(bp); netdev_info(dev, "%s found at mem %lx, node addr %pM\n", @@ -11919,7 +11934,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; -init_err_cleanup_tc: +init_err_cleanup: + bnxt_dl_unregister(bp); bnxt_shutdown_tc(bp); bnxt_clear_int_mode(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index f14335433a64..cabef0b4f5fb 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1457,6 +1457,8 @@ struct bnxt { #define CHIP_NUM_58804 0xd804 #define CHIP_NUM_58808 0xd808 + u8 chip_rev; + #define BNXT_CHIP_NUM_5730X(chip_num) \ ((chip_num) >= CHIP_NUM_57301 && \ (chip_num) <= CHIP_NUM_57304) @@ -1846,7 +1848,7 @@ struct bnxt { enum devlink_eswitch_mode eswitch_mode; struct bnxt_vf_rep **vf_reps; /* array of vf-rep ptrs */ u16 *cfa_code_map; /* cfa_code -> vf_idx map */ - u8 switch_id[8]; + u8 dsn[8]; struct bnxt_tc_info *tc_info; struct list_head tc_indr_block_list; struct notifier_block tc_netdev_nb; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 3eedd4477218..eec0168330b7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -21,6 +21,7 @@ bnxt_dl_flash_update(struct devlink *dl, const char *filename, const char *region, struct netlink_ext_ack *extack) { struct bnxt *bp = bnxt_get_bp_from_dl(dl); + int rc; if (region) return -EOPNOTSUPP; @@ -31,7 +32,18 @@ bnxt_dl_flash_update(struct devlink *dl, const char *filename, return -EPERM; } - return bnxt_flash_package_from_file(bp->dev, filename, 0); + devlink_flash_update_begin_notify(dl); + devlink_flash_update_status_notify(dl, "Preparing to flash", region, 0, + 0); + rc = bnxt_flash_package_from_file(bp->dev, filename, 0); + if (!rc) + devlink_flash_update_status_notify(dl, "Flashing done", region, + 0, 0); + else + devlink_flash_update_status_notify(dl, "Flashing failed", + region, 0, 0); + devlink_flash_update_end_notify(dl); + return rc; } static int bnxt_fw_reporter_diagnose(struct devlink_health_reporter *reporter, @@ -89,7 +101,7 @@ static int bnxt_fw_reset_recover(struct devlink_health_reporter *reporter, return -EOPNOTSUPP; bnxt_fw_reset(bp); - return 0; + return -EINPROGRESS; } static const @@ -116,7 +128,7 @@ static int bnxt_fw_fatal_recover(struct devlink_health_reporter *reporter, else if (event == BNXT_FW_EXCEPTION_SP_EVENT) bnxt_fw_exception(bp); - return 0; + return -EINPROGRESS; } static const @@ -262,11 +274,25 @@ void bnxt_dl_health_status_update(struct bnxt *bp, bool healthy) 
health->fatal = false; } +void bnxt_dl_health_recovery_done(struct bnxt *bp) +{ + struct bnxt_fw_health *hlth = bp->fw_health; + + if (hlth->fatal) + devlink_health_reporter_recovery_done(hlth->fw_fatal_reporter); + else + devlink_health_reporter_recovery_done(hlth->fw_reset_reporter); +} + +static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req, + struct netlink_ext_ack *extack); + static const struct devlink_ops bnxt_dl_ops = { #ifdef CONFIG_BNXT_SRIOV .eswitch_mode_set = bnxt_dl_eswitch_mode_set, .eswitch_mode_get = bnxt_dl_eswitch_mode_get, #endif /* CONFIG_BNXT_SRIOV */ + .info_get = bnxt_dl_info_get, .flash_update = bnxt_dl_flash_update, }; @@ -333,6 +359,136 @@ static void bnxt_copy_from_nvm_data(union devlink_param_value *dst, dst->vu8 = (u8)val32; } +static int bnxt_hwrm_get_nvm_cfg_ver(struct bnxt *bp, + union devlink_param_value *nvm_cfg_ver) +{ + struct hwrm_nvm_get_variable_input req = {0}; + union bnxt_nvm_data *data; + dma_addr_t data_dma_addr; + int rc; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_GET_VARIABLE, -1, -1); + data = dma_alloc_coherent(&bp->pdev->dev, sizeof(*data), + &data_dma_addr, GFP_KERNEL); + if (!data) + return -ENOMEM; + + req.dest_data_addr = cpu_to_le64(data_dma_addr); + req.data_len = cpu_to_le16(BNXT_NVM_CFG_VER_BITS); + req.option_num = cpu_to_le16(NVM_OFF_NVM_CFG_VER); + + rc = hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (!rc) + bnxt_copy_from_nvm_data(nvm_cfg_ver, data, + BNXT_NVM_CFG_VER_BITS, + BNXT_NVM_CFG_VER_BYTES); + + dma_free_coherent(&bp->pdev->dev, sizeof(*data), data, data_dma_addr); + return rc; +} + +static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct bnxt *bp = bnxt_get_bp_from_dl(dl); + union devlink_param_value nvm_cfg_ver; + struct hwrm_ver_get_output *ver_resp; + char mgmt_ver[FW_VER_STR_LEN]; + char roce_ver[FW_VER_STR_LEN]; + char fw_ver[FW_VER_STR_LEN]; + char buf[32]; + int rc; + + rc = devlink_info_driver_name_put(req, DRV_MODULE_NAME); + if (rc) + return rc; + + sprintf(buf, "%X", bp->chip_num); + rc = devlink_info_version_fixed_put(req, + DEVLINK_INFO_VERSION_GENERIC_ASIC_ID, buf); + if (rc) + return rc; + + ver_resp = &bp->ver_resp; + sprintf(buf, "%X", ver_resp->chip_rev); + rc = devlink_info_version_fixed_put(req, + DEVLINK_INFO_VERSION_GENERIC_ASIC_REV, buf); + if (rc) + return rc; + + if (BNXT_PF(bp)) { + sprintf(buf, "%02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X", + bp->dsn[7], bp->dsn[6], bp->dsn[5], bp->dsn[4], + bp->dsn[3], bp->dsn[2], bp->dsn[1], bp->dsn[0]); + rc = devlink_info_serial_number_put(req, buf); + if (rc) + return rc; + } + + if (strlen(ver_resp->active_pkg_name)) { + rc = + devlink_info_version_running_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW, + ver_resp->active_pkg_name); + if (rc) + return rc; + } + + if (BNXT_PF(bp) && !bnxt_hwrm_get_nvm_cfg_ver(bp, &nvm_cfg_ver)) { + u32 ver = nvm_cfg_ver.vu32; + + sprintf(buf, "%X.%X.%X", (ver >> 16) & 0xF, (ver >> 8) & 0xF, + ver & 0xF); + rc = devlink_info_version_running_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW_PSID, buf); + if (rc) + return rc; + } + + if (ver_resp->flags & VER_GET_RESP_FLAGS_EXT_VER_AVAIL) { + snprintf(fw_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", + ver_resp->hwrm_fw_major, ver_resp->hwrm_fw_minor, + ver_resp->hwrm_fw_build, ver_resp->hwrm_fw_patch); + + snprintf(mgmt_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", + ver_resp->mgmt_fw_major, ver_resp->mgmt_fw_minor, + ver_resp->mgmt_fw_build, ver_resp->mgmt_fw_patch); + + 
snprintf(roce_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", + ver_resp->roce_fw_major, ver_resp->roce_fw_minor, + ver_resp->roce_fw_build, ver_resp->roce_fw_patch); + } else { + snprintf(fw_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", + ver_resp->hwrm_fw_maj_8b, ver_resp->hwrm_fw_min_8b, + ver_resp->hwrm_fw_bld_8b, ver_resp->hwrm_fw_rsvd_8b); + + snprintf(mgmt_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", + ver_resp->mgmt_fw_maj_8b, ver_resp->mgmt_fw_min_8b, + ver_resp->mgmt_fw_bld_8b, ver_resp->mgmt_fw_rsvd_8b); + + snprintf(roce_ver, FW_VER_STR_LEN, "%d.%d.%d.%d", + ver_resp->roce_fw_maj_8b, ver_resp->roce_fw_min_8b, + ver_resp->roce_fw_bld_8b, ver_resp->roce_fw_rsvd_8b); + } + rc = devlink_info_version_running_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW_APP, fw_ver); + if (rc) + return rc; + + if (!(bp->flags & BNXT_FLAG_CHIP_P5)) { + rc = devlink_info_version_running_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW_MGMT, mgmt_ver); + if (rc) + return rc; + + rc = devlink_info_version_running_put(req, + DEVLINK_INFO_VERSION_GENERIC_FW_ROCE, roce_ver); + if (rc) + return rc; + } + return 0; +} + static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg, int msg_len, union devlink_param_value *val) { @@ -475,15 +631,48 @@ static const struct devlink_param bnxt_dl_params[] = { static const struct devlink_param bnxt_dl_port_params[] = { }; -int bnxt_dl_register(struct bnxt *bp) +static int bnxt_dl_params_register(struct bnxt *bp) { - struct devlink *dl; int rc; - if (bp->hwrm_spec_code < 0x10600) { - netdev_warn(bp->dev, "Firmware does not support NVM params"); - return -ENOTSUPP; + if (bp->hwrm_spec_code < 0x10600) + return 0; + + rc = devlink_params_register(bp->dl, bnxt_dl_params, + ARRAY_SIZE(bnxt_dl_params)); + if (rc) { + netdev_warn(bp->dev, "devlink_params_register failed. rc=%d", + rc); + return rc; } + rc = devlink_port_params_register(&bp->dl_port, bnxt_dl_port_params, + ARRAY_SIZE(bnxt_dl_port_params)); + if (rc) { + netdev_err(bp->dev, "devlink_port_params_register failed"); + devlink_params_unregister(bp->dl, bnxt_dl_params, + ARRAY_SIZE(bnxt_dl_params)); + return rc; + } + devlink_params_publish(bp->dl); + + return 0; +} + +static void bnxt_dl_params_unregister(struct bnxt *bp) +{ + if (bp->hwrm_spec_code < 0x10600) + return; + + devlink_params_unregister(bp->dl, bnxt_dl_params, + ARRAY_SIZE(bnxt_dl_params)); + devlink_port_params_unregister(&bp->dl_port, bnxt_dl_port_params, + ARRAY_SIZE(bnxt_dl_port_params)); +} + +int bnxt_dl_register(struct bnxt *bp) +{ + struct devlink *dl; + int rc; if (BNXT_PF(bp)) dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl)); @@ -510,40 +699,23 @@ int bnxt_dl_register(struct bnxt *bp) if (!BNXT_PF(bp)) return 0; - rc = devlink_params_register(dl, bnxt_dl_params, - ARRAY_SIZE(bnxt_dl_params)); - if (rc) { - netdev_warn(bp->dev, "devlink_params_register failed. 
rc=%d", - rc); - goto err_dl_unreg; - } - devlink_port_attrs_set(&bp->dl_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, - bp->pf.port_id, false, 0, - bp->switch_id, sizeof(bp->switch_id)); + bp->pf.port_id, false, 0, bp->dsn, + sizeof(bp->dsn)); rc = devlink_port_register(dl, &bp->dl_port, bp->pf.port_id); if (rc) { netdev_err(bp->dev, "devlink_port_register failed"); - goto err_dl_param_unreg; + goto err_dl_unreg; } - devlink_port_type_eth_set(&bp->dl_port, bp->dev); - rc = devlink_port_params_register(&bp->dl_port, bnxt_dl_port_params, - ARRAY_SIZE(bnxt_dl_port_params)); - if (rc) { - netdev_err(bp->dev, "devlink_port_params_register failed"); + rc = bnxt_dl_params_register(bp); + if (rc) goto err_dl_port_unreg; - } - - devlink_params_publish(dl); return 0; err_dl_port_unreg: devlink_port_unregister(&bp->dl_port); -err_dl_param_unreg: - devlink_params_unregister(dl, bnxt_dl_params, - ARRAY_SIZE(bnxt_dl_params)); err_dl_unreg: devlink_unregister(dl); err_dl_free: @@ -560,12 +732,8 @@ void bnxt_dl_unregister(struct bnxt *bp) return; if (BNXT_PF(bp)) { - devlink_port_params_unregister(&bp->dl_port, - bnxt_dl_port_params, - ARRAY_SIZE(bnxt_dl_port_params)); + bnxt_dl_params_unregister(bp); devlink_port_unregister(&bp->dl_port); - devlink_params_unregister(dl, bnxt_dl_params, - ARRAY_SIZE(bnxt_dl_params)); } devlink_unregister(dl); devlink_free(dl); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h index 6db6c3dac472..95f893f2a74d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h @@ -38,6 +38,10 @@ static inline void bnxt_link_bp_to_dl(struct bnxt *bp, struct devlink *dl) #define NVM_OFF_IGNORE_ARI 164 #define NVM_OFF_DIS_GRE_VER_CHECK 171 #define NVM_OFF_ENABLE_SRIOV 401 +#define NVM_OFF_NVM_CFG_VER 602 + +#define BNXT_NVM_CFG_VER_BITS 24 +#define BNXT_NVM_CFG_VER_BYTES 4 #define BNXT_MSIX_VEC_MAX 1280 #define BNXT_MSIX_VEC_MIN_MAX 128 @@ -58,6 +62,7 @@ struct bnxt_dl_nvm_param { void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event); void bnxt_dl_health_status_update(struct bnxt *bp, bool healthy); +void bnxt_dl_health_recovery_done(struct bnxt *bp); void bnxt_dl_fw_reporters_create(struct bnxt *bp); void bnxt_dl_fw_reporters_destroy(struct bnxt *bp, bool all); int bnxt_dl_register(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 08d56ec7b68a..6171fa8b3677 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1462,15 +1462,15 @@ static int bnxt_get_link_ksettings(struct net_device *dev, ethtool_link_ksettings_add_link_mode(lk_ksettings, advertising, Autoneg); base->autoneg = AUTONEG_ENABLE; - if (link_info->phy_link_status == BNXT_LINK_LINK) + base->duplex = DUPLEX_UNKNOWN; + if (link_info->phy_link_status == BNXT_LINK_LINK) { bnxt_fw_to_ethtool_lp_adv(link_info, lk_ksettings); + if (link_info->duplex & BNXT_LINK_DUPLEX_FULL) + base->duplex = DUPLEX_FULL; + else + base->duplex = DUPLEX_HALF; + } ethtool_speed = bnxt_fw_to_ethtool_speed(link_info->link_speed); - if (!netif_carrier_ok(dev)) - base->duplex = DUPLEX_UNKNOWN; - else if (link_info->duplex & BNXT_LINK_DUPLEX_FULL) - base->duplex = DUPLEX_FULL; - else - base->duplex = DUPLEX_HALF; } else { base->autoneg = AUTONEG_DISABLE; ethtool_speed = @@ -2707,7 +2707,7 @@ static int bnxt_disable_an_for_lpbk(struct bnxt *bp, return rc; fw_speed = 
PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_1GB; - if (netif_carrier_ok(bp->dev)) + if (bp->link_info.link_up) fw_speed = bp->link_info.link_speed; else if (fw_advertising & BNXT_LINK_SPEED_MSK_10GB) fw_speed = PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10GB; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 120fa05a39ff..e50a15397e11 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2,7 +2,7 @@ /* * Broadcom GENET (Gigabit Ethernet) controller driver * - * Copyright (c) 2014-2017 Broadcom + * Copyright (c) 2014-2019 Broadcom */ #define pr_fmt(fmt) "bcmgenet: " fmt @@ -508,8 +508,8 @@ static int bcmgenet_set_link_ksettings(struct net_device *dev, return phy_ethtool_ksettings_set(dev->phydev, cmd); } -static int bcmgenet_set_rx_csum(struct net_device *dev, - netdev_features_t wanted) +static void bcmgenet_set_rx_csum(struct net_device *dev, + netdev_features_t wanted) { struct bcmgenet_priv *priv = netdev_priv(dev); u32 rbuf_chk_ctrl; @@ -521,7 +521,7 @@ static int bcmgenet_set_rx_csum(struct net_device *dev, /* enable rx checksumming */ if (rx_csum_en) - rbuf_chk_ctrl |= RBUF_RXCHK_EN; + rbuf_chk_ctrl |= RBUF_RXCHK_EN | RBUF_L3_PARSE_DIS; else rbuf_chk_ctrl &= ~RBUF_RXCHK_EN; priv->desc_rxchk_en = rx_csum_en; @@ -535,12 +535,10 @@ static int bcmgenet_set_rx_csum(struct net_device *dev, rbuf_chk_ctrl &= ~RBUF_SKIP_FCS; bcmgenet_rbuf_writel(priv, rbuf_chk_ctrl, RBUF_CHK_CTRL); - - return 0; } -static int bcmgenet_set_tx_csum(struct net_device *dev, - netdev_features_t wanted) +static void bcmgenet_set_tx_csum(struct net_device *dev, + netdev_features_t wanted) { struct bcmgenet_priv *priv = netdev_priv(dev); bool desc_64b_en; @@ -549,7 +547,7 @@ static int bcmgenet_set_tx_csum(struct net_device *dev, tbuf_ctrl = bcmgenet_tbuf_ctrl_get(priv); rbuf_ctrl = bcmgenet_rbuf_readl(priv, RBUF_CTRL); - desc_64b_en = !!(wanted & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)); + desc_64b_en = !!(wanted & NETIF_F_HW_CSUM); /* enable 64 bytes descriptor in both directions (RBUF and TBUF) */ if (desc_64b_en) { @@ -563,21 +561,27 @@ static int bcmgenet_set_tx_csum(struct net_device *dev, bcmgenet_tbuf_ctrl_set(priv, tbuf_ctrl); bcmgenet_rbuf_writel(priv, rbuf_ctrl, RBUF_CTRL); - - return 0; } static int bcmgenet_set_features(struct net_device *dev, netdev_features_t features) { - netdev_features_t changed = features ^ dev->features; - netdev_features_t wanted = dev->wanted_features; - int ret = 0; + struct bcmgenet_priv *priv = netdev_priv(dev); + u32 reg; + int ret; + + ret = clk_prepare_enable(priv->clk); + if (ret) + return ret; - if (changed & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) - ret = bcmgenet_set_tx_csum(dev, wanted); - if (changed & (NETIF_F_RXCSUM)) - ret = bcmgenet_set_rx_csum(dev, wanted); + /* Make sure we reflect the value of CRC_CMD_FWD */ + reg = bcmgenet_umac_readl(priv, UMAC_CMD); + priv->crc_fwd_en = !!(reg & CMD_CRC_FWD); + + bcmgenet_set_tx_csum(dev, features); + bcmgenet_set_rx_csum(dev, features); + + clk_disable_unprepare(priv->clk); return ret; } @@ -857,6 +861,9 @@ static const struct bcmgenet_stats bcmgenet_gstrings_stats[] = { STAT_GENET_SOFT_MIB("alloc_rx_buff_failed", mib.alloc_rx_buff_failed), STAT_GENET_SOFT_MIB("rx_dma_failed", mib.rx_dma_failed), STAT_GENET_SOFT_MIB("tx_dma_failed", mib.tx_dma_failed), + STAT_GENET_SOFT_MIB("tx_realloc_tsb", mib.tx_realloc_tsb), + STAT_GENET_SOFT_MIB("tx_realloc_tsb_failed", + mib.tx_realloc_tsb_failed), /* Per TX queues */ 
STAT_GENET_Q(0), STAT_GENET_Q(1), @@ -1218,18 +1225,6 @@ static void bcmgenet_power_up(struct bcmgenet_priv *priv, } } -/* ioctl handle special commands that are not present in ethtool. */ -static int bcmgenet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) -{ - if (!netif_running(dev)) - return -EINVAL; - - if (!dev->phydev) - return -ENODEV; - - return phy_mii_ioctl(dev->phydev, rq, cmd); -} - static struct enet_cb *bcmgenet_get_txcb(struct bcmgenet_priv *priv, struct bcmgenet_tx_ring *ring) { @@ -1483,6 +1478,7 @@ static void bcmgenet_tx_reclaim_all(struct net_device *dev) static struct sk_buff *bcmgenet_put_tx_csum(struct net_device *dev, struct sk_buff *skb) { + struct bcmgenet_priv *priv = netdev_priv(dev); struct status_64 *status = NULL; struct sk_buff *new_skb; u16 offset; @@ -1495,12 +1491,15 @@ static struct sk_buff *bcmgenet_put_tx_csum(struct net_device *dev, * enough headroom for us to insert 64B status block. */ new_skb = skb_realloc_headroom(skb, sizeof(*status)); - dev_kfree_skb(skb); if (!new_skb) { + dev_kfree_skb_any(skb); + priv->mib.tx_realloc_tsb_failed++; dev->stats.tx_dropped++; return NULL; } + dev_consume_skb_any(skb); skb = new_skb; + priv->mib.tx_realloc_tsb++; } skb_push(skb, sizeof(*status)); @@ -1516,24 +1515,19 @@ static struct sk_buff *bcmgenet_put_tx_csum(struct net_device *dev, ip_proto = ipv6_hdr(skb)->nexthdr; break; default: - return skb; + /* don't use UDP flag */ + ip_proto = 0; + break; } offset = skb_checksum_start_offset(skb) - sizeof(*status); tx_csum_info = (offset << STATUS_TX_CSUM_START_SHIFT) | - (offset + skb->csum_offset); + (offset + skb->csum_offset) | + STATUS_TX_CSUM_LV; - /* Set the length valid bit for TCP and UDP and just set - * the special UDP flag for IPv4, else just set to 0. - */ - if (ip_proto == IPPROTO_TCP || ip_proto == IPPROTO_UDP) { - tx_csum_info |= STATUS_TX_CSUM_LV; - if (ip_proto == IPPROTO_UDP && - ip_ver == htons(ETH_P_IP)) - tx_csum_info |= STATUS_TX_CSUM_PROTO_UDP; - } else { - tx_csum_info = 0; - } + /* Set the special UDP flag for UDP */ + if (ip_proto == IPPROTO_UDP) + tx_csum_info |= STATUS_TX_CSUM_PROTO_UDP; status->tx_csum_info = tx_csum_info; } @@ -1744,7 +1738,6 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, unsigned int bytes_processed = 0; unsigned int p_index, mask; unsigned int discards; - unsigned int chksum_ok = 0; /* Clear status before servicing to reduce spurious interrupts */ if (ring->index == DESC_INDEX) { @@ -1795,9 +1788,15 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, dmadesc_get_length_status(priv, cb->bd_addr); } else { struct status_64 *status; + __be16 rx_csum; status = (struct status_64 *)skb->data; dma_length_status = status->length_status; + rx_csum = (__force __be16)(status->rx_csum & 0xffff); + if (priv->desc_rxchk_en) { + skb->csum = (__force __wsum)ntohs(rx_csum); + skb->ip_summed = CHECKSUM_COMPLETE; + } } /* DMA flags and length are still valid no matter how @@ -1840,18 +1839,12 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, goto next; } /* error packet */ - chksum_ok = (dma_flag & priv->dma_rx_chk_bit) && - priv->desc_rxchk_en; - skb_put(skb, len); if (priv->desc_64b_en) { skb_pull(skb, 64); len -= 64; } - if (likely(chksum_ok)) - skb->ip_summed = CHECKSUM_UNNECESSARY; - /* remove hardware 2bytes added for IP alignment */ skb_pull(skb, 2); len -= 2; @@ -2164,8 +2157,8 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv, DMA_END_ADDR); /* Initialize Tx NAPI */ - 
netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, - NAPI_POLL_WEIGHT); + netif_tx_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, + NAPI_POLL_WEIGHT); } /* Initialize a RDMA ring */ @@ -2886,9 +2879,10 @@ static int bcmgenet_open(struct net_device *dev) init_umac(priv); - /* Make sure we reflect the value of CRC_CMD_FWD */ - reg = bcmgenet_umac_readl(priv, UMAC_CMD); - priv->crc_fwd_en = !!(reg & CMD_CRC_FWD); + /* Apply features again in case we changed them while interface was + * down + */ + bcmgenet_set_features(dev, dev->features); bcmgenet_set_hw_addr(priv, dev->dev_addr); @@ -3055,7 +3049,7 @@ static void bcmgenet_dump_tx_queue(struct bcmgenet_tx_ring *ring) ring->cb_ptr, ring->end_ptr); } -static void bcmgenet_timeout(struct net_device *dev) +static void bcmgenet_timeout(struct net_device *dev, unsigned int txqueue) { struct bcmgenet_priv *priv = netdev_priv(dev); u32 int0_enable = 0; @@ -3216,7 +3210,7 @@ static const struct net_device_ops bcmgenet_netdev_ops = { .ndo_tx_timeout = bcmgenet_timeout, .ndo_set_rx_mode = bcmgenet_set_rx_mode, .ndo_set_mac_address = bcmgenet_set_mac_addr, - .ndo_do_ioctl = bcmgenet_ioctl, + .ndo_do_ioctl = phy_do_ioctl_running, .ndo_set_features = bcmgenet_set_features, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = bcmgenet_poll_controller, @@ -3327,19 +3321,15 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv) if (GENET_IS_V5(priv) || GENET_IS_V4(priv)) { bcmgenet_dma_regs = bcmgenet_dma_regs_v3plus; genet_dma_ring_regs = genet_dma_ring_regs_v4; - priv->dma_rx_chk_bit = DMA_RX_CHK_V3PLUS; } else if (GENET_IS_V3(priv)) { bcmgenet_dma_regs = bcmgenet_dma_regs_v3plus; genet_dma_ring_regs = genet_dma_ring_regs_v123; - priv->dma_rx_chk_bit = DMA_RX_CHK_V3PLUS; } else if (GENET_IS_V2(priv)) { bcmgenet_dma_regs = bcmgenet_dma_regs_v2; genet_dma_ring_regs = genet_dma_ring_regs_v123; - priv->dma_rx_chk_bit = DMA_RX_CHK_V12; } else if (GENET_IS_V1(priv)) { bcmgenet_dma_regs = bcmgenet_dma_regs_v1; genet_dma_ring_regs = genet_dma_ring_regs_v123; - priv->dma_rx_chk_bit = DMA_RX_CHK_V12; } /* enum genet_version starts at 1 */ @@ -3535,9 +3525,11 @@ static int bcmgenet_probe(struct platform_device *pdev) priv->msg_enable = netif_msg_init(-1, GENET_MSG_DEFAULT); - /* Set hardware features */ - dev->hw_features |= NETIF_F_SG | NETIF_F_IP_CSUM | - NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM; + /* Set default features */ + dev->features |= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | + NETIF_F_RXCSUM; + dev->hw_features |= dev->features; + dev->vlan_features |= dev->features; /* Request the WOL interrupt and advertise suspend if available */ priv->wol_irq_disabled = true; @@ -3574,6 +3566,14 @@ static int bcmgenet_probe(struct platform_device *pdev) bcmgenet_set_hw_params(priv); + err = -EIO; + if (priv->hw_params->flags & GENET_HAS_40BITS) + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); + if (err) + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (err) + goto err; + /* Mii wait queue */ init_waitqueue_head(&priv->wq); /* Always use RX_BUF_LENGTH (2KB) buffer for all chips */ @@ -3689,6 +3689,9 @@ static int bcmgenet_resume(struct device *d) genphy_config_aneg(dev->phydev); bcmgenet_mii_config(priv->dev, false); + /* Restore enabled features */ + bcmgenet_set_features(dev, dev->features); + bcmgenet_set_hw_addr(priv, dev->dev_addr); if (priv->internal_phy) { diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index a5659197598f..61a6fe9f4cec 
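Several drivers in this section (bcmgenet here, dnet and ftgmac100 further down) drop their hand-rolled MII ioctl wrappers in favour of the generic phy_do_ioctl()/phy_do_ioctl_running() helpers, which roll up the netif_running()/phydev checks the removed bcmgenet_ioctl() open-coded. A hedged sketch of the wiring; the foo_* ops are hypothetical placeholders:

static const struct net_device_ops foo_netdev_ops = {
        .ndo_open       = foo_open,             /* hypothetical */
        .ndo_stop       = foo_stop,             /* hypothetical */
        .ndo_start_xmit = foo_start_xmit,       /* hypothetical */
        /* was a per-driver wrapper around phy_mii_ioctl() */
        .ndo_do_ioctl   = phy_do_ioctl_running,
};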
100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -144,6 +144,8 @@ struct bcmgenet_mib_counters { u32 alloc_rx_buff_failed; u32 rx_dma_failed; u32 tx_dma_failed; + u32 tx_realloc_tsb; + u32 tx_realloc_tsb_failed; }; #define UMAC_HD_BKP_CTRL 0x004 @@ -251,6 +253,7 @@ struct bcmgenet_mib_counters { #define RBUF_CHK_CTRL 0x14 #define RBUF_RXCHK_EN (1 << 0) #define RBUF_SKIP_FCS (1 << 4) +#define RBUF_L3_PARSE_DIS (1 << 5) #define RBUF_ENERGY_CTRL 0x9c #define RBUF_EEE_EN (1 << 0) @@ -663,7 +666,6 @@ struct bcmgenet_priv { bool desc_rxchk_en; bool crc_fwd_en; - unsigned int dma_rx_chk_bit; u32 dma_max_burst_length; u32 msg_enable; diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c index 1604ad32e920..5b4568c2ad1c 100644 --- a/drivers/net/ethernet/broadcom/sb1250-mac.c +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c @@ -294,7 +294,7 @@ static int sbmac_set_duplex(struct sbmac_softc *s, enum sbmac_duplex duplex, enum sbmac_fc fc); static int sbmac_open(struct net_device *dev); -static void sbmac_tx_timeout (struct net_device *dev); +static void sbmac_tx_timeout (struct net_device *dev, unsigned int txqueue); static void sbmac_set_rx_mode(struct net_device *dev); static int sbmac_mii_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static int sbmac_close(struct net_device *dev); @@ -2419,7 +2419,7 @@ static void sbmac_mii_poll(struct net_device *dev) } -static void sbmac_tx_timeout (struct net_device *dev) +static void sbmac_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct sbmac_softc *sc = netdev_priv(dev); unsigned long flags; @@ -2537,7 +2537,7 @@ static int sbmac_probe(struct platform_device *pldev) res = platform_get_resource(pldev, IORESOURCE_MEM, 0); BUG_ON(!res); - sbm_base = ioremap_nocache(res->start, resource_size(res)); + sbm_base = ioremap(res->start, resource_size(res)); if (!sbm_base) { printk(KERN_ERR "%s: unable to map device registers\n", dev_name(&pldev->dev)); diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index ca3aa1250dd1..88466255bf66 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -7645,7 +7645,7 @@ static void tg3_poll_controller(struct net_device *dev) } #endif -static void tg3_tx_timeout(struct net_device *dev) +static void tg3_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct tg3 *tp = netdev_priv(dev); @@ -7874,8 +7874,8 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *); static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi, struct netdev_queue *txq, struct sk_buff *skb) { - struct sk_buff *segs, *nskb; u32 frag_cnt_est = skb_shinfo(skb)->gso_segs * 3; + struct sk_buff *segs, *seg, *next; /* Estimate the number of fragments in the worst case */ if (unlikely(tg3_tx_avail(tnapi) <= frag_cnt_est)) { @@ -7898,12 +7898,10 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi, if (IS_ERR(segs) || !segs) goto tg3_tso_bug_end; - do { - nskb = segs; - segs = segs->next; - nskb->next = NULL; - tg3_start_xmit(nskb, tp->dev); - } while (segs); + skb_list_walk_safe(segs, seg, next) { + skb_mark_not_on_list(seg); + tg3_start_xmit(seg, tp->dev); + } tg3_tso_bug_end: dev_consume_skb_any(skb); diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c index 4042c2185e98..e17bfc87da90 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c +++ 
b/drivers/net/ethernet/brocade/bna/bfa_ioc.c @@ -1124,11 +1124,10 @@ bfa_nw_ioc_sem_release(void __iomem *sem_reg) static void bfa_ioc_fwver_clear(struct bfa_ioc *ioc) { - u32 pgnum, pgoff, loff = 0; + u32 pgnum, loff = 0; int i; pgnum = PSS_SMEM_PGNUM(ioc->ioc_regs.smem_pg0, loff); - pgoff = PSS_SMEM_PGOFF(loff); writel(pgnum, ioc->ioc_regs.host_page_num_fn); for (i = 0; i < (sizeof(struct bfi_ioc_image_hdr) / sizeof(u32)); i++) { diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index e338272931d1..01a50a4b2113 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -3477,7 +3477,7 @@ bnad_init(struct bnad *bnad, bnad->pcidev = pdev; bnad->mmio_start = pci_resource_start(pdev, 0); bnad->mmio_len = pci_resource_len(pdev, 0); - bnad->bar0 = ioremap_nocache(bnad->mmio_start, bnad->mmio_len); + bnad->bar0 = ioremap(bnad->mmio_start, bnad->mmio_len); if (!bnad->bar0) { dev_err(&pdev->dev, "ioremap for bar0 failed\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 19fe4f4867c7..dbf7070fcdba 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -630,10 +630,17 @@ #define GEM_CLK_DIV96 5 /* Constants for MAN register */ -#define MACB_MAN_SOF 1 -#define MACB_MAN_WRITE 1 -#define MACB_MAN_READ 2 -#define MACB_MAN_CODE 2 +#define MACB_MAN_C22_SOF 1 +#define MACB_MAN_C22_WRITE 1 +#define MACB_MAN_C22_READ 2 +#define MACB_MAN_C22_CODE 2 + +#define MACB_MAN_C45_SOF 0 +#define MACB_MAN_C45_ADDR 0 +#define MACB_MAN_C45_WRITE 1 +#define MACB_MAN_C45_POST_READ_INCR 2 +#define MACB_MAN_C45_READ 3 +#define MACB_MAN_C45_CODE 2 /* Capability mask bits */ #define MACB_CAPS_ISR_CLEAR_ON_WRITE 0x00000001 diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index f7d87c71aaa9..7a2fe63d1136 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -337,11 +337,30 @@ static int macb_mdio_read(struct mii_bus *bus, int mii_id, int regnum) if (status < 0) goto mdio_read_exit; - macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_SOF) - | MACB_BF(RW, MACB_MAN_READ) - | MACB_BF(PHYA, mii_id) - | MACB_BF(REGA, regnum) - | MACB_BF(CODE, MACB_MAN_CODE))); + if (regnum & MII_ADDR_C45) { + macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_C45_SOF) + | MACB_BF(RW, MACB_MAN_C45_ADDR) + | MACB_BF(PHYA, mii_id) + | MACB_BF(REGA, (regnum >> 16) & 0x1F) + | MACB_BF(DATA, regnum & 0xFFFF) + | MACB_BF(CODE, MACB_MAN_C45_CODE))); + + status = macb_mdio_wait_for_idle(bp); + if (status < 0) + goto mdio_read_exit; + + macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_C45_SOF) + | MACB_BF(RW, MACB_MAN_C45_READ) + | MACB_BF(PHYA, mii_id) + | MACB_BF(REGA, (regnum >> 16) & 0x1F) + | MACB_BF(CODE, MACB_MAN_C45_CODE))); + } else { + macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_C22_SOF) + | MACB_BF(RW, MACB_MAN_C22_READ) + | MACB_BF(PHYA, mii_id) + | MACB_BF(REGA, regnum) + | MACB_BF(CODE, MACB_MAN_C22_CODE))); + } status = macb_mdio_wait_for_idle(bp); if (status < 0) @@ -370,12 +389,32 @@ static int macb_mdio_write(struct mii_bus *bus, int mii_id, int regnum, if (status < 0) goto mdio_write_exit; - macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_SOF) - | MACB_BF(RW, MACB_MAN_WRITE) - | MACB_BF(PHYA, mii_id) - | MACB_BF(REGA, regnum) - | MACB_BF(CODE, MACB_MAN_CODE) - | MACB_BF(DATA, value))); + if (regnum & MII_ADDR_C45) { + macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_C45_SOF) + | 
MACB_BF(RW, MACB_MAN_C45_ADDR) + | MACB_BF(PHYA, mii_id) + | MACB_BF(REGA, (regnum >> 16) & 0x1F) + | MACB_BF(DATA, regnum & 0xFFFF) + | MACB_BF(CODE, MACB_MAN_C45_CODE))); + + status = macb_mdio_wait_for_idle(bp); + if (status < 0) + goto mdio_write_exit; + + macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_C45_SOF) + | MACB_BF(RW, MACB_MAN_C45_WRITE) + | MACB_BF(PHYA, mii_id) + | MACB_BF(REGA, (regnum >> 16) & 0x1F) + | MACB_BF(CODE, MACB_MAN_C45_CODE) + | MACB_BF(DATA, value))); + } else { + macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_C22_SOF) + | MACB_BF(RW, MACB_MAN_C22_WRITE) + | MACB_BF(PHYA, mii_id) + | MACB_BF(REGA, regnum) + | MACB_BF(CODE, MACB_MAN_C22_CODE) + | MACB_BF(DATA, value))); + } status = macb_mdio_wait_for_idle(bp); if (status < 0) diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index af04a2c81adb..05a3d067c3fc 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -1251,7 +1251,7 @@ static int xgmac_poll(struct napi_struct *napi, int budget) * netdev structure and arrange for the device to be reset to a sane state * in order to transmit a new packet. */ -static void xgmac_tx_timeout(struct net_device *dev) +static void xgmac_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct xgmac_priv *priv = netdev_priv(dev); schedule_work(&priv->tx_timeout_work); diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 7f3b2e3b0868..eab05b5534ea 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -2562,7 +2562,7 @@ lio_xmit_failed: /** \brief Network device Tx timeout * @param netdev pointer to network device */ -static void liquidio_tx_timeout(struct net_device *netdev) +static void liquidio_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct lio *lio; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index 370d76822ee0..7a77544a54f5 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -1628,7 +1628,7 @@ lio_xmit_failed: /** \brief Network device Tx timeout * @param netdev pointer to network device */ -static void liquidio_tx_timeout(struct net_device *netdev) +static void liquidio_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct lio *lio; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c index f3f2e71431ac..600de587d7a9 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c @@ -31,7 +31,7 @@ static int lio_vf_rep_open(struct net_device *ndev); static int lio_vf_rep_stop(struct net_device *ndev); static netdev_tx_t lio_vf_rep_pkt_xmit(struct sk_buff *skb, struct net_device *ndev); -static void lio_vf_rep_tx_timeout(struct net_device *netdev); +static void lio_vf_rep_tx_timeout(struct net_device *netdev, unsigned int txqueue); static int lio_vf_rep_phys_port_name(struct net_device *dev, char *buf, size_t len); static void lio_vf_rep_get_stats64(struct net_device *dev, @@ -172,7 +172,7 @@ lio_vf_rep_stop(struct net_device *ndev) } static void -lio_vf_rep_tx_timeout(struct net_device *ndev) +lio_vf_rep_tx_timeout(struct net_device *ndev, unsigned int txqueue) { netif_trans_update(ndev); diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c 
b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index cdd7e5da4a74..e9575887a4f8 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -790,9 +790,7 @@ static int octeon_mgmt_ioctl(struct net_device *netdev, case SIOCSHWTSTAMP: return octeon_mgmt_ioctl_hwtstamp(netdev, rq, cmd); default: - if (netdev->phydev) - return phy_mii_ioctl(netdev->phydev, rq, cmd); - return -EINVAL; + return phy_do_ioctl(netdev, rq, cmd); } } diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index f28409279ea4..016957285f99 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1741,7 +1741,7 @@ static void nicvf_get_stats64(struct net_device *netdev, } -static void nicvf_tx_timeout(struct net_device *dev) +static void nicvf_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct nicvf *nic = netdev_priv(dev); diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 58f89f6a040f..883cfa9c4b6d 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -2448,6 +2448,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) if (!is_offload(adapter)) return -EOPNOTSUPP; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; if (!(adapter->flags & FULL_INIT_DONE)) return -EIO; /* need the memory controllers */ if (copy_from_user(&t, useraddr, sizeof(t))) @@ -3265,7 +3267,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_free_adapter; } - adapter->regs = ioremap_nocache(mmio_start, mmio_len); + adapter->regs = ioremap(mmio_start, mmio_len); if (!adapter->regs) { dev_err(&pdev->dev, "cannot map device registers\n"); err = -ENOMEM; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index becee29f5df7..8b7d156f79d3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -56,6 +56,7 @@ #include <asm/io.h> #include "t4_chip_type.h" #include "cxgb4_uld.h" +#include "t4fw_api.h" #define CH_WARN(adap, fmt, ...) 
dev_warn(adap->pdev_dev, fmt, ## __VA_ARGS__) extern struct list_head adapter_list; @@ -68,6 +69,16 @@ extern struct mutex uld_mutex; #define ETHTXQ_STOP_THRES \ (1 + DIV_ROUND_UP((3 * MAX_SKB_FRAGS) / 2 + (MAX_SKB_FRAGS & 1), 8)) +#define FW_PARAM_DEV(param) \ + (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | \ + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_##param)) + +#define FW_PARAM_PFVF(param) \ + (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) | \ + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_##param) | \ + FW_PARAMS_PARAM_Y_V(0) | \ + FW_PARAMS_PARAM_Z_V(0)) + enum { MAX_NPORTS = 4, /* max # of ports */ SERNUM_LEN = 24, /* Serial # length */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index aca9f7a20a2a..9d1f2f88b945 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -70,8 +70,7 @@ static void *seq_tab_start(struct seq_file *seq, loff_t *pos) static void *seq_tab_next(struct seq_file *seq, void *v, loff_t *pos) { v = seq_tab_get_idx(seq->private, *pos + 1); - if (v) - ++*pos; + ++(*pos); return v; } @@ -3175,14 +3174,12 @@ static const struct file_operations mem_debugfs_fops = { static int tid_info_show(struct seq_file *seq, void *v) { - unsigned int tid_start = 0; struct adapter *adap = seq->private; - const struct tid_info *t = &adap->tids; - enum chip_type chip = CHELSIO_CHIP_VERSION(adap->params.chip); - - if (chip > CHELSIO_T5) - tid_start = t4_read_reg(adap, LE_DB_ACTIVE_TABLE_START_INDEX_A); + const struct tid_info *t; + enum chip_type chip; + t = &adap->tids; + chip = CHELSIO_CHIP_VERSION(adap->params.chip); if (t4_read_reg(adap, LE_DB_CONFIG_A) & HASHEN_F) { unsigned int sb; seq_printf(seq, "Connections in use: %u\n", @@ -3194,9 +3191,9 @@ static int tid_info_show(struct seq_file *seq, void *v) sb = t4_read_reg(adap, LE_DB_SRVR_START_INDEX_A); if (sb) { - seq_printf(seq, "TID range: %u..%u/%u..%u", tid_start, + seq_printf(seq, "TID range: %u..%u/%u..%u", t->tid_base, sb - 1, adap->tids.hash_base, - t->ntids - 1); + t->tid_base + t->ntids - 1); seq_printf(seq, ", in use: %u/%u\n", atomic_read(&t->tids_in_use), atomic_read(&t->hash_tids_in_use)); @@ -3205,14 +3202,14 @@ static int tid_info_show(struct seq_file *seq, void *v) t->aftid_base, t->aftid_end, adap->tids.hash_base, - t->ntids - 1); + t->tid_base + t->ntids - 1); seq_printf(seq, ", in use: %u/%u\n", atomic_read(&t->tids_in_use), atomic_read(&t->hash_tids_in_use)); } else { seq_printf(seq, "TID range: %u..%u", adap->tids.hash_base, - t->ntids - 1); + t->tid_base + t->ntids - 1); seq_printf(seq, ", in use: %u\n", atomic_read(&t->hash_tids_in_use)); } @@ -3220,8 +3217,8 @@ static int tid_info_show(struct seq_file *seq, void *v) seq_printf(seq, "Connections in use: %u\n", atomic_read(&t->conns_in_use)); - seq_printf(seq, "TID range: %u..%u", tid_start, - tid_start + t->ntids - 1); + seq_printf(seq, "TID range: %u..%u", t->tid_base, + t->tid_base + t->ntids - 1); seq_printf(seq, ", in use: %u\n", atomic_read(&t->tids_in_use)); } @@ -3244,6 +3241,9 @@ static int tid_info_show(struct seq_file *seq, void *v) seq_printf(seq, "SFTID range: %u..%u in use: %u\n", t->sftid_base, t->sftid_base + t->nsftids - 2, t->sftids_in_use); + if (t->nhpftids) + seq_printf(seq, "HPFTID range: %u..%u\n", t->hpftid_base, + t->hpftid_base + t->nhpftids - 1); if (t->ntids) seq_printf(seq, "HW TID usage: %u IP users, %u IPv6 users\n", t4_read_reg(adap, LE_DB_ACT_CNT_IPV4_A), diff --git 
a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 1d39fca11810..2a2938bbb93a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -361,20 +361,22 @@ static int get_filter_count(struct adapter *adapter, unsigned int fidx, tcb_base = t4_read_reg(adapter, TP_CMM_TCB_BASE_A); if (is_hashfilter(adapter) && hash) { - if (fidx < adapter->tids.ntids) { - f = adapter->tids.tid_tab[fidx]; - if (!f) - return -EINVAL; - } else { + if (tid_out_of_range(&adapter->tids, fidx)) return -E2BIG; - } + f = adapter->tids.tid_tab[fidx - adapter->tids.tid_base]; + if (!f) + return -EINVAL; } else { - if ((fidx != (adapter->tids.nftids + - adapter->tids.nsftids - 1)) && - fidx >= adapter->tids.nftids) + if ((fidx != (adapter->tids.nftids + adapter->tids.nsftids + + adapter->tids.nhpftids - 1)) && + fidx >= (adapter->tids.nftids + adapter->tids.nhpftids)) return -E2BIG; - f = &adapter->tids.ftid_tab[fidx]; + if (fidx < adapter->tids.nhpftids) + f = &adapter->tids.hpftid_tab[fidx]; + else + f = &adapter->tids.ftid_tab[fidx - + adapter->tids.nhpftids]; if (!f->valid) return -EINVAL; } @@ -480,6 +482,7 @@ int cxgb4_get_free_ftid(struct net_device *dev, int family) ftid -= n; } spin_unlock_bh(&t->ftid_lock); + ftid += t->nhpftids; return found ? ftid : -ENOMEM; } @@ -507,6 +510,24 @@ static int cxgb4_set_ftid(struct tid_info *t, int fidx, int family, return 0; } +static int cxgb4_set_hpftid(struct tid_info *t, int fidx, int family) +{ + spin_lock_bh(&t->ftid_lock); + + if (test_bit(fidx, t->hpftid_bmap)) { + spin_unlock_bh(&t->ftid_lock); + return -EBUSY; + } + + if (family == PF_INET) + __set_bit(fidx, t->hpftid_bmap); + else + bitmap_allocate_region(t->hpftid_bmap, fidx, 1); + + spin_unlock_bh(&t->ftid_lock); + return 0; +} + static void cxgb4_clear_ftid(struct tid_info *t, int fidx, int family, unsigned int chip_ver) { @@ -522,33 +543,58 @@ static void cxgb4_clear_ftid(struct tid_info *t, int fidx, int family, spin_unlock_bh(&t->ftid_lock); } +static void cxgb4_clear_hpftid(struct tid_info *t, int fidx, int family) +{ + spin_lock_bh(&t->ftid_lock); + + if (family == PF_INET) + __clear_bit(fidx, t->hpftid_bmap); + else + bitmap_release_region(t->hpftid_bmap, fidx, 1); + + spin_unlock_bh(&t->ftid_lock); +} + bool cxgb4_filter_prio_in_range(struct net_device *dev, u32 idx, u32 prio) { + struct filter_entry *prev_fe, *next_fe, *tab; struct adapter *adap = netdev2adap(dev); - struct filter_entry *prev_fe, *next_fe; + u32 prev_ftid, next_ftid, max_tid; struct tid_info *t = &adap->tids; - u32 prev_ftid, next_ftid; + unsigned long *bmap; bool valid = true; + if (idx < t->nhpftids) { + bmap = t->hpftid_bmap; + tab = t->hpftid_tab; + max_tid = t->nhpftids; + } else { + idx -= t->nhpftids; + bmap = t->ftid_bmap; + tab = t->ftid_tab; + max_tid = t->nftids; + } + /* Only insert the rule if both of the following conditions * are met: * 1. The immediate previous rule has priority <= @prio. * 2. The immediate next rule has priority >= @prio. */ spin_lock_bh(&t->ftid_lock); + /* Don't insert if there's a rule already present at @idx. 
*/ - if (test_bit(idx, t->ftid_bmap)) { + if (test_bit(idx, bmap)) { valid = false; goto out_unlock; } - next_ftid = find_next_bit(t->ftid_bmap, t->nftids, idx); - if (next_ftid >= t->nftids) + next_ftid = find_next_bit(bmap, max_tid, idx); + if (next_ftid >= max_tid) next_ftid = idx; - next_fe = &adap->tids.ftid_tab[next_ftid]; + next_fe = &tab[next_ftid]; - prev_ftid = find_last_bit(t->ftid_bmap, idx); + prev_ftid = find_last_bit(bmap, idx); if (prev_ftid >= idx) prev_ftid = idx; @@ -558,13 +604,13 @@ bool cxgb4_filter_prio_in_range(struct net_device *dev, u32 idx, u32 prio) * accordingly. */ if (CHELSIO_CHIP_VERSION(adap->params.chip) < CHELSIO_T6) { - prev_fe = &adap->tids.ftid_tab[prev_ftid & ~0x3]; + prev_fe = &tab[prev_ftid & ~0x3]; if (!prev_fe->fs.type) - prev_fe = &adap->tids.ftid_tab[prev_ftid]; + prev_fe = &tab[prev_ftid]; } else { - prev_fe = &adap->tids.ftid_tab[prev_ftid & ~0x1]; + prev_fe = &tab[prev_ftid & ~0x1]; if (!prev_fe->fs.type) - prev_fe = &adap->tids.ftid_tab[prev_ftid]; + prev_fe = &tab[prev_ftid]; } if ((prev_fe->valid && prio < prev_fe->fs.tc_prio) || @@ -579,11 +625,16 @@ out_unlock: /* Delete the filter at a specified index. */ static int del_filter_wr(struct adapter *adapter, int fidx) { - struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; struct fw_filter_wr *fwr; + struct filter_entry *f; struct sk_buff *skb; unsigned int len; + if (fidx < adapter->tids.nhpftids) + f = &adapter->tids.hpftid_tab[fidx]; + else + f = &adapter->tids.ftid_tab[fidx - adapter->tids.nhpftids]; + len = sizeof(*fwr); skb = alloc_skb(len, GFP_KERNEL); @@ -609,10 +660,15 @@ static int del_filter_wr(struct adapter *adapter, int fidx) */ int set_filter_wr(struct adapter *adapter, int fidx) { - struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; struct fw_filter2_wr *fwr; + struct filter_entry *f; struct sk_buff *skb; + if (fidx < adapter->tids.nhpftids) + f = &adapter->tids.hpftid_tab[fidx]; + else + f = &adapter->tids.ftid_tab[fidx - adapter->tids.nhpftids]; + skb = alloc_skb(sizeof(*fwr), GFP_KERNEL); if (!skb) return -ENOMEM; @@ -762,10 +818,14 @@ int delete_filter(struct adapter *adapter, unsigned int fidx) struct filter_entry *f; int ret; - if (fidx >= adapter->tids.nftids + adapter->tids.nsftids) + if (fidx >= adapter->tids.nftids + adapter->tids.nsftids + + adapter->tids.nhpftids) return -EINVAL; - f = &adapter->tids.ftid_tab[fidx]; + if (fidx < adapter->tids.nhpftids) + f = &adapter->tids.hpftid_tab[fidx]; + else + f = &adapter->tids.ftid_tab[fidx - adapter->tids.nhpftids]; ret = writable_filter(f); if (ret) return ret; @@ -811,12 +871,22 @@ void clear_all_filters(struct adapter *adapter) struct net_device *dev = adapter->port[0]; unsigned int i; + if (adapter->tids.hpftid_tab) { + struct filter_entry *f = &adapter->tids.hpftid_tab[0]; + + for (i = 0; i < adapter->tids.nhpftids; i++, f++) + if (f->valid || f->pending) + cxgb4_del_filter(dev, i, &f->fs); + } + if (adapter->tids.ftid_tab) { struct filter_entry *f = &adapter->tids.ftid_tab[0]; unsigned int max_ftid = adapter->tids.nftids + - adapter->tids.nsftids; + adapter->tids.nsftids + + adapter->tids.nhpftids; + /* Clear all TCAM filters */ - for (i = 0; i < max_ftid; i++, f++) + for (i = adapter->tids.nhpftids; i < max_ftid; i++, f++) if (f->valid || f->pending) cxgb4_del_filter(dev, i, &f->fs); } @@ -1319,17 +1389,17 @@ out_err: * filter specification in order to facilitate signaling completion of the * operation. 
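The cxgb4 changes in this file split the LE-TCAM filter index space into a high-priority region (hpftid_tab/hpftid_bmap, occupying the first nhpftids slots) followed by the normal region, and every lookup now picks the table accordingly. A sketch of that selection, mirroring the pattern used in cxgb4_filter_prio_in_range() and del_filter_wr() above; foo_get_filter_entry is an illustrative helper, not part of the patch:

static struct filter_entry *foo_get_filter_entry(struct tid_info *t,
                                                 unsigned int fidx)
{
        /* Indices 0..nhpftids-1 live in the high-priority region. */
        if (fidx < t->nhpftids)
                return &t->hpftid_tab[fidx];

        /* Everything else falls back to the normal filter table. */
        return &t->ftid_tab[fidx - t->nhpftids];
}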
*/ -int __cxgb4_set_filter(struct net_device *dev, int filter_id, +int __cxgb4_set_filter(struct net_device *dev, int ftid, struct ch_filter_specification *fs, struct filter_ctx *ctx) { struct adapter *adapter = netdev2adap(dev); - unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); - unsigned int max_fidx, fidx; - struct filter_entry *f; + unsigned int max_fidx, fidx, chip_ver; + int iq, ret, filter_id = ftid; + struct filter_entry *f, *tab; u32 iconf; - int iq, ret; + chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); if (fs->hash) { if (is_hashfilter(adapter)) return cxgb4_set_hash_filter(dev, fs, ctx); @@ -1338,7 +1408,7 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, return -EINVAL; } - max_fidx = adapter->tids.nftids; + max_fidx = adapter->tids.nftids + adapter->tids.nhpftids; if (filter_id != (max_fidx + adapter->tids.nsftids - 1) && filter_id >= max_fidx) return -E2BIG; @@ -1353,6 +1423,13 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, if (iq < 0) return iq; + if (fs->prio) { + tab = &adapter->tids.hpftid_tab[0]; + } else { + tab = &adapter->tids.ftid_tab[0]; + filter_id = ftid - adapter->tids.nhpftids; + } + /* IPv6 filters occupy four slots and must be aligned on * four-slot boundaries. IPv4 filters only occupy a single * slot and have no alignment requirements but writing a new @@ -1373,9 +1450,8 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, else fidx = filter_id & ~0x1; - if (fidx != filter_id && - adapter->tids.ftid_tab[fidx].fs.type) { - f = &adapter->tids.ftid_tab[fidx]; + if (fidx != filter_id && tab[fidx].fs.type) { + f = &tab[fidx]; if (f->valid) { dev_err(adapter->pdev_dev, "Invalid location. IPv6 requires 4 slots and is occupying slots %u to %u\n", @@ -1399,7 +1475,7 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, */ for (fidx = filter_id + 1; fidx < filter_id + 4; fidx++) { - f = &adapter->tids.ftid_tab[fidx]; + f = &tab[fidx]; if (f->valid) { dev_err(adapter->pdev_dev, "Invalid location. IPv6 requires 4 slots and an IPv4 filter exists at %u\n", @@ -1415,7 +1491,7 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, return -EINVAL; /* Check overlapping IPv4 filter slot */ fidx = filter_id + 1; - f = &adapter->tids.ftid_tab[fidx]; + f = &tab[fidx]; if (f->valid) { pr_err("%s: IPv6 filter requires 2 indices. IPv4 filter already present at %d. Please remove IPv4 filter first.\n", __func__, fidx); @@ -1427,36 +1503,35 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, /* Check to make sure that provided filter index is not * already in use by someone else */ - f = &adapter->tids.ftid_tab[filter_id]; + f = &tab[filter_id]; if (f->valid) return -EBUSY; - fidx = filter_id + adapter->tids.ftid_base; - ret = cxgb4_set_ftid(&adapter->tids, filter_id, - fs->type ? PF_INET6 : PF_INET, - chip_ver); + if (fs->prio) { + fidx = filter_id + adapter->tids.hpftid_base; + ret = cxgb4_set_hpftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET); + } else { + fidx = filter_id + adapter->tids.ftid_base; + ret = cxgb4_set_ftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET, + chip_ver); + } + if (ret) return ret; /* Check t make sure the filter requested is writable ... */ ret = writable_filter(f); - if (ret) { - /* Clear the bits we have set above */ - cxgb4_clear_ftid(&adapter->tids, filter_id, - fs->type ? 
PF_INET6 : PF_INET, - chip_ver); - return ret; - } + if (ret) + goto free_tid; if (is_t6(adapter->params.chip) && fs->type && ipv6_addr_type((const struct in6_addr *)fs->val.lip) != IPV6_ADDR_ANY) { ret = cxgb4_clip_get(dev, (const u32 *)&fs->val.lip, 1); - if (ret) { - cxgb4_clear_ftid(&adapter->tids, filter_id, PF_INET6, - chip_ver); - return ret; - } + if (ret) + goto free_tid; } /* Convert the filter specification into our internal format. @@ -1487,7 +1562,7 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, f->fs.mask.vni, 0, 1, 1); if (ret < 0) - goto free_clip; + goto free_tid; f->fs.val.ovlan = ret; f->fs.mask.ovlan = 0x1ff; @@ -1501,21 +1576,22 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, */ f->ctx = ctx; f->tid = fidx; /* Save the actual tid */ - ret = set_filter_wr(adapter, filter_id); - if (ret) { + ret = set_filter_wr(adapter, ftid); + if (ret) + goto free_tid; + + return ret; + +free_tid: + if (f->fs.prio) + cxgb4_clear_hpftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET); + else cxgb4_clear_ftid(&adapter->tids, filter_id, fs->type ? PF_INET6 : PF_INET, chip_ver); - clear_filter(adapter, f); - } - - return ret; -free_clip: - if (is_t6(adapter->params.chip) && f->fs.type) - cxgb4_clip_release(f->dev, (const u32 *)&f->fs.val.lip, 1); - cxgb4_clear_ftid(&adapter->tids, filter_id, - fs->type ? PF_INET6 : PF_INET, chip_ver); + clear_filter(adapter, f); return ret; } @@ -1537,7 +1613,7 @@ static int cxgb4_del_hash_filter(struct net_device *dev, int filter_id, netdev_dbg(dev, "%s: filter_id = %d ; nftids = %d\n", __func__, filter_id, adapter->tids.nftids); - if (filter_id > adapter->tids.ntids) + if (tid_out_of_range(t, filter_id)) return -E2BIG; f = lookup_tid(t, filter_id); @@ -1590,11 +1666,11 @@ int __cxgb4_del_filter(struct net_device *dev, int filter_id, struct filter_ctx *ctx) { struct adapter *adapter = netdev2adap(dev); - unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); + unsigned int max_fidx, chip_ver; struct filter_entry *f; - unsigned int max_fidx; int ret; + chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); if (fs && fs->hash) { if (is_hashfilter(adapter)) return cxgb4_del_hash_filter(dev, filter_id, ctx); @@ -1603,21 +1679,31 @@ int __cxgb4_del_filter(struct net_device *dev, int filter_id, return -EINVAL; } - max_fidx = adapter->tids.nftids; + max_fidx = adapter->tids.nftids + adapter->tids.nhpftids; if (filter_id != (max_fidx + adapter->tids.nsftids - 1) && filter_id >= max_fidx) return -E2BIG; - f = &adapter->tids.ftid_tab[filter_id]; + if (filter_id < adapter->tids.nhpftids) + f = &adapter->tids.hpftid_tab[filter_id]; + else + f = &adapter->tids.ftid_tab[filter_id - adapter->tids.nhpftids]; + ret = writable_filter(f); if (ret) return ret; if (f->valid) { f->ctx = ctx; - cxgb4_clear_ftid(&adapter->tids, filter_id, - f->fs.type ? PF_INET6 : PF_INET, - chip_ver); + if (f->fs.prio) + cxgb4_clear_hpftid(&adapter->tids, + f->tid - adapter->tids.hpftid_base, + f->fs.type ? PF_INET6 : PF_INET); + else + cxgb4_clear_ftid(&adapter->tids, + f->tid - adapter->tids.ftid_base, + f->fs.type ? 
PF_INET6 : PF_INET, + chip_ver); return del_filter_wr(adapter, filter_id); } @@ -1842,11 +1928,18 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) max_fidx = adap->tids.nftids + adap->tids.nsftids; /* Get the corresponding filter entry for this tid */ if (adap->tids.ftid_tab) { - /* Check this in normal filter region */ - idx = tid - adap->tids.ftid_base; - if (idx >= max_fidx) - return; - f = &adap->tids.ftid_tab[idx]; + idx = tid - adap->tids.hpftid_base; + if (idx < adap->tids.nhpftids) { + f = &adap->tids.hpftid_tab[idx]; + } else { + /* Check this in normal filter region */ + idx = tid - adap->tids.ftid_base; + if (idx >= max_fidx) + return; + f = &adap->tids.ftid_tab[idx]; + idx += adap->tids.nhpftids; + } + if (f->tid != tid) return; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 0dedd3e9c31e..649842a8aa28 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -804,6 +804,26 @@ static int setup_ppod_edram(struct adapter *adap) return 0; } +static void adap_config_hpfilter(struct adapter *adapter) +{ + u32 param, val = 0; + int ret; + + /* Enable HP filter region. Older fw will fail this request and + * it is fine. + */ + param = FW_PARAM_DEV(HPFILTER_REGION_SUPPORT); + ret = t4_set_params(adapter, adapter->mbox, adapter->pf, 0, + 1, &param, &val); + + /* An error means FW doesn't know about HP filter support, + * it's not a problem, don't return an error. + */ + if (ret < 0) + dev_err(adapter->pdev_dev, + "HP filter region isn't supported by FW\n"); +} + /** * cxgb4_write_rss - write the RSS table for a given port * @pi: the port @@ -1427,8 +1447,8 @@ static void mk_tid_release(struct sk_buff *skb, unsigned int chan, static void cxgb4_queue_tid_release(struct tid_info *t, unsigned int chan, unsigned int tid) { - void **p = &t->tid_tab[tid]; struct adapter *adap = container_of(t, struct adapter, tids); + void **p = &t->tid_tab[tid - t->tid_base]; spin_lock_bh(&adap->tid_release_lock); *p = adap->tid_release_head; @@ -1480,13 +1500,13 @@ static void process_tid_release_list(struct work_struct *work) void cxgb4_remove_tid(struct tid_info *t, unsigned int chan, unsigned int tid, unsigned short family) { - struct sk_buff *skb; struct adapter *adap = container_of(t, struct adapter, tids); + struct sk_buff *skb; - WARN_ON(tid >= t->ntids); + WARN_ON(tid_out_of_range(&adap->tids, tid)); - if (t->tid_tab[tid]) { - t->tid_tab[tid] = NULL; + if (t->tid_tab[tid - adap->tids.tid_base]) { + t->tid_tab[tid - adap->tids.tid_base] = NULL; atomic_dec(&t->conns_in_use); if (t->hash_base && (tid >= t->hash_base)) { if (family == AF_INET6) @@ -1518,6 +1538,7 @@ static int tid_init(struct tid_info *t) struct adapter *adap = container_of(t, struct adapter, tids); unsigned int max_ftids = t->nftids + t->nsftids; unsigned int natids = t->natids; + unsigned int hpftid_bmap_size; unsigned int eotid_bmap_size; unsigned int stid_bmap_size; unsigned int ftid_bmap_size; @@ -1525,12 +1546,15 @@ static int tid_init(struct tid_info *t) stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids); ftid_bmap_size = BITS_TO_LONGS(t->nftids); + hpftid_bmap_size = BITS_TO_LONGS(t->nhpftids); eotid_bmap_size = BITS_TO_LONGS(t->neotids); size = t->ntids * sizeof(*t->tid_tab) + natids * sizeof(*t->atid_tab) + t->nstids * sizeof(*t->stid_tab) + t->nsftids * sizeof(*t->stid_tab) + stid_bmap_size * sizeof(long) + + t->nhpftids * sizeof(*t->hpftid_tab) + + hpftid_bmap_size * 
sizeof(long) + max_ftids * sizeof(*t->ftid_tab) + ftid_bmap_size * sizeof(long) + t->neotids * sizeof(*t->eotid_tab) + @@ -1543,7 +1567,9 @@ static int tid_init(struct tid_info *t) t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids]; t->stid_tab = (struct serv_entry *)&t->atid_tab[natids]; t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids]; - t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size]; + t->hpftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size]; + t->hpftid_bmap = (unsigned long *)&t->hpftid_tab[t->nhpftids]; + t->ftid_tab = (struct filter_entry *)&t->hpftid_bmap[hpftid_bmap_size]; t->ftid_bmap = (unsigned long *)&t->ftid_tab[max_ftids]; t->eotid_tab = (struct eotid_entry *)&t->ftid_bmap[ftid_bmap_size]; t->eotid_bmap = (unsigned long *)&t->eotid_tab[t->neotids]; @@ -1578,6 +1604,8 @@ static int tid_init(struct tid_info *t) bitmap_zero(t->eotid_bmap, t->neotids); } + if (t->nhpftids) + bitmap_zero(t->hpftid_bmap, t->nhpftids); bitmap_zero(t->ftid_bmap, t->nftids); return 0; } @@ -4359,6 +4387,7 @@ static int adap_init0_config(struct adapter *adapter, int reset) "HMA configuration failed with error %d\n", ret); if (is_t6(adapter->params.chip)) { + adap_config_hpfilter(adapter); ret = setup_ppod_edram(adapter); if (!ret) dev_info(adapter->pdev_dev, "Successfully enabled " @@ -4668,16 +4697,6 @@ static int adap_init0(struct adapter *adap, int vpd_skip) /* * Grab some of our basic fundamental operating parameters. */ -#define FW_PARAM_DEV(param) \ - (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | \ - FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_##param)) - -#define FW_PARAM_PFVF(param) \ - FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) | \ - FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_##param)| \ - FW_PARAMS_PARAM_Y_V(0) | \ - FW_PARAMS_PARAM_Z_V(0) - params[0] = FW_PARAM_PFVF(EQ_START); params[1] = FW_PARAM_PFVF(L2T_START); params[2] = FW_PARAM_PFVF(L2T_END); @@ -4695,6 +4714,16 @@ static int adap_init0(struct adapter *adap, int vpd_skip) adap->sge.ingr_start = val[5]; if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5) { + params[0] = FW_PARAM_PFVF(HPFILTER_START); + params[1] = FW_PARAM_PFVF(HPFILTER_END); + ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2, + params, val); + if (ret < 0) + goto bye; + + adap->tids.hpftid_base = val[0]; + adap->tids.nhpftids = val[1] - val[0] + 1; + /* Read the raw mps entries. In T6, the last 2 tcam entries * are reserved for raw mac addresses (rawf = 2, one per port). */ @@ -4706,6 +4735,9 @@ static int adap_init0(struct adapter *adap, int vpd_skip) adap->rawf_start = val[0]; adap->rawf_cnt = val[1] - val[0] + 1; } + + adap->tids.tid_base = + t4_read_reg(adap, LE_DB_ACTIVE_TABLE_START_INDEX_A); } /* qids (ingress/egress) returned from firmware can be anywhere @@ -5058,8 +5090,6 @@ static int adap_init0(struct adapter *adap, int vpd_skip) } adap->params.crypto = ntohs(caps_cmd.cryptocaps); } -#undef FW_PARAM_PFVF -#undef FW_PARAM_DEV /* The MTU/MSS Table is initialized by now, so load their values. If * we're initializing the adapter, then we'll make any modifications diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 0fa80bef575d..bb5513bdd293 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -672,10 +672,14 @@ int cxgb4_tc_flower_replace(struct net_device *dev, * 0 to driver. However, the hardware TCAM index * starts from 0. Hence, the -1 here. 
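The adap_init0() hunk above sizes the new HP filter region from the HPFILTER_START/HPFILTER_END firmware parameters and anchors driver-relative tids with tid_base. A small sketch of that sizing under the same assumptions (foo_query_hpfilter_range is illustrative, not part of the patch); note the range is inclusive, hence the +1:

static int foo_query_hpfilter_range(struct adapter *adap)
{
        u32 params[2], val[2];
        int ret;

        params[0] = FW_PARAM_PFVF(HPFILTER_START);
        params[1] = FW_PARAM_PFVF(HPFILTER_END);
        ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2, params, val);
        if (ret < 0)
                return ret;

        adap->tids.hpftid_base = val[0];
        /* START..END is an inclusive range, hence the +1 */
        adap->tids.nhpftids = val[1] - val[0] + 1;
        return 0;
}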
*/ - if (cls->common.prio <= adap->tids.nftids) + if (cls->common.prio <= (adap->tids.nftids + + adap->tids.nhpftids)) { fidx = cls->common.prio - 1; - else + if (fidx < adap->tids.nhpftids) + fs->prio = 1; + } else { fidx = cxgb4_get_free_ftid(dev, inet_family); + } /* Only insert FLOWER rule if its priority doesn't * conflict with existing rules in the LETCAM. diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c index 6d485803ddbe..1b7681a4eb32 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c @@ -204,7 +204,7 @@ static int cxgb4_matchall_alloc_filter(struct net_device *dev, * -1 here. 1 slot is enough to create a wildcard matchall * VIID rule. */ - if (cls->common.prio <= adap->tids.nftids) + if (cls->common.prio <= (adap->tids.nftids + adap->tids.nhpftids)) fidx = cls->common.prio - 1; else fidx = cxgb4_get_free_ftid(dev, PF_INET); @@ -223,6 +223,8 @@ static int cxgb4_matchall_alloc_filter(struct net_device *dev, fs = &tc_port_matchall->ingress.fs; memset(fs, 0, sizeof(*fs)); + if (fidx < adap->tids.nhpftids) + fs->prio = 1; fs->tc_prio = cls->common.prio; fs->tc_cookie = cls->cookie; fs->hitcnts = 1; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c index 133f8623ba86..269b8d9e25e0 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -176,7 +176,7 @@ int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) /* Only insert U32 rule if its priority doesn't conflict with * existing rules in the LETCAM. */ - if (filter_id >= adapter->tids.nftids || + if (filter_id >= adapter->tids.nftids + adapter->tids.nhpftids || !cxgb4_filter_prio_in_range(dev, filter_id, cls->common.prio)) { NL_SET_ERR_MSG_MOD(extack, "No free LETCAM index available"); @@ -199,6 +199,8 @@ int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) memset(&fs, 0, sizeof(fs)); + if (filter_id < adapter->tids.nhpftids) + fs.prio = 1; fs.tc_prio = cls->common.prio; fs.tc_cookie = cls->knode.handle; @@ -355,6 +357,7 @@ int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) unsigned int filter_id, max_tids, i, j; struct cxgb4_link *link = NULL; struct cxgb4_tc_u32_table *t; + struct filter_entry *f; u32 handle, uhtid; int ret; @@ -363,8 +366,15 @@ int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) /* Fetch the location to delete the filter. 
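The tc_flower, tc_matchall and tc_u32 hunks above all map a tc priority onto the enlarged index space the same way: priorities that land inside the HP region select a high-priority slot and mark the filter spec with fs->prio. A sketch of that shared mapping; foo_prio_to_fidx is illustrative only:

static int foo_prio_to_fidx(struct adapter *adap, struct net_device *dev,
                            u32 prio, struct ch_filter_specification *fs)
{
        u32 max = adap->tids.nftids + adap->tids.nhpftids;

        if (prio && prio <= max) {
                u32 fidx = prio - 1;    /* tc priorities are 1-based */

                if (fidx < adap->tids.nhpftids)
                        fs->prio = 1;   /* goes to the HP filter region */
                return fidx;
        }

        /* Otherwise take any free slot in the normal region. */
        return cxgb4_get_free_ftid(dev, PF_INET);
}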
*/ filter_id = TC_U32_NODE(cls->knode.handle) - 1; - if (filter_id >= adapter->tids.nftids || - cls->knode.handle != adapter->tids.ftid_tab[filter_id].fs.tc_cookie) + if (filter_id >= adapter->tids.nftids + adapter->tids.nhpftids) + return -ERANGE; + + if (filter_id < adapter->tids.nhpftids) + f = &adapter->tids.hpftid_tab[filter_id]; + else + f = &adapter->tids.ftid_tab[filter_id - adapter->tids.nhpftids]; + + if (cls->knode.handle != f->fs.tc_cookie) return -ERANGE; t = adapter->tc_u32; @@ -445,7 +455,7 @@ void cxgb4_cleanup_tc_u32(struct adapter *adap) struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap) { - unsigned int max_tids = adap->tids.nftids; + unsigned int max_tids = adap->tids.nftids + adap->tids.nhpftids; struct cxgb4_tc_u32_table *t; unsigned int i; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 861b25d28ed6..d9d27bc1ae67 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -99,6 +99,7 @@ struct eotid_entry { */ struct tid_info { void **tid_tab; + unsigned int tid_base; unsigned int ntids; struct serv_entry *stid_tab; @@ -111,6 +112,11 @@ struct tid_info { unsigned int natids; unsigned int atid_base; + struct filter_entry *hpftid_tab; + unsigned long *hpftid_bmap; + unsigned int nhpftids; + unsigned int hpftid_base; + struct filter_entry *ftid_tab; unsigned long *ftid_bmap; unsigned int nftids; @@ -147,9 +153,15 @@ struct tid_info { static inline void *lookup_tid(const struct tid_info *t, unsigned int tid) { + tid -= t->tid_base; return tid < t->ntids ? t->tid_tab[tid] : NULL; } +static inline bool tid_out_of_range(const struct tid_info *t, unsigned int tid) +{ + return ((tid - t->tid_base) >= t->ntids); +} + static inline void *lookup_atid(const struct tid_info *t, unsigned int atid) { return atid < t->natids ? 
t->atid_tab[atid].data : NULL; @@ -171,7 +183,7 @@ static inline void *lookup_stid(const struct tid_info *t, unsigned int stid) static inline void cxgb4_insert_tid(struct tid_info *t, void *data, unsigned int tid, unsigned short family) { - t->tid_tab[tid] = data; + t->tid_tab[tid - t->tid_base] = data; if (t->hash_base && (tid >= t->hash_base)) { if (family == AF_INET6) atomic_add(2, &t->hash_tids_in_use); diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c index e9e45006632d..1a16449e9deb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c @@ -678,8 +678,7 @@ static void *l2t_seq_start(struct seq_file *seq, loff_t *pos) static void *l2t_seq_next(struct seq_file *seq, void *v, loff_t *pos) { v = l2t_get_idx(seq, *pos); - if (v) - ++*pos; + ++(*pos); return v; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index ac4fb43bdec6..accad1101ad1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -1321,6 +1321,7 @@ enum fw_params_param_dev { FW_PARAMS_PARAM_DEV_RDMA_WRITE_WITH_IMM = 0x21, FW_PARAMS_PARAM_DEV_PPOD_EDRAM = 0x23, FW_PARAMS_PARAM_DEV_RI_WRITE_CMPL_WR = 0x24, + FW_PARAMS_PARAM_DEV_HPFILTER_REGION_SUPPORT = 0x26, FW_PARAMS_PARAM_DEV_OPAQUE_VIID_SMT_EXTN = 0x27, FW_PARAMS_PARAM_DEV_HASHFILTER_WITH_OFLD = 0x28, FW_PARAMS_PARAM_DEV_DBQ_TIMER = 0x29, diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c index c9aebcde403a..33ace3307059 100644 --- a/drivers/net/ethernet/cirrus/cs89x0.c +++ b/drivers/net/ethernet/cirrus/cs89x0.c @@ -1128,7 +1128,7 @@ net_get_stats(struct net_device *dev) return &dev->stats; } -static void net_timeout(struct net_device *dev) +static void net_timeout(struct net_device *dev, unsigned int txqueue) { /* If we get here, some higher level has decided we are broken. There should really be a "kick me" function call instead. */ diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index acb2856936d2..bbd7b3175f09 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1095,7 +1095,7 @@ static void enic_set_rx_mode(struct net_device *netdev) } /* netif_tx_lock held, BHs disabled */ -static void enic_tx_timeout(struct net_device *netdev) +static void enic_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct enic *enic = netdev_priv(netdev); schedule_work(&enic->tx_hang_reset); diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 2814b96751b4..f30fa8e6ef80 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -1298,7 +1298,7 @@ out_drop: return NETDEV_TX_OK; } -static void gmac_tx_timeout(struct net_device *netdev) +static void gmac_tx_timeout(struct net_device *netdev, unsigned int txqueue) { netdev_err(netdev, "Tx timeout\n"); gmac_dump_dma_state(netdev); diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index cce90b5925d9..1ea3372775e6 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -964,7 +964,7 @@ dm9000_init_dm9000(struct net_device *dev) } /* Our watchdog timed out. 
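The .ndo_tx_timeout conversions running through this section (el3, corkscrew, bcmgenet, tg3, cs89x0, enic, dm9000, the tulip family, and so on) all take the new unsigned int txqueue argument, so the handler knows which TX queue the core watchdog flagged. A minimal sketch of a handler using it; foo_priv and reset_work are hypothetical:

static void foo_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
        struct foo_priv *priv = netdev_priv(dev);       /* hypothetical */
        struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue);

        /* txqueue identifies the ring the watchdog found stuck. */
        netdev_err(dev, "TX queue %u timed out, last start %lu\n",
                   txqueue, txq->trans_start);

        schedule_work(&priv->reset_work);       /* hypothetical reset path */
}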
Called by the networking layer */ -static void dm9000_timeout(struct net_device *dev) +static void dm9000_timeout(struct net_device *dev, unsigned int txqueue) { struct board_info *db = netdev_priv(dev); u8 reg_save; diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c index f1a2da15dd0a..d305d1b24b0a 100644 --- a/drivers/net/ethernet/dec/tulip/de2104x.c +++ b/drivers/net/ethernet/dec/tulip/de2104x.c @@ -1436,7 +1436,7 @@ static int de_close (struct net_device *dev) return 0; } -static void de_tx_timeout (struct net_device *dev) +static void de_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct de_private *de = netdev_priv(dev); const int irq = de->pdev->irq; @@ -2039,7 +2039,7 @@ static int de_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } /* remap CSR registers */ - regs = ioremap_nocache(pciaddr, DE_REGS_SIZE); + regs = ioremap(pciaddr, DE_REGS_SIZE); if (!regs) { rc = -EIO; pr_err("Cannot map PCI MMIO (%llx@%lx) on pci dev %s\n", diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c index 0efdbd1a4a6f..32d470d4122a 100644 --- a/drivers/net/ethernet/dec/tulip/dmfe.c +++ b/drivers/net/ethernet/dec/tulip/dmfe.c @@ -2214,15 +2214,16 @@ static int __init dmfe_init_module(void) if (cr6set) dmfe_cr6_user_set = cr6set; - switch(mode) { - case DMFE_10MHF: + switch (mode) { + case DMFE_10MHF: case DMFE_100MHF: case DMFE_10MFD: case DMFE_100MFD: case DMFE_1M_HPNA: dmfe_media_mode = mode; break; - default:dmfe_media_mode = DMFE_AUTO; + default: + dmfe_media_mode = DMFE_AUTO; break; } diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index 3e3e08698876..9e9d9eee29d9 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -255,7 +255,7 @@ MODULE_DEVICE_TABLE(pci, tulip_pci_tbl); const char tulip_media_cap[32] = {0,0,0,16, 3,19,16,24, 27,4,7,5, 0,20,23,20, 28,31,0,0, }; -static void tulip_tx_timeout(struct net_device *dev); +static void tulip_tx_timeout(struct net_device *dev, unsigned int txqueue); static void tulip_init_ring(struct net_device *dev); static void tulip_free_ring(struct net_device *dev); static netdev_tx_t tulip_start_xmit(struct sk_buff *skb, @@ -534,7 +534,7 @@ free_ring: } -static void tulip_tx_timeout(struct net_device *dev) +static void tulip_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct tulip_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp->base_addr; diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c index b1f30b194300..117ffe08800d 100644 --- a/drivers/net/ethernet/dec/tulip/uli526x.c +++ b/drivers/net/ethernet/dec/tulip/uli526x.c @@ -1809,8 +1809,8 @@ static int __init uli526x_init_module(void) if (cr6set) uli526x_cr6_user_set = cr6set; - switch (mode) { - case ULI526X_10MHF: + switch (mode) { + case ULI526X_10MHF: case ULI526X_100MHF: case ULI526X_10MFD: case ULI526X_100MFD: diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index 70cb2d689c2c..7f136488e67c 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -331,7 +331,7 @@ static void netdev_timer(struct timer_list *t); static void init_rxtx_rings(struct net_device *dev); static void free_rxtx_rings(struct netdev_private *np); static void init_registers(struct net_device *dev); -static void tx_timeout(struct 
net_device *dev); +static void tx_timeout(struct net_device *dev, unsigned int txqueue); static int alloc_ringdesc(struct net_device *dev); static void free_ringdesc(struct netdev_private *np); static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); @@ -921,7 +921,7 @@ static void init_registers(struct net_device *dev) iowrite32(0, ioaddr + RxStartDemand); } -static void tx_timeout(struct net_device *dev) +static void tx_timeout(struct net_device *dev, unsigned int txqueue) { struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->base_addr; diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index 55e720d2ea0c..26c5da032b1e 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -66,7 +66,7 @@ static const int multicast_filter_limit = 0x40; static int rio_open (struct net_device *dev); static void rio_timer (struct timer_list *t); -static void rio_tx_timeout (struct net_device *dev); +static void rio_tx_timeout (struct net_device *dev, unsigned int txqueue); static netdev_tx_t start_xmit (struct sk_buff *skb, struct net_device *dev); static irqreturn_t rio_interrupt (int irq, void *dev_instance); static void rio_free_tx (struct net_device *dev, int irq); @@ -696,7 +696,7 @@ rio_timer (struct timer_list *t) } static void -rio_tx_timeout (struct net_device *dev) +rio_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->ioaddr; diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index 4a37a69764ce..b91387c456ba 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -432,7 +432,7 @@ static int mdio_wait_link(struct net_device *dev, int wait); static int netdev_open(struct net_device *dev); static void check_duplex(struct net_device *dev); static void netdev_timer(struct timer_list *t); -static void tx_timeout(struct net_device *dev); +static void tx_timeout(struct net_device *dev, unsigned int txqueue); static void init_ring(struct net_device *dev); static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); static int reset_tx (struct net_device *dev); @@ -969,7 +969,7 @@ static void netdev_timer(struct timer_list *t) add_timer(&np->timer); } -static void tx_timeout(struct net_device *dev) +static void tx_timeout(struct net_device *dev, unsigned int txqueue) { struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->base; diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c index e24979010969..5f8fa1145db6 100644 --- a/drivers/net/ethernet/dnet.c +++ b/drivers/net/ethernet/dnet.c @@ -725,19 +725,6 @@ static struct net_device_stats *dnet_get_stats(struct net_device *dev) return nstat; } -static int dnet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) -{ - struct phy_device *phydev = dev->phydev; - - if (!netif_running(dev)) - return -EINVAL; - - if (!phydev) - return -ENODEV; - - return phy_mii_ioctl(phydev, rq, cmd); -} - static void dnet_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { @@ -759,7 +746,7 @@ static const struct net_device_ops dnet_netdev_ops = { .ndo_stop = dnet_close, .ndo_get_stats = dnet_get_stats, .ndo_start_xmit = dnet_start_xmit, - .ndo_do_ioctl = dnet_ioctl, + .ndo_do_ioctl = phy_do_ioctl_running, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c 
b/drivers/net/ethernet/emulex/benet/be_main.c index 39eb7d525043..56f59db6ebf2 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1417,7 +1417,7 @@ drop: return NETDEV_TX_OK; } -static void be_tx_timeout(struct net_device *netdev) +static void be_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct be_adapter *adapter = netdev_priv(netdev); struct device *dev = &adapter->pdev->dev; diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index ea4f17f5cce7..a817ca661c1f 100644 --- a/drivers/net/ethernet/ethoc.c +++ b/drivers/net/ethernet/ethoc.c @@ -869,7 +869,7 @@ static int ethoc_change_mtu(struct net_device *dev, int new_mtu) return -ENOSYS; } -static void ethoc_tx_timeout(struct net_device *dev) +static void ethoc_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct ethoc *priv = netdev_priv(dev); u32 pending = ethoc_read(priv, INT_SOURCE); @@ -1087,7 +1087,7 @@ static int ethoc_probe(struct platform_device *pdev) priv = netdev_priv(netdev); priv->netdev = netdev; - priv->iobase = devm_ioremap_nocache(&pdev->dev, netdev->base_addr, + priv->iobase = devm_ioremap(&pdev->dev, netdev->base_addr, resource_size(mmio)); if (!priv->iobase) { dev_err(&pdev->dev, "cannot remap I/O memory space\n"); @@ -1096,7 +1096,7 @@ static int ethoc_probe(struct platform_device *pdev) } if (netdev->mem_end) { - priv->membase = devm_ioremap_nocache(&pdev->dev, + priv->membase = devm_ioremap(&pdev->dev, netdev->mem_start, resource_size(mem)); if (!priv->membase) { dev_err(&pdev->dev, "cannot remap memory space\n"); diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 8ed85037f021..4572797f00d7 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1536,16 +1536,7 @@ static int ftgmac100_stop(struct net_device *netdev) return 0; } -/* optional */ -static int ftgmac100_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) -{ - if (!netdev->phydev) - return -ENXIO; - - return phy_mii_ioctl(netdev->phydev, ifr, cmd); -} - -static void ftgmac100_tx_timeout(struct net_device *netdev) +static void ftgmac100_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ftgmac100 *priv = netdev_priv(netdev); @@ -1597,7 +1588,7 @@ static const struct net_device_ops ftgmac100_netdev_ops = { .ndo_start_xmit = ftgmac100_hard_start_xmit, .ndo_set_mac_address = ftgmac100_set_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = ftgmac100_do_ioctl, + .ndo_do_ioctl = phy_do_ioctl, .ndo_tx_timeout = ftgmac100_tx_timeout, .ndo_set_rx_mode = ftgmac100_set_rx_mode, .ndo_set_features = ftgmac100_set_features, diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c index c24fd56a2c71..84f10970299a 100644 --- a/drivers/net/ethernet/fealnx.c +++ b/drivers/net/ethernet/fealnx.c @@ -428,7 +428,7 @@ static void getlinktype(struct net_device *dev); static void getlinkstatus(struct net_device *dev); static void netdev_timer(struct timer_list *t); static void reset_timer(struct timer_list *t); -static void fealnx_tx_timeout(struct net_device *dev); +static void fealnx_tx_timeout(struct net_device *dev, unsigned int txqueue); static void init_ring(struct net_device *dev); static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); static irqreturn_t intr_handler(int irq, void *dev_instance); @@ -1191,7 +1191,7 @@ static void reset_timer(struct timer_list *t) } -static void 
fealnx_tx_timeout(struct net_device *dev) +static void fealnx_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->mem; diff --git a/drivers/net/ethernet/freescale/Makefile b/drivers/net/ethernet/freescale/Makefile index 6a93293d31e0..67c436400352 100644 --- a/drivers/net/ethernet/freescale/Makefile +++ b/drivers/net/ethernet/freescale/Makefile @@ -25,4 +25,5 @@ obj-$(CONFIG_FSL_DPAA_ETH) += dpaa/ obj-$(CONFIG_FSL_DPAA2_ETH) += dpaa2/ obj-$(CONFIG_FSL_ENETC) += enetc/ +obj-$(CONFIG_FSL_ENETC_MDIO) += enetc/ obj-$(CONFIG_FSL_ENETC_VF) += enetc/ diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index a301f0095223..09dbcd819d84 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -288,7 +288,7 @@ static int dpaa_stop(struct net_device *net_dev) return err; } -static void dpaa_tx_timeout(struct net_device *net_dev) +static void dpaa_tx_timeout(struct net_device *net_dev, unsigned int txqueue) { struct dpaa_percpu_priv *percpu_priv; const struct dpaa_priv *priv; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c index 6437fe6b9abf..cc1b7f85e433 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c @@ -27,6 +27,20 @@ static int dpaa2_ptp_enable(struct ptp_clock_info *ptp, mc_dev = to_fsl_mc_device(dev); switch (rq->type) { + case PTP_CLK_REQ_EXTTS: + switch (rq->extts.index) { + case 0: + bit = DPRTC_EVENT_ETS1; + break; + case 1: + bit = DPRTC_EVENT_ETS2; + break; + default: + return -EINVAL; + } + if (on) + extts_clean_up(ptp_qoriq, rq->extts.index, false); + break; case PTP_CLK_REQ_PPS: bit = DPRTC_EVENT_PPS; break; @@ -96,6 +110,12 @@ static irqreturn_t dpaa2_ptp_irq_handler_thread(int irq, void *priv) ptp_clock_event(ptp_qoriq->clock, &event); } + if (status & DPRTC_EVENT_ETS1) + extts_clean_up(ptp_qoriq, 0, true); + + if (status & DPRTC_EVENT_ETS2) + extts_clean_up(ptp_qoriq, 1, true); + err = dprtc_clear_irq_status(mc_dev->mc_io, 0, mc_dev->mc_handle, DPRTC_IRQ_INDEX, status); if (unlikely(err)) { diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h index 4ac05bfef338..96ffeb948f08 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h +++ b/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h @@ -9,9 +9,11 @@ /* Command versioning */ #define DPRTC_CMD_BASE_VERSION 1 +#define DPRTC_CMD_VERSION_2 2 #define DPRTC_CMD_ID_OFFSET 4 #define DPRTC_CMD(id) (((id) << DPRTC_CMD_ID_OFFSET) | DPRTC_CMD_BASE_VERSION) +#define DPRTC_CMD_V2(id) (((id) << DPRTC_CMD_ID_OFFSET) | DPRTC_CMD_VERSION_2) /* Command IDs */ #define DPRTC_CMDID_CLOSE DPRTC_CMD(0x800) @@ -19,7 +21,7 @@ #define DPRTC_CMDID_SET_IRQ_ENABLE DPRTC_CMD(0x012) #define DPRTC_CMDID_GET_IRQ_ENABLE DPRTC_CMD(0x013) -#define DPRTC_CMDID_SET_IRQ_MASK DPRTC_CMD(0x014) +#define DPRTC_CMDID_SET_IRQ_MASK DPRTC_CMD_V2(0x014) #define DPRTC_CMDID_GET_IRQ_MASK DPRTC_CMD(0x015) #define DPRTC_CMDID_GET_IRQ_STATUS DPRTC_CMD(0x016) #define DPRTC_CMDID_CLEAR_IRQ_STATUS DPRTC_CMD(0x017) diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc.h b/drivers/net/ethernet/freescale/dpaa2/dprtc.h index 311c184e1aef..05c413719e55 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dprtc.h +++ b/drivers/net/ethernet/freescale/dpaa2/dprtc.h @@ -20,6 +20,8 @@ struct fsl_mc_io; #define 
DPRTC_IRQ_INDEX 0 #define DPRTC_EVENT_PPS 0x08000000 +#define DPRTC_EVENT_ETS1 0x00800000 +#define DPRTC_EVENT_ETS2 0x00400000 int dprtc_open(struct fsl_mc_io *mc_io, u32 cmd_flags, diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig index edad4ca46327..fe942de19597 100644 --- a/drivers/net/ethernet/freescale/enetc/Kconfig +++ b/drivers/net/ethernet/freescale/enetc/Kconfig @@ -2,6 +2,7 @@ config FSL_ENETC tristate "ENETC PF driver" depends on PCI && PCI_MSI && (ARCH_LAYERSCAPE || COMPILE_TEST) + select FSL_ENETC_MDIO select PHYLIB help This driver supports NXP ENETC gigabit ethernet controller PCIe diff --git a/drivers/net/ethernet/freescale/enetc/Makefile b/drivers/net/ethernet/freescale/enetc/Makefile index d0db33e5b6b7..74f7ac253b8b 100644 --- a/drivers/net/ethernet/freescale/enetc/Makefile +++ b/drivers/net/ethernet/freescale/enetc/Makefile @@ -3,7 +3,7 @@ common-objs := enetc.o enetc_cbdr.o enetc_ethtool.o obj-$(CONFIG_FSL_ENETC) += fsl-enetc.o -fsl-enetc-y := enetc_pf.o enetc_mdio.o $(common-objs) +fsl-enetc-y := enetc_pf.o $(common-objs) fsl-enetc-$(CONFIG_PCI_IOV) += enetc_msg.o fsl-enetc-$(CONFIG_FSL_ENETC_QOS) += enetc_qos.o diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 17739906c966..1f79e36116a3 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -149,11 +149,21 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, if (enetc_tx_csum(skb, &temp_bd)) flags |= ENETC_TXBD_FLAGS_CSUM | ENETC_TXBD_FLAGS_L4CS; + else if (tx_ring->tsd_enable) + flags |= ENETC_TXBD_FLAGS_TSE | ENETC_TXBD_FLAGS_TXSTART; /* first BD needs frm_len and offload flags set */ temp_bd.frm_len = cpu_to_le16(skb->len); temp_bd.flags = flags; + if (flags & ENETC_TXBD_FLAGS_TSE) { + u32 temp; + + temp = (skb->skb_mstamp_ns >> 5 & ENETC_TXBD_TXSTART_MASK) + | (flags << ENETC_TXBD_FLAGS_OFFSET); + temp_bd.txstart = cpu_to_le32(temp); + } + if (flags & ENETC_TXBD_FLAGS_EX) { u8 e_flags = 0; *txbd = temp_bd; @@ -227,6 +237,8 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, enetc_bdr_idx_inc(tx_ring, &i); tx_ring->next_to_use = i; + skb_tx_timestamp(skb); + /* let H/W know BD ring has been updated */ enetc_wr_reg(tx_ring->tpir, i); /* includes wmb() */ @@ -1503,6 +1515,8 @@ int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type, return enetc_setup_tc_taprio(ndev, type_data); case TC_SETUP_QDISC_CBS: return enetc_setup_tc_cbs(ndev, type_data); + case TC_SETUP_QDISC_ETF: + return enetc_setup_tc_txtime(ndev, type_data); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index 7ee0da6d0015..dd4a227ffc7a 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -72,6 +72,7 @@ struct enetc_bdr { struct enetc_ring_stats stats; dma_addr_t bd_dma_base; + u8 tsd_enable; /* Time specific departure */ } ____cacheline_aligned_in_smp; static inline void enetc_bdr_idx_inc(struct enetc_bdr *bdr, int *i) @@ -256,8 +257,10 @@ int enetc_send_cmd(struct enetc_si *si, struct enetc_cbd *cbd); int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data); void enetc_sched_speed_set(struct net_device *ndev); int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data); +int enetc_setup_tc_txtime(struct net_device *ndev, void *type_data); #else #define 
enetc_setup_tc_taprio(ndev, type_data) -EOPNOTSUPP #define enetc_sched_speed_set(ndev) (void)0 #define enetc_setup_tc_cbs(ndev, type_data) -EOPNOTSUPP +#define enetc_setup_tc_txtime(ndev, type_data) -EOPNOTSUPP #endif diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 880a8ed8bb47..301ee0dde02d 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -579,6 +579,7 @@ static int enetc_get_ts_info(struct net_device *ndev, (1 << HWTSTAMP_FILTER_ALL); #else info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE; #endif return 0; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index 51f543ef37a8..62554f28ce07 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -200,6 +200,7 @@ enum enetc_bdr_type {TX, RX}; #define ENETC_PFPMR 0x1900 #define ENETC_PFPMR_PMACE BIT(1) #define ENETC_PFPMR_MWLM BIT(0) +#define ENETC_EMDIO_BASE 0x1c00 #define ENETC_PSIUMHFR0(n, err) (((err) ? 0x1d08 : 0x1d00) + (n) * 0x10) #define ENETC_PSIUMHFR1(n) (0x1d04 + (n) * 0x10) #define ENETC_PSIMMHFR0(n, err) (((err) ? 0x1d00 : 0x1d08) + (n) * 0x10) @@ -358,6 +359,7 @@ union enetc_tx_bd { u8 l4_csoff; u8 flags; }; /* default layout */ + __le32 txstart; __le32 lstatus; }; }; @@ -378,11 +380,14 @@ union enetc_tx_bd { }; #define ENETC_TXBD_FLAGS_L4CS BIT(0) +#define ENETC_TXBD_FLAGS_TSE BIT(1) #define ENETC_TXBD_FLAGS_W BIT(2) #define ENETC_TXBD_FLAGS_CSUM BIT(3) +#define ENETC_TXBD_FLAGS_TXSTART BIT(4) #define ENETC_TXBD_FLAGS_EX BIT(6) #define ENETC_TXBD_FLAGS_F BIT(7) - +#define ENETC_TXBD_TXSTART_MASK GENMASK(24, 0) +#define ENETC_TXBD_FLAGS_OFFSET 24 static inline void enetc_clear_tx_bd(union enetc_tx_bd *txbd) { memset(txbd, 0, sizeof(*txbd)); @@ -615,3 +620,7 @@ struct enetc_cbd { /* Port time gating capability register */ #define ENETC_QBV_PTGCAPR_OFFSET 0x11a08 #define ENETC_QBV_MAX_GCL_LEN_MASK GENMASK(15, 0) + +/* Port time specific departure */ +#define ENETC_PTCTSDR(n) (0x1210 + 4 * (n)) +#define ENETC_TSDE BIT(31) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_mdio.c b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c index 149883c8f0b8..48c32a171afa 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_mdio.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c @@ -1,41 +1,56 @@ // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) /* Copyright 2019 NXP */ +#include <linux/fsl/enetc_mdio.h> #include <linux/mdio.h> #include <linux/of_mdio.h> #include <linux/iopoll.h> #include <linux/of.h> -#include "enetc_mdio.h" +#include "enetc_pf.h" -#define ENETC_MDIO_REG_OFFSET 0x1c00 #define ENETC_MDIO_CFG 0x0 /* MDIO configuration and status */ #define ENETC_MDIO_CTL 0x4 /* MDIO control */ #define ENETC_MDIO_DATA 0x8 /* MDIO data */ #define ENETC_MDIO_ADDR 0xc /* MDIO address */ -#define enetc_mdio_rd(hw, off) \ - enetc_port_rd(hw, ENETC_##off + ENETC_MDIO_REG_OFFSET) -#define enetc_mdio_wr(hw, off, val) \ - enetc_port_wr(hw, ENETC_##off + ENETC_MDIO_REG_OFFSET, val) -#define enetc_mdio_rd_reg(off) enetc_mdio_rd(hw, off) +static inline u32 _enetc_mdio_rd(struct enetc_mdio_priv *mdio_priv, int off) +{ + return enetc_port_rd(mdio_priv->hw, mdio_priv->mdio_base + off); +} + +static inline void _enetc_mdio_wr(struct enetc_mdio_priv *mdio_priv, int off, + u32 val) +{ + 
enetc_port_wr(mdio_priv->hw, mdio_priv->mdio_base + off, val); +} -#define ENETC_MDC_DIV 258 +#define enetc_mdio_rd(mdio_priv, off) \ + _enetc_mdio_rd(mdio_priv, ENETC_##off) +#define enetc_mdio_wr(mdio_priv, off, val) \ + _enetc_mdio_wr(mdio_priv, ENETC_##off, val) +#define enetc_mdio_rd_reg(off) enetc_mdio_rd(mdio_priv, off) #define MDIO_CFG_CLKDIV(x) ((((x) >> 1) & 0xff) << 8) #define MDIO_CFG_BSY BIT(0) #define MDIO_CFG_RD_ER BIT(1) +#define MDIO_CFG_HOLD(x) (((x) << 2) & GENMASK(4, 2)) #define MDIO_CFG_ENC45 BIT(6) /* external MDIO only - driven on neg MDC edge */ #define MDIO_CFG_NEG BIT(23) +#define ENETC_EMDIO_CFG \ + (MDIO_CFG_HOLD(2) | \ + MDIO_CFG_CLKDIV(258) | \ + MDIO_CFG_NEG) + #define MDIO_CTL_DEV_ADDR(x) ((x) & 0x1f) #define MDIO_CTL_PORT_ADDR(x) (((x) & 0x1f) << 5) #define MDIO_CTL_READ BIT(15) #define MDIO_DATA(x) ((x) & 0xffff) #define TIMEOUT 1000 -static int enetc_mdio_wait_complete(struct enetc_hw *hw) +static int enetc_mdio_wait_complete(struct enetc_mdio_priv *mdio_priv) { u32 val; @@ -46,12 +61,11 @@ static int enetc_mdio_wait_complete(struct enetc_hw *hw) int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value) { struct enetc_mdio_priv *mdio_priv = bus->priv; - struct enetc_hw *hw = mdio_priv->hw; u32 mdio_ctl, mdio_cfg; u16 dev_addr; int ret; - mdio_cfg = MDIO_CFG_CLKDIV(ENETC_MDC_DIV) | MDIO_CFG_NEG; + mdio_cfg = ENETC_EMDIO_CFG; if (regnum & MII_ADDR_C45) { dev_addr = (regnum >> 16) & 0x1f; mdio_cfg |= MDIO_CFG_ENC45; @@ -61,44 +75,44 @@ int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value) mdio_cfg &= ~MDIO_CFG_ENC45; } - enetc_mdio_wr(hw, MDIO_CFG, mdio_cfg); + enetc_mdio_wr(mdio_priv, MDIO_CFG, mdio_cfg); - ret = enetc_mdio_wait_complete(hw); + ret = enetc_mdio_wait_complete(mdio_priv); if (ret) return ret; /* set port and dev addr */ mdio_ctl = MDIO_CTL_PORT_ADDR(phy_id) | MDIO_CTL_DEV_ADDR(dev_addr); - enetc_mdio_wr(hw, MDIO_CTL, mdio_ctl); + enetc_mdio_wr(mdio_priv, MDIO_CTL, mdio_ctl); /* set the register address */ if (regnum & MII_ADDR_C45) { - enetc_mdio_wr(hw, MDIO_ADDR, regnum & 0xffff); + enetc_mdio_wr(mdio_priv, MDIO_ADDR, regnum & 0xffff); - ret = enetc_mdio_wait_complete(hw); + ret = enetc_mdio_wait_complete(mdio_priv); if (ret) return ret; } /* write the value */ - enetc_mdio_wr(hw, MDIO_DATA, MDIO_DATA(value)); + enetc_mdio_wr(mdio_priv, MDIO_DATA, MDIO_DATA(value)); - ret = enetc_mdio_wait_complete(hw); + ret = enetc_mdio_wait_complete(mdio_priv); if (ret) return ret; return 0; } +EXPORT_SYMBOL_GPL(enetc_mdio_write); int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum) { struct enetc_mdio_priv *mdio_priv = bus->priv; - struct enetc_hw *hw = mdio_priv->hw; u32 mdio_ctl, mdio_cfg; u16 dev_addr, value; int ret; - mdio_cfg = MDIO_CFG_CLKDIV(ENETC_MDC_DIV) | MDIO_CFG_NEG; + mdio_cfg = ENETC_EMDIO_CFG; if (regnum & MII_ADDR_C45) { dev_addr = (regnum >> 16) & 0x1f; mdio_cfg |= MDIO_CFG_ENC45; @@ -107,86 +121,56 @@ int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum) mdio_cfg &= ~MDIO_CFG_ENC45; } - enetc_mdio_wr(hw, MDIO_CFG, mdio_cfg); + enetc_mdio_wr(mdio_priv, MDIO_CFG, mdio_cfg); - ret = enetc_mdio_wait_complete(hw); + ret = enetc_mdio_wait_complete(mdio_priv); if (ret) return ret; /* set port and device addr */ mdio_ctl = MDIO_CTL_PORT_ADDR(phy_id) | MDIO_CTL_DEV_ADDR(dev_addr); - enetc_mdio_wr(hw, MDIO_CTL, mdio_ctl); + enetc_mdio_wr(mdio_priv, MDIO_CTL, mdio_ctl); /* set the register address */ if (regnum & MII_ADDR_C45) { - enetc_mdio_wr(hw, MDIO_ADDR, regnum & 0xffff); + 
enetc_mdio_wr(mdio_priv, MDIO_ADDR, regnum & 0xffff); - ret = enetc_mdio_wait_complete(hw); + ret = enetc_mdio_wait_complete(mdio_priv); if (ret) return ret; } /* initiate the read */ - enetc_mdio_wr(hw, MDIO_CTL, mdio_ctl | MDIO_CTL_READ); + enetc_mdio_wr(mdio_priv, MDIO_CTL, mdio_ctl | MDIO_CTL_READ); - ret = enetc_mdio_wait_complete(hw); + ret = enetc_mdio_wait_complete(mdio_priv); if (ret) return ret; /* return all Fs if nothing was there */ - if (enetc_mdio_rd(hw, MDIO_CFG) & MDIO_CFG_RD_ER) { + if (enetc_mdio_rd(mdio_priv, MDIO_CFG) & MDIO_CFG_RD_ER) { dev_dbg(&bus->dev, "Error while reading PHY%d reg at %d.%hhu\n", phy_id, dev_addr, regnum); return 0xffff; } - value = enetc_mdio_rd(hw, MDIO_DATA) & 0xffff; + value = enetc_mdio_rd(mdio_priv, MDIO_DATA) & 0xffff; return value; } +EXPORT_SYMBOL_GPL(enetc_mdio_read); -int enetc_mdio_probe(struct enetc_pf *pf) +struct enetc_hw *enetc_hw_alloc(struct device *dev, void __iomem *port_regs) { - struct device *dev = &pf->si->pdev->dev; - struct enetc_mdio_priv *mdio_priv; - struct device_node *np; - struct mii_bus *bus; - int err; - - bus = devm_mdiobus_alloc_size(dev, sizeof(*mdio_priv)); - if (!bus) - return -ENOMEM; - - bus->name = "Freescale ENETC MDIO Bus"; - bus->read = enetc_mdio_read; - bus->write = enetc_mdio_write; - bus->parent = dev; - mdio_priv = bus->priv; - mdio_priv->hw = &pf->si->hw; - snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev_name(dev)); - - np = of_get_child_by_name(dev->of_node, "mdio"); - if (!np) { - dev_err(dev, "MDIO node missing\n"); - return -EINVAL; - } - - err = of_mdiobus_register(bus, np); - if (err) { - of_node_put(np); - dev_err(dev, "cannot register MDIO bus\n"); - return err; - } + struct enetc_hw *hw; - of_node_put(np); - pf->mdio = bus; + hw = devm_kzalloc(dev, sizeof(*hw), GFP_KERNEL); + if (!hw) + return ERR_PTR(-ENOMEM); - return 0; -} + hw->port = port_regs; -void enetc_mdio_remove(struct enetc_pf *pf) -{ - if (pf->mdio) - mdiobus_unregister(pf->mdio); + return hw; } +EXPORT_SYMBOL_GPL(enetc_hw_alloc); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_mdio.h b/drivers/net/ethernet/freescale/enetc/enetc_mdio.h deleted file mode 100644 index 60c9a3889824..000000000000 --- a/drivers/net/ethernet/freescale/enetc/enetc_mdio.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ -/* Copyright 2019 NXP */ - -#include <linux/phy.h> -#include "enetc_pf.h" - -struct enetc_mdio_priv { - struct enetc_hw *hw; -}; - -int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value); -int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c b/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c index fbd41ce01f06..ebc635f8a4cc 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) /* Copyright 2019 NXP */ +#include <linux/fsl/enetc_mdio.h> #include <linux/of_mdio.h> -#include "enetc_mdio.h" +#include "enetc_pf.h" #define ENETC_MDIO_DEV_ID 0xee01 #define ENETC_MDIO_DEV_NAME "FSL PCIe IE Central MDIO" @@ -13,17 +14,29 @@ static int enetc_pci_mdio_probe(struct pci_dev *pdev, { struct enetc_mdio_priv *mdio_priv; struct device *dev = &pdev->dev; + void __iomem *port_regs; struct enetc_hw *hw; struct mii_bus *bus; int err; - hw = devm_kzalloc(dev, sizeof(*hw), GFP_KERNEL); - if (!hw) - return -ENOMEM; + port_regs = pci_iomap(pdev, 0, 0); + if 
(!port_regs) { + dev_err(dev, "iomap failed\n"); + err = -ENXIO; + goto err_ioremap; + } + + hw = enetc_hw_alloc(dev, port_regs); + if (IS_ERR(hw)) { + err = PTR_ERR(hw); + goto err_hw_alloc; + } bus = devm_mdiobus_alloc_size(dev, sizeof(*mdio_priv)); - if (!bus) - return -ENOMEM; + if (!bus) { + err = -ENOMEM; + goto err_mdiobus_alloc; + } bus->name = ENETC_MDIO_BUS_NAME; bus->read = enetc_mdio_read; @@ -31,13 +44,14 @@ static int enetc_pci_mdio_probe(struct pci_dev *pdev, bus->parent = dev; mdio_priv = bus->priv; mdio_priv->hw = hw; + mdio_priv->mdio_base = ENETC_EMDIO_BASE; snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev_name(dev)); pcie_flr(pdev); err = pci_enable_device_mem(pdev); if (err) { dev_err(dev, "device enable failed\n"); - return err; + goto err_pci_enable; } err = pci_request_region(pdev, 0, KBUILD_MODNAME); @@ -46,13 +60,6 @@ static int enetc_pci_mdio_probe(struct pci_dev *pdev, goto err_pci_mem_reg; } - hw->port = pci_iomap(pdev, 0, 0); - if (!hw->port) { - err = -ENXIO; - dev_err(dev, "iomap failed\n"); - goto err_ioremap; - } - err = of_mdiobus_register(bus, dev->of_node); if (err) goto err_mdiobus_reg; @@ -62,12 +69,14 @@ static int enetc_pci_mdio_probe(struct pci_dev *pdev, return 0; err_mdiobus_reg: - iounmap(mdio_priv->hw->port); -err_ioremap: pci_release_mem_regions(pdev); err_pci_mem_reg: pci_disable_device(pdev); - +err_pci_enable: +err_mdiobus_alloc: + iounmap(port_regs); +err_hw_alloc: +err_ioremap: return err; } diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index e7482d483b28..fc0d7d99e9a1 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -2,6 +2,7 @@ /* Copyright 2017-2019 NXP */ #include <linux/module.h> +#include <linux/fsl/enetc_mdio.h> #include <linux/of_mdio.h> #include <linux/of_net.h> #include "enetc_pf.h" @@ -749,6 +750,52 @@ static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev, enetc_get_primary_mac_addr(&si->hw, ndev->dev_addr); } +static int enetc_mdio_probe(struct enetc_pf *pf) +{ + struct device *dev = &pf->si->pdev->dev; + struct enetc_mdio_priv *mdio_priv; + struct device_node *np; + struct mii_bus *bus; + int err; + + bus = devm_mdiobus_alloc_size(dev, sizeof(*mdio_priv)); + if (!bus) + return -ENOMEM; + + bus->name = "Freescale ENETC MDIO Bus"; + bus->read = enetc_mdio_read; + bus->write = enetc_mdio_write; + bus->parent = dev; + mdio_priv = bus->priv; + mdio_priv->hw = &pf->si->hw; + mdio_priv->mdio_base = ENETC_EMDIO_BASE; + snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev_name(dev)); + + np = of_get_child_by_name(dev->of_node, "mdio"); + if (!np) { + dev_err(dev, "MDIO node missing\n"); + return -EINVAL; + } + + err = of_mdiobus_register(bus, np); + if (err) { + of_node_put(np); + dev_err(dev, "cannot register MDIO bus\n"); + return err; + } + + of_node_put(np); + pf->mdio = bus; + + return 0; +} + +static void enetc_mdio_remove(struct enetc_pf *pf) +{ + if (pf->mdio) + mdiobus_unregister(pf->mdio); +} + static int enetc_of_get_phy(struct enetc_ndev_priv *priv) { struct enetc_pf *pf = enetc_si_priv(priv->si); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.h b/drivers/net/ethernet/freescale/enetc/enetc_pf.h index 10dd1b53bb08..59e65a6f6c3e 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.h @@ -49,7 +49,3 @@ struct enetc_pf { int enetc_msg_psi_init(struct enetc_pf *pf); void enetc_msg_psi_free(struct enetc_pf *pf); 
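For context on the ENETC MDIO consolidation shown in the surrounding hunks: the /* MDIO */ prototypes removed from enetc_pf.h just below now correspond to static functions inside enetc_pf.c (hunk above), while the register accessors are exported and shared with the standalone PCIe central MDIO driver through <linux/fsl/enetc_mdio.h>. A minimal sketch of that shared interface, assembled only from the signatures and struct members visible in this diff — the exact includes, field types and layout of the real header are assumptions:

/* Sketch of <linux/fsl/enetc_mdio.h>, reconstructed from the hunks in this
 * diff; treat includes and field types as assumptions, not the real header.
 */
#include <linux/phy.h>

struct enetc_hw;		/* provided by the ENETC driver (enetc_hw.h) */

struct enetc_mdio_priv {
	struct enetc_hw *hw;	/* port register block of the owning device */
	int mdio_base;		/* MDIO register offset, e.g. ENETC_EMDIO_BASE (0x1c00) */
};

int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum);
int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value);
struct enetc_hw *enetc_hw_alloc(struct device *dev, void __iomem *port_regs);

Both the fsl-enetc PF driver and the fsl-enetc-mdio PCIe device driver link against these exported helpers, which is why the Kconfig hunk earlier in this diff makes FSL_ENETC select FSL_ENETC_MDIO and the Makefile drops enetc_mdio.o from fsl-enetc-y.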
void enetc_msg_handle_rxmsg(struct enetc_pf *pf, int mbox_id, u16 *status); - -/* MDIO */ -int enetc_mdio_probe(struct enetc_pf *pf); -void enetc_mdio_remove(struct enetc_pf *pf); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 2e99438cb1bf..0c6bf3a55a9a 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -36,7 +36,6 @@ void enetc_sched_speed_set(struct net_device *ndev) case SPEED_10: default: pspeed = ENETC_PMR_PSPEED_10M; - netdev_err(ndev, "Qbv PSPEED set speed link down.\n"); } priv->speed = speed; @@ -156,6 +155,11 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data) int err; int i; + /* TSD and Qbv are mutually exclusive in hardware */ + for (i = 0; i < priv->num_tx_rings; i++) + if (priv->tx_ring[i]->tsd_enable) + return -EBUSY; + for (i = 0; i < priv->num_tx_rings; i++) enetc_set_bdr_prio(&priv->si->hw, priv->tx_ring[i]->index, @@ -192,7 +196,6 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data) u32 hi_credit_bit, hi_credit_reg; u32 max_interference_size; u32 port_frame_max_size; - u32 tc_max_sized_frame; u8 tc = cbs->queue; u8 prio_top, prio_next; int bw_sum = 0; @@ -250,7 +253,7 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data) return -EINVAL; } - tc_max_sized_frame = enetc_port_rd(&si->hw, ENETC_PTCMSDUR(tc)); + enetc_port_rd(&si->hw, ENETC_PTCMSDUR(tc)); /* For top prio TC, the max_interfrence_size is maxSizedFrame. * @@ -298,3 +301,33 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data) return 0; } + +int enetc_setup_tc_txtime(struct net_device *ndev, void *type_data) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct tc_etf_qopt_offload *qopt = type_data; + u8 tc_nums = netdev_get_num_tc(ndev); + int tc; + + if (!tc_nums) + return -EOPNOTSUPP; + + tc = qopt->queue; + + if (tc < 0 || tc >= priv->num_tx_rings) + return -EINVAL; + + /* Do not support TXSTART and TX CSUM offload simutaniously */ + if (ndev->features & NETIF_F_CSUM_MASK) + return -EBUSY; + + /* TSD and Qbv are mutually exclusive in hardware */ + if (enetc_rd(&priv->si->hw, ENETC_QBV_PTGCR_OFFSET) & ENETC_QBV_TGE) + return -EBUSY; + + priv->tx_ring[tc]->tsd_enable = qopt->enable; + enetc_port_wr(&priv->si->hw, ENETC_PTCTSDR(tc), + qopt->enable ? 
ENETC_TSDE : 0); + + return 0; +} diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 9294027e9d90..4432a59904c7 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1141,7 +1141,7 @@ fec_stop(struct net_device *ndev) static void -fec_timeout(struct net_device *ndev) +fec_timeout(struct net_device *ndev, unsigned int txqueue) { struct fec_enet_private *fep = netdev_priv(ndev); diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index 30cdb246d020..7a3f066e611d 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -84,7 +84,7 @@ static int debug = -1; /* the above default */ module_param(debug, int, 0); MODULE_PARM_DESC(debug, "debugging messages level"); -static void mpc52xx_fec_tx_timeout(struct net_device *dev) +static void mpc52xx_fec_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mpc52xx_fec_priv *priv = netdev_priv(dev); unsigned long flags; @@ -785,16 +785,6 @@ static const struct ethtool_ops mpc52xx_fec_ethtool_ops = { }; -static int mpc52xx_fec_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) -{ - struct phy_device *phydev = dev->phydev; - - if (!phydev) - return -ENOTSUPP; - - return phy_mii_ioctl(phydev, rq, cmd); -} - static const struct net_device_ops mpc52xx_fec_netdev_ops = { .ndo_open = mpc52xx_fec_open, .ndo_stop = mpc52xx_fec_close, @@ -802,7 +792,7 @@ static const struct net_device_ops mpc52xx_fec_netdev_ops = { .ndo_set_rx_mode = mpc52xx_fec_set_multicast_list, .ndo_set_mac_address = mpc52xx_fec_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = mpc52xx_fec_ioctl, + .ndo_do_ioctl = phy_do_ioctl, .ndo_tx_timeout = mpc52xx_fec_tx_timeout, .ndo_get_stats = mpc52xx_fec_get_stats, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c index 41c6fa200e74..e1901874c19f 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.c +++ b/drivers/net/ethernet/freescale/fman/fman_memac.c @@ -110,7 +110,7 @@ do { \ /* Interface Mode Register (IF_MODE) */ #define IF_MODE_MASK 0x00000003 /* 30-31 Mask on i/f mode bits */ -#define IF_MODE_XGMII 0x00000000 /* 30-31 XGMII (10G) interface */ +#define IF_MODE_10G 0x00000000 /* 30-31 10G interface */ #define IF_MODE_GMII 0x00000002 /* 30-31 GMII (1G) interface */ #define IF_MODE_RGMII 0x00000004 #define IF_MODE_RGMII_AUTO 0x00008000 @@ -440,7 +440,7 @@ static int init(struct memac_regs __iomem *regs, struct memac_cfg *cfg, tmp = 0; switch (phy_if) { case PHY_INTERFACE_MODE_XGMII: - tmp |= IF_MODE_XGMII; + tmp |= IF_MODE_10G; break; default: tmp |= IF_MODE_GMII; diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index f0806ace1ae2..55f2122c3217 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -692,7 +692,7 @@ static int mac_probe(struct platform_device *_of_dev) mac_dev->res = __devm_request_region(dev, fman_get_mem_region(priv->fman), - res.start, res.end + 1 - res.start, + res.start, resource_size(&res), "mac"); if (!mac_dev->res) { dev_err(dev, "__devm_request_mem_region(mac) failed\n"); @@ -701,7 +701,7 @@ static int mac_probe(struct platform_device *_of_dev) } priv->vaddr = devm_ioremap(dev, mac_dev->res->start, - mac_dev->res->end + 1 - mac_dev->res->start); + 
resource_size(mac_dev->res)); if (!priv->vaddr) { dev_err(dev, "devm_ioremap() failed\n"); err = -EIO; diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 3981c06f082f..add61fed33ee 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -641,7 +641,7 @@ static void fs_timeout_work(struct work_struct *work) netif_wake_queue(dev); } -static void fs_timeout(struct net_device *dev) +static void fs_timeout(struct net_device *dev, unsigned int txqueue) { struct fs_enet_private *fep = netdev_priv(dev); @@ -882,14 +882,6 @@ static const struct ethtool_ops fs_ethtool_ops = { .set_tunable = fs_set_tunable, }; -static int fs_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) -{ - if (!netif_running(dev)) - return -EINVAL; - - return phy_mii_ioctl(dev->phydev, rq, cmd); -} - extern int fs_mii_connect(struct net_device *dev); extern void fs_mii_disconnect(struct net_device *dev); @@ -907,7 +899,7 @@ static const struct net_device_ops fs_enet_netdev_ops = { .ndo_start_xmit = fs_enet_start_xmit, .ndo_tx_timeout = fs_timeout, .ndo_set_rx_mode = fs_set_multicast_list, - .ndo_do_ioctl = fs_ioctl, + .ndo_do_ioctl = phy_do_ioctl_running, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 72868a28b621..f7e5cafe89a9 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2093,7 +2093,7 @@ static void gfar_reset_task(struct work_struct *work) reset_gfar(priv->ndev); } -static void gfar_timeout(struct net_device *dev) +static void gfar_timeout(struct net_device *dev, unsigned int txqueue) { struct gfar_private *priv = netdev_priv(dev); @@ -2205,13 +2205,17 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) skb_dirtytx = tx_queue->skb_dirtytx; while ((skb = tx_queue->tx_skbuff[skb_dirtytx])) { + bool do_tstamp; + + do_tstamp = (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + priv->hwts_tx_en; frags = skb_shinfo(skb)->nr_frags; /* When time stamping, one additional TxBD must be freed. * Also, we need to dma_unmap_single() the TxPAL. 
*/ - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) + if (unlikely(do_tstamp)) nr_txbds = frags + 2; else nr_txbds = frags + 1; @@ -2225,7 +2229,7 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) (lstatus & BD_LENGTH_MASK)) break; - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { + if (unlikely(do_tstamp)) { next = next_txbd(bdp, base, tx_ring_size); buflen = be16_to_cpu(next->length) + GMAC_FCB_LEN + GMAC_TXPAL_LEN; @@ -2235,7 +2239,7 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) dma_unmap_single(priv->dev, be32_to_cpu(bdp->bufPtr), buflen, DMA_TO_DEVICE); - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { + if (unlikely(do_tstamp)) { struct skb_shared_hwtstamps shhwtstamps; u64 *ns = (u64 *)(((uintptr_t)skb->data + 0x10) & ~0x7UL); diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index f839fa94ebdd..0d101c00286f 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3545,7 +3545,7 @@ static void ucc_geth_timeout_work(struct work_struct *work) * ucc_geth_timeout gets called when a packet has not been * transmitted after a set amount of time. */ -static void ucc_geth_timeout(struct net_device *dev) +static void ucc_geth_timeout(struct net_device *dev, unsigned int txqueue) { struct ucc_geth_private *ugeth = netdev_priv(dev); diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c index e03b30c60dcf..c82c85ef5fb3 100644 --- a/drivers/net/ethernet/freescale/xgmac_mdio.c +++ b/drivers/net/ethernet/freescale/xgmac_mdio.c @@ -49,6 +49,7 @@ struct tgec_mdio_controller { struct mdio_fsl_priv { struct tgec_mdio_controller __iomem *mdio_base; bool is_little_endian; + bool has_a011043; }; static u32 xgmac_read32(void __iomem *regs, @@ -226,7 +227,8 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum) return ret; /* Return all Fs if nothing was there */ - if (xgmac_read32(®s->mdio_stat, endian) & MDIO_STAT_RD_ER) { + if ((xgmac_read32(®s->mdio_stat, endian) & MDIO_STAT_RD_ER) && + !priv->has_a011043) { dev_err(&bus->dev, "Error while reading PHY%d reg at %d.%hhu\n", phy_id, dev_addr, regnum); @@ -274,6 +276,9 @@ static int xgmac_mdio_probe(struct platform_device *pdev) priv->is_little_endian = of_property_read_bool(pdev->dev.of_node, "little-endian"); + priv->has_a011043 = of_property_read_bool(pdev->dev.of_node, + "fsl,erratum-a011043"); + ret = of_mdiobus_register(bus, np); if (ret) { dev_err(&pdev->dev, "cannot register MDIO bus\n"); diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c index 1eca0fdb9933..a7b7a4aace79 100644 --- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c +++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c @@ -93,7 +93,7 @@ static irqreturn_t fjn_interrupt(int irq, void *dev_id); static void fjn_rx(struct net_device *dev); static void fjn_reset(struct net_device *dev); static void set_rx_mode(struct net_device *dev); -static void fjn_tx_timeout(struct net_device *dev); +static void fjn_tx_timeout(struct net_device *dev, unsigned int txqueue); static const struct ethtool_ops netdev_ethtool_ops; /* @@ -774,7 +774,7 @@ static irqreturn_t fjn_interrupt(int dummy, void *dev_id) /*====================================================================*/ -static void fjn_tx_timeout(struct net_device *dev) +static void fjn_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct local_info *lp 
= netdev_priv(dev); unsigned int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 9b7a8db9860f..e032563ceefd 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -845,7 +845,7 @@ static void gve_turnup(struct gve_priv *priv) gve_set_napi_enabled(priv); } -static void gve_tx_timeout(struct net_device *dev) +static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct gve_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 150a8ccfb8b1..d9718b87279d 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -779,7 +779,7 @@ static int hip04_mac_stop(struct net_device *ndev) return 0; } -static void hip04_timeout(struct net_device *ndev) +static void hip04_timeout(struct net_device *ndev, unsigned int txqueue) { struct hip04_priv *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c index 90ab7ade44c4..57c3bc4f7089 100644 --- a/drivers/net/ethernet/hisilicon/hisi_femac.c +++ b/drivers/net/ethernet/hisilicon/hisi_femac.c @@ -675,18 +675,6 @@ static void hisi_femac_net_set_rx_mode(struct net_device *dev) } } -static int hisi_femac_net_ioctl(struct net_device *dev, - struct ifreq *ifreq, int cmd) -{ - if (!netif_running(dev)) - return -EINVAL; - - if (!dev->phydev) - return -EINVAL; - - return phy_mii_ioctl(dev->phydev, ifreq, cmd); -} - static const struct ethtool_ops hisi_femac_ethtools_ops = { .get_link = ethtool_op_get_link, .get_link_ksettings = phy_ethtool_get_link_ksettings, @@ -697,7 +685,7 @@ static const struct net_device_ops hisi_femac_netdev_ops = { .ndo_open = hisi_femac_net_open, .ndo_stop = hisi_femac_net_close, .ndo_start_xmit = hisi_femac_net_xmit, - .ndo_do_ioctl = hisi_femac_net_ioctl, + .ndo_do_ioctl = phy_do_ioctl_running, .ndo_set_mac_address = hisi_femac_set_mac_address, .ndo_set_rx_mode = hisi_femac_net_set_rx_mode, }; diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c index 247de9105d10..4fb776920a93 100644 --- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c +++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c @@ -893,7 +893,7 @@ static void hix5hd2_tx_timeout_task(struct work_struct *work) hix5hd2_net_open(priv->netdev); } -static void hix5hd2_net_timeout(struct net_device *dev) +static void hix5hd2_net_timeout(struct net_device *dev, unsigned int txqueue) { struct hix5hd2_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index eb69e5c81a4d..c117074c16e3 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1483,7 +1483,7 @@ static int hns_nic_net_stop(struct net_device *ndev) static void hns_tx_timeout_reset(struct hns_nic_priv *priv); #define HNS_TX_TIMEO_LIMIT (40 * HZ) -static void hns_nic_net_timeout(struct net_device *ndev) +static void hns_nic_net_timeout(struct net_device *ndev, unsigned int txqueue) { struct hns_nic_priv *priv = netdev_priv(ndev); @@ -1497,20 +1497,6 @@ static void hns_nic_net_timeout(struct net_device *ndev) } } -static int hns_nic_do_ioctl(struct net_device *netdev, struct ifreq *ifr, - int cmd) -{ - struct phy_device *phy_dev = netdev->phydev; - - if (!netif_running(netdev)) - 
return -EINVAL; - - if (!phy_dev) - return -ENOTSUPP; - - return phy_mii_ioctl(phy_dev, ifr, cmd); -} - static netdev_tx_t hns_nic_net_xmit(struct sk_buff *skb, struct net_device *ndev) { @@ -1958,7 +1944,7 @@ static const struct net_device_ops hns_nic_netdev_ops = { .ndo_tx_timeout = hns_nic_net_timeout, .ndo_set_mac_address = hns_nic_net_set_mac_address, .ndo_change_mtu = hns_nic_change_mtu, - .ndo_do_ioctl = hns_nic_do_ioctl, + .ndo_do_ioctl = phy_do_ioctl_running, .ndo_set_features = hns_nic_set_features, .ndo_fix_features = hns_nic_fix_features, .ndo_get_stats64 = hns_nic_get_stats64, diff --git a/drivers/net/ethernet/hisilicon/hns3/Makefile b/drivers/net/ethernet/hisilicon/hns3/Makefile index d01bf536eb86..7aa2fac76c5e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/Makefile +++ b/drivers/net/ethernet/hisilicon/hns3/Makefile @@ -3,6 +3,8 @@ # Makefile for the HISILICON network device drivers. # +ccflags-y += -I$(srctree)/$(src) + obj-$(CONFIG_HNS3) += hns3pf/ obj-$(CONFIG_HNS3) += hns3vf/ diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 3b5e2d7251e7..a3e4081b84ba 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -164,11 +164,7 @@ enum hnae3_reset_type { HNAE3_IMP_RESET, HNAE3_UNKNOWN_RESET, HNAE3_NONE_RESET, -}; - -enum hnae3_flr_state { - HNAE3_FLR_DOWN, - HNAE3_FLR_DONE, + HNAE3_MAX_RESET, }; enum hnae3_port_base_vlan_state { @@ -575,8 +571,7 @@ struct hnae3_ae_algo { const struct pci_device_id *pdev_id_table; }; -#define HNAE3_INT_NAME_EXT_LEN 32 /* Max extra information length */ -#define HNAE3_INT_NAME_LEN (IFNAMSIZ + HNAE3_INT_NAME_EXT_LEN) +#define HNAE3_INT_NAME_LEN 32 #define HNAE3_ITR_COUNTDOWN_START 100 struct hnae3_tc_info { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 6b328a259efc..1d4ffc5f408a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -176,7 +176,7 @@ static int hns3_dbg_bd_info(struct hnae3_handle *h, const char *cmd_buf) return -EINVAL; } - ring = &priv->ring[q_num]; + ring = &priv->ring[q_num]; value = readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_TAIL_REG); tx_index = (cnt == 1) ? value : tx_index; @@ -209,10 +209,10 @@ static int hns3_dbg_bd_info(struct hnae3_handle *h, const char *cmd_buf) le16_to_cpu(tx_desc->tx.bdtp_fe_sc_vld_ra_ri)); dev_info(dev, "(TX)mss: %u\n", le16_to_cpu(tx_desc->tx.mss)); - ring = &priv->ring[q_num + h->kinfo.num_tqps]; + ring = &priv->ring[q_num + h->kinfo.num_tqps]; value = readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_TAIL_REG); rx_index = (cnt == 1) ? 
value : tx_index; - rx_desc = &ring->desc[rx_index]; + rx_desc = &ring->desc[rx_index]; addr = le64_to_cpu(rx_desc->addr); dev_info(dev, "RX Queue Num: %u, BD Index: %u\n", q_num, rx_index); @@ -297,8 +297,8 @@ static ssize_t hns3_dbg_cmd_read(struct file *filp, char __user *buffer, if (!buf) return -ENOMEM; - len = snprintf(buf, HNS3_DBG_READ_LEN, "%s\n", - "Please echo help to cmd to get help information"); + len = scnprintf(buf, HNS3_DBG_READ_LEN, "%s\n", + "Please echo help to cmd to get help information"); uncopy_bytes = copy_to_user(buffer, buf, len); kfree(buf); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index b3deb5e5ce29..acb796cc10d0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -24,6 +24,12 @@ #include "hnae3.h" #include "hns3_enet.h" +/* All hns3 tracepoints are defined by the include below, which + * must be included exactly once across the whole kernel with + * CREATE_TRACE_POINTS defined + */ +#define CREATE_TRACE_POINTS +#include "hns3_trace.h" #define hns3_set_field(origin, shift, val) ((origin) |= ((val) << (shift))) #define hns3_tx_bd_count(S) DIV_ROUND_UP(S, HNS3_MAX_BD_SIZE) @@ -129,18 +135,21 @@ static int hns3_nic_init_irq(struct hns3_nic_priv *priv) continue; if (tqp_vectors->tx_group.ring && tqp_vectors->rx_group.ring) { - snprintf(tqp_vectors->name, HNAE3_INT_NAME_LEN - 1, - "%s-%s-%d", priv->netdev->name, "TxRx", - txrx_int_idx++); + snprintf(tqp_vectors->name, HNAE3_INT_NAME_LEN, + "%s-%s-%s-%d", hns3_driver_name, + pci_name(priv->ae_handle->pdev), + "TxRx", txrx_int_idx++); txrx_int_idx++; } else if (tqp_vectors->rx_group.ring) { - snprintf(tqp_vectors->name, HNAE3_INT_NAME_LEN - 1, - "%s-%s-%d", priv->netdev->name, "Rx", - rx_int_idx++); + snprintf(tqp_vectors->name, HNAE3_INT_NAME_LEN, + "%s-%s-%s-%d", hns3_driver_name, + pci_name(priv->ae_handle->pdev), + "Rx", rx_int_idx++); } else if (tqp_vectors->tx_group.ring) { - snprintf(tqp_vectors->name, HNAE3_INT_NAME_LEN - 1, - "%s-%s-%d", priv->netdev->name, "Tx", - tx_int_idx++); + snprintf(tqp_vectors->name, HNAE3_INT_NAME_LEN, + "%s-%s-%s-%d", hns3_driver_name, + pci_name(priv->ae_handle->pdev), + "Tx", tx_int_idx++); } else { /* Skip this unused q_vector */ continue; @@ -157,6 +166,8 @@ static int hns3_nic_init_irq(struct hns3_nic_priv *priv) return ret; } + disable_irq(tqp_vectors->vector_irq); + irq_set_affinity_hint(tqp_vectors->vector_irq, &tqp_vectors->affinity_mask); @@ -175,6 +186,7 @@ static void hns3_mask_vector_irq(struct hns3_enet_tqp_vector *tqp_vector, static void hns3_vector_enable(struct hns3_enet_tqp_vector *tqp_vector) { napi_enable(&tqp_vector->napi); + enable_irq(tqp_vector->vector_irq); /* enable vector */ hns3_mask_vector_irq(tqp_vector, 1); @@ -374,18 +386,6 @@ static int hns3_nic_net_up(struct net_device *netdev) if (ret) return ret; - /* the device can work without cpu rmap, only aRFS needs it */ - ret = hns3_set_rx_cpu_rmap(netdev); - if (ret) - netdev_warn(netdev, "set rx cpu rmap fail, ret=%d!\n", ret); - - /* get irq resource for all vectors */ - ret = hns3_nic_init_irq(priv); - if (ret) { - netdev_err(netdev, "init irq failed! ret=%d\n", ret); - goto free_rmap; - } - clear_bit(HNS3_NIC_STATE_DOWN, &priv->state); /* enable the vectors */ @@ -398,22 +398,15 @@ static int hns3_nic_net_up(struct net_device *netdev) /* start the ae_dev */ ret = h->ae_algo->ops->start ? 
h->ae_algo->ops->start(h) : 0; - if (ret) - goto out_start_err; - - return 0; - -out_start_err: - set_bit(HNS3_NIC_STATE_DOWN, &priv->state); - while (j--) - hns3_tqp_disable(h->kinfo.tqp[j]); + if (ret) { + set_bit(HNS3_NIC_STATE_DOWN, &priv->state); + while (j--) + hns3_tqp_disable(h->kinfo.tqp[j]); - for (j = i - 1; j >= 0; j--) - hns3_vector_disable(&priv->tqp_vector[j]); + for (j = i - 1; j >= 0; j--) + hns3_vector_disable(&priv->tqp_vector[j]); + } - hns3_nic_uninit_irq(priv); -free_rmap: - hns3_free_rx_cpu_rmap(netdev); return ret; } @@ -510,11 +503,6 @@ static void hns3_nic_net_down(struct net_device *netdev) if (ops->stop) ops->stop(priv->ae_handle); - hns3_free_rx_cpu_rmap(netdev); - - /* free irq resources */ - hns3_nic_uninit_irq(priv); - /* delay ring buffer clearing to hns3_reset_notify_uninit_enet * during reset process, because driver may not be able * to disable the ring through firmware when downing the netdev. @@ -736,6 +724,8 @@ static int hns3_set_tso(struct sk_buff *skb, u32 *paylen, /* get MSS for TSO */ *mss = skb_shinfo(skb)->gso_size; + trace_hns3_tso(skb); + return 0; } @@ -1140,6 +1130,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, desc->tx.bdtp_fe_sc_vld_ra_ri = cpu_to_le16(BIT(HNS3_TXD_VLD_B)); + trace_hns3_tx_desc(ring, ring->next_to_use); ring_ptr_move_fw(ring, next_to_use); return HNS3_LIKELY_BD_NUM; } @@ -1163,6 +1154,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, desc->tx.bdtp_fe_sc_vld_ra_ri = cpu_to_le16(BIT(HNS3_TXD_VLD_B)); + trace_hns3_tx_desc(ring, ring->next_to_use); /* move ring pointer to next */ ring_ptr_move_fw(ring, next_to_use); @@ -1288,6 +1280,14 @@ static bool hns3_skb_need_linearized(struct sk_buff *skb, unsigned int *bd_size, return false; } +void hns3_shinfo_pack(struct skb_shared_info *shinfo, __u32 *size) +{ + int i = 0; + + for (i = 0; i < MAX_SKB_FRAGS; i++) + size[i] = skb_frag_size(&shinfo->frags[i]); +} + static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring, struct net_device *netdev, struct sk_buff *skb) @@ -1299,8 +1299,10 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring, bd_num = hns3_tx_bd_num(skb, bd_size); if (unlikely(bd_num > HNS3_MAX_NON_TSO_BD_NUM)) { if (bd_num <= HNS3_MAX_TSO_BD_NUM && skb_is_gso(skb) && - !hns3_skb_need_linearized(skb, bd_size, bd_num)) + !hns3_skb_need_linearized(skb, bd_size, bd_num)) { + trace_hns3_over_8bd(skb); goto out; + } if (__skb_linearize(skb)) return -ENOMEM; @@ -1308,8 +1310,10 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring, bd_num = hns3_tx_bd_count(skb->len); if ((skb_is_gso(skb) && bd_num > HNS3_MAX_TSO_BD_NUM) || (!skb_is_gso(skb) && - bd_num > HNS3_MAX_NON_TSO_BD_NUM)) + bd_num > HNS3_MAX_NON_TSO_BD_NUM)) { + trace_hns3_over_8bd(skb); return -ENOMEM; + } u64_stats_update_begin(&ring->syncp); ring->stats.tx_copy++; @@ -1454,6 +1458,7 @@ out: (ring->desc_num - 1); ring->desc[pre_ntu].tx.bdtp_fe_sc_vld_ra_ri |= cpu_to_le16(BIT(HNS3_TXD_FE_B)); + trace_hns3_tx_desc(ring, pre_ntu); /* Complete translate all packets */ dev_queue = netdev_get_tx_queue(netdev, ring->queue_index); @@ -1562,6 +1567,37 @@ static int hns3_nic_set_features(struct net_device *netdev, return 0; } +static netdev_features_t hns3_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ +#define HNS3_MAX_HDR_LEN 480U +#define HNS3_MAX_L4_HDR_LEN 60U + + size_t len; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return features; + + if (skb->encapsulation) + len = 
skb_inner_transport_header(skb) - skb->data; + else + len = skb_transport_header(skb) - skb->data; + + /* Assume L4 is 60 byte as TCP is the only protocol with a + * a flexible value, and it's max len is 60 bytes. + */ + len += HNS3_MAX_L4_HDR_LEN; + + /* Hardware only supports checksum on the skb with a max header + * len of 480 bytes. + */ + if (len > HNS3_MAX_HDR_LEN) + features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); + + return features; +} + static void hns3_nic_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) { @@ -1875,7 +1911,7 @@ static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev) return true; } -static void hns3_nic_net_timeout(struct net_device *ndev) +static void hns3_nic_net_timeout(struct net_device *ndev, unsigned int txqueue) { struct hns3_nic_priv *priv = netdev_priv(ndev); struct hnae3_handle *h = priv->ae_handle; @@ -1976,6 +2012,7 @@ static const struct net_device_ops hns3_nic_netdev_ops = { .ndo_do_ioctl = hns3_nic_do_ioctl, .ndo_change_mtu = hns3_nic_change_mtu, .ndo_set_features = hns3_nic_set_features, + .ndo_features_check = hns3_features_check, .ndo_get_stats64 = hns3_nic_get_stats64, .ndo_setup_tc = hns3_nic_setup_tc, .ndo_set_rx_mode = hns3_nic_set_rx_mode, @@ -2057,10 +2094,8 @@ static int hns3_probe(struct pci_dev *pdev, const struct pci_device_id *ent) int ret; ae_dev = devm_kzalloc(&pdev->dev, sizeof(*ae_dev), GFP_KERNEL); - if (!ae_dev) { - ret = -ENOMEM; - return ret; - } + if (!ae_dev) + return -ENOMEM; ae_dev->pdev = pdev; ae_dev->flag = ent->driver_data; @@ -2503,8 +2538,8 @@ void hns3_clean_tx_ring(struct hns3_enet_ring *ring) rmb(); /* Make sure head is ready before touch any data */ if (unlikely(!is_valid_clean_head(ring, head))) { - netdev_err(netdev, "wrong head (%d, %d-%d)\n", head, - ring->next_to_use, ring->next_to_clean); + hns3_rl_err(netdev, "wrong head (%d, %d-%d)\n", head, + ring->next_to_use, ring->next_to_clean); u64_stats_update_begin(&ring->syncp); ring->stats.io_err_cnt++; @@ -2590,6 +2625,12 @@ static void hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring, writel_relaxed(i, ring->tqp->io_base + HNS3_RING_RX_RING_HEAD_REG); } +static bool hns3_page_is_reusable(struct page *page) +{ + return page_to_nid(page) == numa_mem_id() && + !page_is_pfmemalloc(page); +} + static void hns3_nic_reuse_page(struct sk_buff *skb, int i, struct hns3_enet_ring *ring, int pull_len, struct hns3_desc_cb *desc_cb) @@ -2604,7 +2645,7 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i, /* Avoid re-using remote pages, or the stack is still using the page * when page_offset rollback to zero, flag default unreuse */ - if (unlikely(page_to_nid(desc_cb->priv) != numa_mem_id()) || + if (unlikely(!hns3_page_is_reusable(desc_cb->priv)) || (!desc_cb->page_offset && page_count(desc_cb->priv) > 1)) return; @@ -2674,6 +2715,9 @@ static int hns3_gro_complete(struct sk_buff *skb, u32 l234info) skb->csum_start = (unsigned char *)th - skb->head; skb->csum_offset = offsetof(struct tcphdr, check); skb->ip_summed = CHECKSUM_PARTIAL; + + trace_hns3_gro(skb); + return 0; } @@ -2794,7 +2838,6 @@ static bool hns3_parse_vlan_tag(struct hns3_enet_ring *ring, static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length, unsigned char *va) { -#define HNS3_NEED_ADD_FRAG 1 struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_clean]; struct net_device *netdev = ring_to_netdev(ring); struct sk_buff *skb; @@ -2811,6 +2854,7 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length, return 
-ENOMEM; } + trace_hns3_rx_desc(ring); prefetchw(skb->data); ring->pending_buf = 1; @@ -2820,7 +2864,7 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length, memcpy(__skb_put(skb, length), va, ALIGN(length, sizeof(long))); /* We can reuse buffer as-is, just make sure it is local */ - if (likely(page_to_nid(desc_cb->priv) == numa_mem_id())) + if (likely(hns3_page_is_reusable(desc_cb->priv))) desc_cb->reuse_flag = 1; else /* This page cannot be reused so discard it */ put_page(desc_cb->priv); @@ -2838,33 +2882,19 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length, desc_cb); ring_ptr_move_fw(ring, next_to_clean); - return HNS3_NEED_ADD_FRAG; + return 0; } -static int hns3_add_frag(struct hns3_enet_ring *ring, struct hns3_desc *desc, - bool pending) +static int hns3_add_frag(struct hns3_enet_ring *ring) { struct sk_buff *skb = ring->skb; struct sk_buff *head_skb = skb; struct sk_buff *new_skb; struct hns3_desc_cb *desc_cb; - struct hns3_desc *pre_desc; + struct hns3_desc *desc; u32 bd_base_info; - int pre_bd; - /* if there is pending bd, the SW param next_to_clean has moved - * to next and the next is NULL - */ - if (pending) { - pre_bd = (ring->next_to_clean - 1 + ring->desc_num) % - ring->desc_num; - pre_desc = &ring->desc[pre_bd]; - bd_base_info = le32_to_cpu(pre_desc->rx.bd_base_info); - } else { - bd_base_info = le32_to_cpu(desc->rx.bd_base_info); - } - - while (!(bd_base_info & BIT(HNS3_RXD_FE_B))) { + do { desc = &ring->desc[ring->next_to_clean]; desc_cb = &ring->desc_cb[ring->next_to_clean]; bd_base_info = le32_to_cpu(desc->rx.bd_base_info); @@ -2899,9 +2929,10 @@ static int hns3_add_frag(struct hns3_enet_ring *ring, struct hns3_desc *desc, } hns3_nic_reuse_page(skb, ring->frag_num++, ring, 0, desc_cb); + trace_hns3_rx_desc(ring); ring_ptr_move_fw(ring, next_to_clean); ring->pending_buf++; - } + } while (!(bd_base_info & BIT(HNS3_RXD_FE_B))); return 0; } @@ -3069,28 +3100,23 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring) if (ret < 0) /* alloc buffer fail */ return ret; - if (ret > 0) { /* need add frag */ - ret = hns3_add_frag(ring, desc, false); + if (!(bd_base_info & BIT(HNS3_RXD_FE_B))) { /* need add frag */ + ret = hns3_add_frag(ring); if (ret) return ret; - - /* As the head data may be changed when GRO enable, copy - * the head data in after other data rx completed - */ - memcpy(skb->data, ring->va, - ALIGN(ring->pull_len, sizeof(long))); } } else { - ret = hns3_add_frag(ring, desc, true); + ret = hns3_add_frag(ring); if (ret) return ret; + } - /* As the head data may be changed when GRO enable, copy - * the head data in after other data rx completed - */ + /* As the head data may be changed when GRO enable, copy + * the head data in after other data rx completed + */ + if (skb->len > HNS3_RX_HEAD_SIZE) memcpy(skb->data, ring->va, ALIGN(ring->pull_len, sizeof(long))); - } ret = hns3_handle_bdinfo(ring, skb); if (unlikely(ret)) { @@ -3596,26 +3622,25 @@ static void hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv) if (!tqp_vector->rx_group.ring && !tqp_vector->tx_group.ring) continue; - hns3_get_vector_ring_chain(tqp_vector, &vector_ring_chain); + /* Since the mapping can be overwritten, when fail to get the + * chain between vector and ring, we should go on to deal with + * the remaining options. 
+ */ + if (hns3_get_vector_ring_chain(tqp_vector, &vector_ring_chain)) + dev_warn(priv->dev, "failed to get ring chain\n"); h->ae_algo->ops->unmap_ring_from_vector(h, tqp_vector->vector_irq, &vector_ring_chain); hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain); - if (tqp_vector->irq_init_flag == HNS3_VECTOR_INITED) { - irq_set_affinity_hint(tqp_vector->vector_irq, NULL); - free_irq(tqp_vector->vector_irq, tqp_vector); - tqp_vector->irq_init_flag = HNS3_VECTOR_NOT_INITED; - } - hns3_clear_ring_group(&tqp_vector->rx_group); hns3_clear_ring_group(&tqp_vector->tx_group); netif_napi_del(&priv->tqp_vector[i].napi); } } -static int hns3_nic_dealloc_vector_data(struct hns3_nic_priv *priv) +static void hns3_nic_dealloc_vector_data(struct hns3_nic_priv *priv) { struct hnae3_handle *h = priv->ae_handle; struct pci_dev *pdev = h->pdev; @@ -3627,11 +3652,10 @@ static int hns3_nic_dealloc_vector_data(struct hns3_nic_priv *priv) tqp_vector = &priv->tqp_vector[i]; ret = h->ae_algo->ops->put_vector(h, tqp_vector->vector_irq); if (ret) - return ret; + return; } devm_kfree(&pdev->dev, priv->tqp_vector); - return 0; } static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv, @@ -4030,6 +4054,18 @@ static int hns3_client_init(struct hnae3_handle *handle) goto out_reg_netdev_fail; } + /* the device can work without cpu rmap, only aRFS needs it */ + ret = hns3_set_rx_cpu_rmap(netdev); + if (ret) + dev_warn(priv->dev, "set rx cpu rmap fail, ret=%d\n", ret); + + ret = hns3_nic_init_irq(priv); + if (ret) { + dev_err(priv->dev, "init irq failed! ret=%d\n", ret); + hns3_free_rx_cpu_rmap(netdev); + goto out_init_irq_fail; + } + ret = hns3_client_start(handle); if (ret) { dev_err(priv->dev, "hns3_client_start fail! ret=%d\n", ret); @@ -4051,6 +4087,9 @@ static int hns3_client_init(struct hnae3_handle *handle) return ret; out_client_start: + hns3_free_rx_cpu_rmap(netdev); + hns3_nic_uninit_irq(priv); +out_init_irq_fail: unregister_netdev(netdev); out_reg_netdev_fail: hns3_uninit_phy(netdev); @@ -4088,15 +4127,17 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset) goto out_netdev_free; } + hns3_free_rx_cpu_rmap(netdev); + + hns3_nic_uninit_irq(priv); + hns3_del_all_fd_rules(netdev, true); hns3_clear_all_ring(handle, true); hns3_nic_uninit_vector_data(priv); - ret = hns3_nic_dealloc_vector_data(priv); - if (ret) - netdev_err(netdev, "dealloc vector error\n"); + hns3_nic_dealloc_vector_data(priv); ret = hns3_uninit_all_ring(priv); if (ret) @@ -4423,17 +4464,32 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle) if (ret) goto err_uninit_vector; + /* the device can work without cpu rmap, only aRFS needs it */ + ret = hns3_set_rx_cpu_rmap(netdev); + if (ret) + dev_warn(priv->dev, "set rx cpu rmap fail, ret=%d\n", ret); + + ret = hns3_nic_init_irq(priv); + if (ret) { + dev_err(priv->dev, "init irq failed! ret=%d\n", ret); + hns3_free_rx_cpu_rmap(netdev); + goto err_init_irq_fail; + } + ret = hns3_client_start(handle); if (ret) { dev_err(priv->dev, "hns3_client_start fail! 
ret=%d\n", ret); - goto err_uninit_ring; + goto err_client_start_fail; } set_bit(HNS3_NIC_STATE_INITED, &priv->state); return ret; -err_uninit_ring: +err_client_start_fail: + hns3_free_rx_cpu_rmap(netdev); + hns3_nic_uninit_irq(priv); +err_init_irq_fail: hns3_uninit_all_ring(priv); err_uninit_vector: hns3_nic_uninit_vector_data(priv); @@ -4483,6 +4539,8 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle) return 0; } + hns3_free_rx_cpu_rmap(netdev); + hns3_nic_uninit_irq(priv); hns3_clear_all_ring(handle, true); hns3_reset_tx_queue(priv->ae_handle); @@ -4490,9 +4548,7 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle) hns3_store_coal(priv); - ret = hns3_nic_dealloc_vector_data(priv); - if (ret) - netdev_err(netdev, "dealloc vector error\n"); + hns3_nic_dealloc_vector_data(priv); ret = hns3_uninit_all_ring(priv); if (ret) @@ -4658,7 +4714,7 @@ static int __init hns3_init_module(void) pr_info("%s: %s\n", hns3_driver_name, hns3_copyright); client.type = HNAE3_CLIENT_KNIC; - snprintf(client.name, HNAE3_CLIENT_NAME_LENGTH - 1, "%s", + snprintf(client.name, HNAE3_CLIENT_NAME_LENGTH, "%s", hns3_driver_name); client.ops = &client_ops; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index 9d47abd5c37c..abefd7a179f7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -673,4 +673,5 @@ void hns3_dbg_init(struct hnae3_handle *handle); void hns3_dbg_uninit(struct hnae3_handle *handle); void hns3_dbg_register_debugfs(const char *debugfs_dir_name); void hns3_dbg_unregister_debugfs(void); +void hns3_shinfo_pack(struct skb_shared_info *shinfo, __u32 *size); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 6e0212b79438..c03856e63320 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -423,9 +423,8 @@ static void *hns3_update_strings(u8 *data, const struct hns3_stats *stats, data[ETH_GSTRING_LEN - 1] = '\0'; /* first, prepend the prefix string */ - n1 = snprintf(data, MAX_PREFIX_SIZE, "%s%d_", - prefix, i); - n1 = min_t(uint, n1, MAX_PREFIX_SIZE - 1); + n1 = scnprintf(data, MAX_PREFIX_SIZE, "%s%d_", + prefix, i); size_left = (ETH_GSTRING_LEN - 1) - n1; /* now, concatenate the stats string to it */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_trace.h b/drivers/net/ethernet/hisilicon/hns3/hns3_trace.h new file mode 100644 index 000000000000..7bddcca148a5 --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_trace.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (c) 2018-2019 Hisilicon Limited. 
*/ + +/* This must be outside ifdef _HNS3_TRACE_H */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hns3 + +#if !defined(_HNS3_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _HNS3_TRACE_H_ + +#include <linux/tracepoint.h> + +#define DESC_NR (sizeof(struct hns3_desc) / sizeof(u32)) + +DECLARE_EVENT_CLASS(hns3_skb_template, + TP_PROTO(struct sk_buff *skb), + TP_ARGS(skb), + + TP_STRUCT__entry( + __field(unsigned int, headlen) + __field(unsigned int, len) + __field(__u8, nr_frags) + __field(__u8, ip_summed) + __field(unsigned int, hdr_len) + __field(unsigned short, gso_size) + __field(unsigned short, gso_segs) + __field(unsigned int, gso_type) + __field(bool, fraglist) + __array(__u32, size, MAX_SKB_FRAGS) + ), + + TP_fast_assign( + __entry->headlen = skb_headlen(skb); + __entry->len = skb->len; + __entry->nr_frags = skb_shinfo(skb)->nr_frags; + __entry->gso_size = skb_shinfo(skb)->gso_size; + __entry->gso_segs = skb_shinfo(skb)->gso_segs; + __entry->gso_type = skb_shinfo(skb)->gso_type; + __entry->hdr_len = skb->encapsulation ? + skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb) : + skb_transport_offset(skb) + tcp_hdrlen(skb); + __entry->ip_summed = skb->ip_summed; + __entry->fraglist = skb_has_frag_list(skb); + hns3_shinfo_pack(skb_shinfo(skb), __entry->size); + ), + + TP_printk( + "len: %u, %u, %u, cs: %u, gso: %u, %u, %x, frag(%d %u): %s", + __entry->headlen, __entry->len, __entry->hdr_len, + __entry->ip_summed, __entry->gso_size, __entry->gso_segs, + __entry->gso_type, __entry->fraglist, __entry->nr_frags, + __print_array(__entry->size, MAX_SKB_FRAGS, sizeof(__u32)) + ) +); + +DEFINE_EVENT(hns3_skb_template, hns3_over_8bd, + TP_PROTO(struct sk_buff *skb), + TP_ARGS(skb)); + +DEFINE_EVENT(hns3_skb_template, hns3_gro, + TP_PROTO(struct sk_buff *skb), + TP_ARGS(skb)); + +DEFINE_EVENT(hns3_skb_template, hns3_tso, + TP_PROTO(struct sk_buff *skb), + TP_ARGS(skb)); + +TRACE_EVENT(hns3_tx_desc, + TP_PROTO(struct hns3_enet_ring *ring, int cur_ntu), + TP_ARGS(ring, cur_ntu), + + TP_STRUCT__entry( + __field(int, index) + __field(int, ntu) + __field(int, ntc) + __field(dma_addr_t, desc_dma) + __array(u32, desc, DESC_NR) + __string(devname, ring->tqp->handle->kinfo.netdev->name) + ), + + TP_fast_assign( + __entry->index = ring->tqp->tqp_index; + __entry->ntu = ring->next_to_use; + __entry->ntc = ring->next_to_clean; + __entry->desc_dma = ring->desc_dma_addr, + memcpy(__entry->desc, &ring->desc[cur_ntu], + sizeof(struct hns3_desc)); + __assign_str(devname, ring->tqp->handle->kinfo.netdev->name); + ), + + TP_printk( + "%s-%d-%d/%d desc(%pad): %s", + __get_str(devname), __entry->index, __entry->ntu, + __entry->ntc, &__entry->desc_dma, + __print_array(__entry->desc, DESC_NR, sizeof(u32)) + ) +); + +TRACE_EVENT(hns3_rx_desc, + TP_PROTO(struct hns3_enet_ring *ring), + TP_ARGS(ring), + + TP_STRUCT__entry( + __field(int, index) + __field(int, ntu) + __field(int, ntc) + __field(dma_addr_t, desc_dma) + __field(dma_addr_t, buf_dma) + __array(u32, desc, DESC_NR) + __string(devname, ring->tqp->handle->kinfo.netdev->name) + ), + + TP_fast_assign( + __entry->index = ring->tqp->tqp_index; + __entry->ntu = ring->next_to_use; + __entry->ntc = ring->next_to_clean; + __entry->desc_dma = ring->desc_dma_addr; + __entry->buf_dma = ring->desc_cb[ring->next_to_clean].dma; + memcpy(__entry->desc, &ring->desc[ring->next_to_clean], + sizeof(struct hns3_desc)); + __assign_str(devname, ring->tqp->handle->kinfo.netdev->name); + ), + + TP_printk( + "%s-%d-%d/%d desc(%pad) buf(%pad): %s", + __get_str(devname), 
__entry->index, __entry->ntu, + __entry->ntc, &__entry->desc_dma, &__entry->buf_dma, + __print_array(__entry->desc, DESC_NR, sizeof(u32)) + ) +); + +#endif /* _HNS3_TRACE_H_ */ + +/* This must be outside ifdef _HNS3_TRACE_H */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE hns3_trace +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index 940ead3970d1..7f509eff562e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -479,19 +479,6 @@ static void hclge_cmd_uninit_regs(struct hclge_hw *hw) hclge_write_dev(hw, HCLGE_NIC_CRQ_TAIL_REG, 0); } -static void hclge_destroy_queue(struct hclge_cmq_ring *ring) -{ - spin_lock(&ring->lock); - hclge_free_cmd_desc(ring); - spin_unlock(&ring->lock); -} - -static void hclge_destroy_cmd_queue(struct hclge_hw *hw) -{ - hclge_destroy_queue(&hw->cmq.csq); - hclge_destroy_queue(&hw->cmq.crq); -} - void hclge_cmd_uninit(struct hclge_dev *hdev) { spin_lock_bh(&hdev->hw.cmq.csq.lock); @@ -501,5 +488,6 @@ void hclge_cmd_uninit(struct hclge_dev *hdev) spin_unlock(&hdev->hw.cmq.crq.lock); spin_unlock_bh(&hdev->hw.cmq.csq.lock); - hclge_destroy_cmd_queue(&hdev->hw); + hclge_free_cmd_desc(&hdev->hw.cmq.csq); + hclge_free_cmd_desc(&hdev->hw.cmq.crq); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index d97da67f07a1..96498d9b4754 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -6,6 +6,7 @@ #include <linux/types.h> #include <linux/io.h> #include <linux/etherdevice.h> +#include "hnae3.h" #define HCLGE_CMDQ_TX_TIMEOUT 30000 #define HCLGE_DESC_DATA_LEN 6 @@ -63,6 +64,7 @@ enum hclge_cmd_status { struct hclge_misc_vector { u8 __iomem *addr; int vector_irq; + char name[HNAE3_INT_NAME_LEN]; }; struct hclge_cmq { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index 112df34b3869..67fad80035d3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -73,8 +73,6 @@ static struct hclge_dbg_reg_type_info hclge_dbg_reg_info[] = { static int hclge_dbg_get_dfx_bd_num(struct hclge_dev *hdev, int offset) { -#define HCLGE_GET_DFX_REG_TYPE_CNT 4 - struct hclge_desc desc[HCLGE_GET_DFX_REG_TYPE_CNT]; int entries_per_desc; int index; @@ -886,8 +884,8 @@ static void hclge_dbg_dump_mng_table(struct hclge_dev *hdev) } } -static void hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, u8 stage, - bool sel_x, u32 loc) +static int hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, u8 stage, + bool sel_x, u32 loc) { struct hclge_fd_tcam_config_1_cmd *req1; struct hclge_fd_tcam_config_2_cmd *req2; @@ -912,7 +910,7 @@ static void hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, u8 stage, ret = hclge_cmd_send(&hdev->hw, desc, 3); if (ret) - return; + return ret; dev_info(&hdev->pdev->dev, " read result tcam key %s(%u):\n", sel_x ? 
"x" : "y", loc); @@ -931,16 +929,76 @@ static void hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, u8 stage, req = (u32 *)req3->tcam_data; for (i = 0; i < 5; i++) dev_info(&hdev->pdev->dev, "%08x\n", *req++); + + return ret; +} + +static int hclge_dbg_get_rules_location(struct hclge_dev *hdev, u16 *rule_locs) +{ + struct hclge_fd_rule *rule; + struct hlist_node *node; + int cnt = 0; + + spin_lock_bh(&hdev->fd_rule_lock); + hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { + rule_locs[cnt] = rule->location; + cnt++; + } + spin_unlock_bh(&hdev->fd_rule_lock); + + if (cnt != hdev->hclge_fd_rule_num) + return -EINVAL; + + return cnt; } static void hclge_dbg_fd_tcam(struct hclge_dev *hdev) { - u32 i; + int i, ret, rule_cnt; + u16 *rule_locs; + + if (!hnae3_dev_fd_supported(hdev)) { + dev_err(&hdev->pdev->dev, + "Only FD-supported dev supports dump fd tcam\n"); + return; + } + + if (!hdev->hclge_fd_rule_num || + !hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]) + return; - for (i = 0; i < hdev->fd_cfg.rule_num[0]; i++) { - hclge_dbg_fd_tcam_read(hdev, 0, true, i); - hclge_dbg_fd_tcam_read(hdev, 0, false, i); + rule_locs = kcalloc(hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1], + sizeof(u16), GFP_KERNEL); + if (!rule_locs) + return; + + rule_cnt = hclge_dbg_get_rules_location(hdev, rule_locs); + if (rule_cnt <= 0) { + dev_err(&hdev->pdev->dev, + "failed to get rule number, ret = %d\n", rule_cnt); + kfree(rule_locs); + return; } + + for (i = 0; i < rule_cnt; i++) { + ret = hclge_dbg_fd_tcam_read(hdev, 0, true, rule_locs[i]); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to get fd tcam key x, ret = %d\n", ret); + kfree(rule_locs); + return; + } + + ret = hclge_dbg_fd_tcam_read(hdev, 0, false, rule_locs[i]); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to get fd tcam key y, ret = %d\n", ret); + kfree(rule_locs); + return; + } + } + + kfree(rule_locs); } void hclge_dbg_dump_rst_info(struct hclge_dev *hdev) @@ -976,6 +1034,14 @@ void hclge_dbg_dump_rst_info(struct hclge_dev *hdev) dev_info(&hdev->pdev->dev, "hdev state: 0x%lx\n", hdev->state); } +static void hclge_dbg_dump_serv_info(struct hclge_dev *hdev) +{ + dev_info(&hdev->pdev->dev, "last_serv_processed: %lu\n", + hdev->last_serv_processed); + dev_info(&hdev->pdev->dev, "last_serv_cnt: %lu\n", + hdev->serv_processed_cnt); +} + static void hclge_dbg_get_m7_stats_info(struct hclge_dev *hdev) { struct hclge_desc *desc_src, *desc_tmp; @@ -1227,6 +1293,8 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf) hclge_dbg_dump_reg_cmd(hdev, &cmd_buf[sizeof(DUMP_REG)]); } else if (strncmp(cmd_buf, "dump reset info", 15) == 0) { hclge_dbg_dump_rst_info(hdev); + } else if (strncmp(cmd_buf, "dump serv info", 14) == 0) { + hclge_dbg_dump_serv_info(hdev); } else if (strncmp(cmd_buf, "dump m7 info", 12) == 0) { hclge_dbg_get_m7_stats_info(hdev); } else if (strncmp(cmd_buf, "dump ncl_config", 15) == 0) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index dc66b4e13377..c85b72dc44d2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -505,7 +505,7 @@ static const struct hclge_hw_error hclge_ssu_mem_ecc_err_int[] = { static const struct hclge_hw_error hclge_ssu_port_based_err_int[] = { { .int_msk = BIT(0), .msg = "roc_pkt_without_key_port", - .reset_level = HNAE3_GLOBAL_RESET }, + .reset_level = HNAE3_FUNC_RESET }, { .int_msk = BIT(1), .msg = "tpu_pkt_without_key_port", 
.reset_level = HNAE3_GLOBAL_RESET }, { .int_msk = BIT(2), .msg = "igu_pkt_without_key_port", @@ -599,7 +599,7 @@ static const struct hclge_hw_error hclge_ssu_ets_tcg_int[] = { static const struct hclge_hw_error hclge_ssu_port_based_pf_int[] = { { .int_msk = BIT(0), .msg = "roc_pkt_without_key_port", - .reset_level = HNAE3_GLOBAL_RESET }, + .reset_level = HNAE3_FUNC_RESET }, { .int_msk = BIT(9), .msg = "low_water_line_err_port", .reset_level = HNAE3_NONE_RESET }, { .int_msk = BIT(10), .msg = "hi_water_line_err_port", @@ -1898,10 +1898,8 @@ static int hclge_handle_all_hw_msix_error(struct hclge_dev *hdev, bd_num = max_t(u32, mpf_bd_num, pf_bd_num); desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL); - if (!desc) { - ret = -ENOMEM; - goto out; - } + if (!desc) + return -ENOMEM; ret = hclge_handle_mpf_msix_error(hdev, desc, mpf_bd_num, reset_requests); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 13dbd249f35f..ec5f6eeb639b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -72,6 +72,8 @@ static int hclge_set_default_loopback(struct hclge_dev *hdev); static struct hnae3_ae_algo ae_algo; +static struct workqueue_struct *hclge_wq; + static const struct pci_device_id ae_algo_pci_tbl[] = { {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_GE), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE), 0}, @@ -416,7 +418,7 @@ static int hclge_mac_update_stats_defective(struct hclge_dev *hdev) { #define HCLGE_MAC_CMD_NUM 21 - u64 *data = (u64 *)(&hdev->hw_stats.mac_stats); + u64 *data = (u64 *)(&hdev->mac_stats); struct hclge_desc desc[HCLGE_MAC_CMD_NUM]; __le64 *desc_data; int i, k, n; @@ -453,7 +455,7 @@ static int hclge_mac_update_stats_defective(struct hclge_dev *hdev) static int hclge_mac_update_stats_complete(struct hclge_dev *hdev, u32 desc_num) { - u64 *data = (u64 *)(&hdev->hw_stats.mac_stats); + u64 *data = (u64 *)(&hdev->mac_stats); struct hclge_desc *desc; __le64 *desc_data; u16 i, k, n; @@ -802,7 +804,7 @@ static void hclge_get_stats(struct hnae3_handle *handle, u64 *data) struct hclge_dev *hdev = vport->back; u64 *p; - p = hclge_comm_get_stats(&hdev->hw_stats.mac_stats, g_mac_stats_string, + p = hclge_comm_get_stats(&hdev->mac_stats, g_mac_stats_string, ARRAY_SIZE(g_mac_stats_string), data); p = hclge_tqps_get_stats(handle, p); } @@ -815,8 +817,8 @@ static void hclge_get_mac_stat(struct hnae3_handle *handle, hclge_update_stats(handle, NULL); - mac_stats->tx_pause_cnt = hdev->hw_stats.mac_stats.mac_tx_mac_pause_num; - mac_stats->rx_pause_cnt = hdev->hw_stats.mac_stats.mac_rx_mac_pause_num; + mac_stats->tx_pause_cnt = hdev->mac_stats.mac_tx_mac_pause_num; + mac_stats->rx_pause_cnt = hdev->mac_stats.mac_rx_mac_pause_num; } static int hclge_parse_func_status(struct hclge_dev *hdev, @@ -860,9 +862,7 @@ static int hclge_query_function_status(struct hclge_dev *hdev) usleep_range(1000, 2000); } while (timeout++ < HCLGE_QUERY_MAX_CNT); - ret = hclge_parse_func_status(hdev, req); - - return ret; + return hclge_parse_func_status(hdev, req); } static int hclge_query_pf_resource(struct hclge_dev *hdev) @@ -880,12 +880,12 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev) } req = (struct hclge_pf_res_cmd *)desc.data; - hdev->num_tqps = __le16_to_cpu(req->tqp_num); - hdev->pkt_buf_size = __le16_to_cpu(req->buf_size) << HCLGE_BUF_UNIT_S; + hdev->num_tqps = le16_to_cpu(req->tqp_num); + hdev->pkt_buf_size = le16_to_cpu(req->buf_size) << 
HCLGE_BUF_UNIT_S; if (req->tx_buf_size) hdev->tx_buf_size = - __le16_to_cpu(req->tx_buf_size) << HCLGE_BUF_UNIT_S; + le16_to_cpu(req->tx_buf_size) << HCLGE_BUF_UNIT_S; else hdev->tx_buf_size = HCLGE_DEFAULT_TX_BUF; @@ -893,7 +893,7 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev) if (req->dv_buf_size) hdev->dv_buf_size = - __le16_to_cpu(req->dv_buf_size) << HCLGE_BUF_UNIT_S; + le16_to_cpu(req->dv_buf_size) << HCLGE_BUF_UNIT_S; else hdev->dv_buf_size = HCLGE_DEFAULT_DV; @@ -901,10 +901,10 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev) if (hnae3_dev_roce_supported(hdev)) { hdev->roce_base_msix_offset = - hnae3_get_field(__le16_to_cpu(req->msixcap_localid_ba_rocee), + hnae3_get_field(le16_to_cpu(req->msixcap_localid_ba_rocee), HCLGE_MSIX_OFT_ROCEE_M, HCLGE_MSIX_OFT_ROCEE_S); hdev->num_roce_msi = - hnae3_get_field(__le16_to_cpu(req->pf_intr_vector_number), + hnae3_get_field(le16_to_cpu(req->pf_intr_vector_number), HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S); /* nic's msix numbers is always equals to the roce's. */ @@ -917,7 +917,7 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev) hdev->roce_base_msix_offset; } else { hdev->num_msi = - hnae3_get_field(__le16_to_cpu(req->pf_intr_vector_number), + hnae3_get_field(le16_to_cpu(req->pf_intr_vector_number), HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S); hdev->num_nic_msi = hdev->num_msi; @@ -1331,11 +1331,7 @@ static int hclge_get_cap(struct hclge_dev *hdev) } /* get pf resource */ - ret = hclge_query_pf_resource(hdev); - if (ret) - dev_err(&hdev->pdev->dev, "query pf resource error %d.\n", ret); - - return ret; + return hclge_query_pf_resource(hdev); } static void hclge_init_kdump_kernel_config(struct hclge_dev *hdev) @@ -2619,30 +2615,21 @@ static int hclge_mac_init(struct hclge_dev *hdev) hdev->hw.mac.duplex = HCLGE_MAC_FULL; ret = hclge_cfg_mac_speed_dup_hw(hdev, hdev->hw.mac.speed, hdev->hw.mac.duplex); - if (ret) { - dev_err(&hdev->pdev->dev, - "Config mac speed dup fail ret=%d\n", ret); + if (ret) return ret; - } if (hdev->hw.mac.support_autoneg) { ret = hclge_set_autoneg_en(hdev, hdev->hw.mac.autoneg); - if (ret) { - dev_err(&hdev->pdev->dev, - "Config mac autoneg fail ret=%d\n", ret); + if (ret) return ret; - } } mac->link = 0; if (mac->user_fec_mode & BIT(HNAE3_FEC_USER_DEF)) { ret = hclge_set_fec_hw(hdev, mac->user_fec_mode); - if (ret) { - dev_err(&hdev->pdev->dev, - "Fec mode init fail, ret = %d\n", ret); + if (ret) return ret; - } } ret = hclge_set_mac_mtu(hdev, hdev->mps); @@ -2665,31 +2652,27 @@ static int hclge_mac_init(struct hclge_dev *hdev) static void hclge_mbx_task_schedule(struct hclge_dev *hdev) { - if (!test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state) && + if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && !test_and_set_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state)) - queue_work_on(cpumask_first(&hdev->affinity_mask), system_wq, - &hdev->mbx_service_task); + mod_delayed_work_on(cpumask_first(&hdev->affinity_mask), + hclge_wq, &hdev->service_task, 0); } static void hclge_reset_task_schedule(struct hclge_dev *hdev) { if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && !test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state)) - queue_work_on(cpumask_first(&hdev->affinity_mask), system_wq, - &hdev->rst_service_task); + mod_delayed_work_on(cpumask_first(&hdev->affinity_mask), + hclge_wq, &hdev->service_task, 0); } void hclge_task_schedule(struct hclge_dev *hdev, unsigned long delay_time) { - if (!test_bit(HCLGE_STATE_DOWN, &hdev->state) && - !test_bit(HCLGE_STATE_REMOVING, &hdev->state) && 
- !test_and_set_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state)) { - hdev->hw_stats.stats_timer++; - hdev->fd_arfs_expire_timer++; + if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && + !test_bit(HCLGE_STATE_RST_FAIL, &hdev->state)) mod_delayed_work_on(cpumask_first(&hdev->affinity_mask), - system_wq, &hdev->service_task, + hclge_wq, &hdev->service_task, delay_time); - } } static int hclge_get_mac_link_status(struct hclge_dev *hdev) @@ -2748,6 +2731,10 @@ static void hclge_update_link_status(struct hclge_dev *hdev) if (!client) return; + + if (test_and_set_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state)) + return; + state = hclge_get_mac_phy_link(hdev); if (state != hdev->hw.mac.link) { for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { @@ -2761,6 +2748,8 @@ static void hclge_update_link_status(struct hclge_dev *hdev) } hdev->hw.mac.link = state; } + + clear_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state); } static void hclge_update_port_capability(struct hclge_mac *mac) @@ -2831,6 +2820,12 @@ static int hclge_get_sfp_info(struct hclge_dev *hdev, struct hclge_mac *mac) return ret; } + /* In some case, mac speed get from IMP may be 0, it shouldn't be + * set to mac->speed. + */ + if (!le32_to_cpu(resp->speed)) + return 0; + mac->speed = le32_to_cpu(resp->speed); /* if resp->speed_ability is 0, it means it's an old version * firmware, do not update these params @@ -2906,7 +2901,7 @@ static int hclge_get_status(struct hnae3_handle *handle) static struct hclge_vport *hclge_get_vf_vport(struct hclge_dev *hdev, int vf) { - if (pci_num_vf(hdev->pdev) == 0) { + if (!pci_num_vf(hdev->pdev)) { dev_err(&hdev->pdev->dev, "SRIOV is disabled, can not get vport(%d) info.\n", vf); return NULL; @@ -2940,6 +2935,9 @@ static int hclge_get_vf_config(struct hnae3_handle *handle, int vf, ivf->trusted = vport->vf_info.trusted; ivf->min_tx_rate = 0; ivf->max_tx_rate = vport->vf_info.max_tx_rate; + ivf->vlan = vport->port_base_vlan_cfg.vlan_info.vlan_tag; + ivf->vlan_proto = htons(vport->port_base_vlan_cfg.vlan_info.vlan_proto); + ivf->qos = vport->port_base_vlan_cfg.vlan_info.qos; ether_addr_copy(ivf->mac, vport->vf_info.mac); return 0; @@ -2998,8 +2996,6 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval) /* check for vector0 msix event source */ if (msix_src_reg & HCLGE_VECTOR0_REG_MSIX_MASK) { - dev_info(&hdev->pdev->dev, "received event 0x%x\n", - msix_src_reg); *clearval = msix_src_reg; return HCLGE_VECTOR0_EVENT_ERR; } @@ -3172,8 +3168,10 @@ static int hclge_misc_irq_init(struct hclge_dev *hdev) hclge_get_misc_vector(hdev); /* this would be explicitly freed in the end */ + snprintf(hdev->misc_vector.name, HNAE3_INT_NAME_LEN, "%s-misc-%s", + HCLGE_NAME, pci_name(hdev->pdev)); ret = request_irq(hdev->misc_vector.vector_irq, hclge_misc_irq_handle, - 0, "hclge_misc", hdev); + 0, hdev->misc_vector.name, hdev); if (ret) { hclge_free_vector(hdev, 0); dev_err(&hdev->pdev->dev, "request misc irq(%d) fail\n", @@ -3247,7 +3245,8 @@ static int hclge_notify_roce_client(struct hclge_dev *hdev, static int hclge_reset_wait(struct hclge_dev *hdev) { #define HCLGE_RESET_WATI_MS 100 -#define HCLGE_RESET_WAIT_CNT 200 +#define HCLGE_RESET_WAIT_CNT 350 + u32 val, reg, reg_bit; u32 cnt = 0; @@ -3264,8 +3263,6 @@ static int hclge_reset_wait(struct hclge_dev *hdev) reg = HCLGE_FUN_RST_ING; reg_bit = HCLGE_FUN_RST_ING_B; break; - case HNAE3_FLR_RESET: - break; default: dev_err(&hdev->pdev->dev, "Wait for unsupported reset type: %d\n", @@ -3273,20 +3270,6 @@ static int hclge_reset_wait(struct hclge_dev *hdev) return 
-EINVAL; } - if (hdev->reset_type == HNAE3_FLR_RESET) { - while (!test_bit(HNAE3_FLR_DONE, &hdev->flr_state) && - cnt++ < HCLGE_RESET_WAIT_CNT) - msleep(HCLGE_RESET_WATI_MS); - - if (!test_bit(HNAE3_FLR_DONE, &hdev->flr_state)) { - dev_err(&hdev->pdev->dev, - "flr wait timeout: %u\n", cnt); - return -EBUSY; - } - - return 0; - } - val = hclge_read_dev(&hdev->hw, reg); while (hnae3_get_bit(val, reg_bit) && cnt < HCLGE_RESET_WAIT_CNT) { msleep(HCLGE_RESET_WATI_MS); @@ -3352,7 +3335,19 @@ static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset) return 0; } -static int hclge_func_reset_sync_vf(struct hclge_dev *hdev) +static void hclge_mailbox_service_task(struct hclge_dev *hdev) +{ + if (!test_and_clear_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state) || + test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state) || + test_and_set_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state)) + return; + + hclge_mbx_handler(hdev); + + clear_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state); +} + +static void hclge_func_reset_sync_vf(struct hclge_dev *hdev) { struct hclge_pf_rst_sync_cmd *req; struct hclge_desc desc; @@ -3363,26 +3358,28 @@ static int hclge_func_reset_sync_vf(struct hclge_dev *hdev) hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_VF_RST_RDY, true); do { + /* vf need to down netdev by mbx during PF or FLR reset */ + hclge_mailbox_service_task(hdev); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); /* for compatible with old firmware, wait * 100 ms for VF to stop IO */ if (ret == -EOPNOTSUPP) { msleep(HCLGE_RESET_SYNC_TIME); - return 0; + return; } else if (ret) { - dev_err(&hdev->pdev->dev, "sync with VF fail %d!\n", - ret); - return ret; + dev_warn(&hdev->pdev->dev, "sync with VF fail %d!\n", + ret); + return; } else if (req->all_vf_ready) { - return 0; + return; } msleep(HCLGE_PF_RESET_SYNC_TIME); hclge_cmd_reuse_desc(&desc, true); } while (cnt++ < HCLGE_PF_RESET_SYNC_CNT); - dev_err(&hdev->pdev->dev, "sync with VF timeout!\n"); - return -ETIME; + dev_warn(&hdev->pdev->dev, "sync with VF timeout!\n"); } void hclge_report_hw_error(struct hclge_dev *hdev, @@ -3462,12 +3459,6 @@ static void hclge_do_reset(struct hclge_dev *hdev) set_bit(HNAE3_FUNC_RESET, &hdev->reset_pending); hclge_reset_task_schedule(hdev); break; - case HNAE3_FLR_RESET: - dev_info(&pdev->dev, "FLR requested\n"); - /* schedule again to check later */ - set_bit(HNAE3_FLR_RESET, &hdev->reset_pending); - hclge_reset_task_schedule(hdev); - break; default: dev_warn(&pdev->dev, "Unsupported reset type: %d\n", hdev->reset_type); @@ -3483,10 +3474,15 @@ static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev, /* first, resolve any unknown reset type to the known type(s) */ if (test_bit(HNAE3_UNKNOWN_RESET, addr)) { + u32 msix_sts_reg = hclge_read_dev(&hdev->hw, + HCLGE_VECTOR0_PF_OTHER_INT_STS_REG); /* we will intentionally ignore any errors from this function * as we will end up in *some* reset request in any case */ - hclge_handle_hw_msix_error(hdev, addr); + if (hclge_handle_hw_msix_error(hdev, addr)) + dev_info(&hdev->pdev->dev, "received msix interrupt 0x%x\n", + msix_sts_reg); + clear_bit(HNAE3_UNKNOWN_RESET, addr); /* We defered the clearing of the error event which caused * interrupt since it was not posssible to do that in @@ -3551,23 +3547,6 @@ static void hclge_clear_reset_cause(struct hclge_dev *hdev) hclge_enable_vector(&hdev->misc_vector, true); } -static int hclge_reset_prepare_down(struct hclge_dev *hdev) -{ - int ret = 0; - - switch (hdev->reset_type) { - case HNAE3_FUNC_RESET: - /* fall through */ - 
case HNAE3_FLR_RESET: - ret = hclge_set_all_vf_rst(hdev, true); - break; - default: - break; - } - - return ret; -} - static void hclge_reset_handshake(struct hclge_dev *hdev, bool enable) { u32 reg_val; @@ -3581,6 +3560,19 @@ static void hclge_reset_handshake(struct hclge_dev *hdev, bool enable) hclge_write_dev(&hdev->hw, HCLGE_NIC_CSQ_DEPTH_REG, reg_val); } +static int hclge_func_reset_notify_vf(struct hclge_dev *hdev) +{ + int ret; + + ret = hclge_set_all_vf_rst(hdev, true); + if (ret) + return ret; + + hclge_func_reset_sync_vf(hdev); + + return 0; +} + static int hclge_reset_prepare_wait(struct hclge_dev *hdev) { u32 reg_val; @@ -3588,10 +3580,7 @@ static int hclge_reset_prepare_wait(struct hclge_dev *hdev) switch (hdev->reset_type) { case HNAE3_FUNC_RESET: - /* to confirm whether all running VF is ready - * before request PF reset - */ - ret = hclge_func_reset_sync_vf(hdev); + ret = hclge_func_reset_notify_vf(hdev); if (ret) return ret; @@ -3611,16 +3600,9 @@ static int hclge_reset_prepare_wait(struct hclge_dev *hdev) hdev->rst_stats.pf_rst_cnt++; break; case HNAE3_FLR_RESET: - /* to confirm whether all running VF is ready - * before request PF reset - */ - ret = hclge_func_reset_sync_vf(hdev); + ret = hclge_func_reset_notify_vf(hdev); if (ret) return ret; - - set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); - set_bit(HNAE3_FLR_DOWN, &hdev->flr_state); - hdev->rst_stats.flr_rst_cnt++; break; case HNAE3_IMP_RESET: hclge_handle_imp_error(hdev); @@ -3672,6 +3654,8 @@ static bool hclge_reset_err_handle(struct hclge_dev *hdev) hclge_dbg_dump_rst_info(hdev); + set_bit(HCLGE_STATE_RST_FAIL, &hdev->state); + return false; } @@ -3747,10 +3731,9 @@ static int hclge_reset_stack(struct hclge_dev *hdev) return hclge_notify_client(hdev, HNAE3_RESTORE_CLIENT); } -static void hclge_reset(struct hclge_dev *hdev) +static int hclge_reset_prepare(struct hclge_dev *hdev) { struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); - enum hnae3_reset_type reset_level; int ret; /* Initialize ae_dev reset status as well, in case enet layer wants to @@ -3761,45 +3744,41 @@ static void hclge_reset(struct hclge_dev *hdev) /* perform reset of the stack & ae device for a client */ ret = hclge_notify_roce_client(hdev, HNAE3_DOWN_CLIENT); if (ret) - goto err_reset; - - ret = hclge_reset_prepare_down(hdev); - if (ret) - goto err_reset; + return ret; rtnl_lock(); ret = hclge_notify_client(hdev, HNAE3_DOWN_CLIENT); - if (ret) - goto err_reset_lock; - rtnl_unlock(); - - ret = hclge_reset_prepare_wait(hdev); if (ret) - goto err_reset; + return ret; - if (hclge_reset_wait(hdev)) - goto err_reset; + return hclge_reset_prepare_wait(hdev); +} + +static int hclge_reset_rebuild(struct hclge_dev *hdev) +{ + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + enum hnae3_reset_type reset_level; + int ret; hdev->rst_stats.hw_reset_done_cnt++; ret = hclge_notify_roce_client(hdev, HNAE3_UNINIT_CLIENT); if (ret) - goto err_reset; + return ret; rtnl_lock(); - ret = hclge_reset_stack(hdev); + rtnl_unlock(); if (ret) - goto err_reset_lock; + return ret; hclge_clear_reset_cause(hdev); ret = hclge_reset_prepare_up(hdev); if (ret) - goto err_reset_lock; + return ret; - rtnl_unlock(); ret = hclge_notify_roce_client(hdev, HNAE3_INIT_CLIENT); /* ignore RoCE notify error if it fails HCLGE_RESET_MAX_FAIL_CNT - 1 @@ -3807,24 +3786,23 @@ static void hclge_reset(struct hclge_dev *hdev) */ if (ret && hdev->rst_stats.reset_fail_cnt < HCLGE_RESET_MAX_FAIL_CNT - 1) - goto err_reset; + return ret; rtnl_lock(); - ret = hclge_notify_client(hdev, 
HNAE3_UP_CLIENT); - if (ret) - goto err_reset_lock; - rtnl_unlock(); + if (ret) + return ret; ret = hclge_notify_roce_client(hdev, HNAE3_UP_CLIENT); if (ret) - goto err_reset; + return ret; hdev->last_reset_time = jiffies; hdev->rst_stats.reset_fail_cnt = 0; hdev->rst_stats.reset_done_cnt++; ae_dev->reset_type = HNAE3_NONE_RESET; + clear_bit(HCLGE_STATE_RST_FAIL, &hdev->state); /* if default_reset_request has a higher level reset request, * it should be handled as soon as possible. since some errors @@ -3835,10 +3813,22 @@ static void hclge_reset(struct hclge_dev *hdev) if (reset_level != HNAE3_NONE_RESET) set_bit(reset_level, &hdev->reset_request); + return 0; +} + +static void hclge_reset(struct hclge_dev *hdev) +{ + if (hclge_reset_prepare(hdev)) + goto err_reset; + + if (hclge_reset_wait(hdev)) + goto err_reset; + + if (hclge_reset_rebuild(hdev)) + goto err_reset; + return; -err_reset_lock: - rtnl_unlock(); err_reset: if (hclge_reset_err_handle(hdev)) hclge_reset_task_schedule(hdev); @@ -3939,34 +3929,18 @@ static void hclge_reset_subtask(struct hclge_dev *hdev) hdev->reset_type = HNAE3_NONE_RESET; } -static void hclge_reset_service_task(struct work_struct *work) +static void hclge_reset_service_task(struct hclge_dev *hdev) { - struct hclge_dev *hdev = - container_of(work, struct hclge_dev, rst_service_task); - - if (test_and_set_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + if (!test_and_clear_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state)) return; - clear_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state); + down(&hdev->reset_sem); + set_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); hclge_reset_subtask(hdev); clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); -} - -static void hclge_mailbox_service_task(struct work_struct *work) -{ - struct hclge_dev *hdev = - container_of(work, struct hclge_dev, mbx_service_task); - - if (test_and_set_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state)) - return; - - clear_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state); - - hclge_mbx_handler(hdev); - - clear_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state); + up(&hdev->reset_sem); } static void hclge_update_vport_alive(struct hclge_dev *hdev) @@ -3986,29 +3960,62 @@ static void hclge_update_vport_alive(struct hclge_dev *hdev) } } -static void hclge_service_task(struct work_struct *work) +static void hclge_periodic_service_task(struct hclge_dev *hdev) { - struct hclge_dev *hdev = - container_of(work, struct hclge_dev, service_task.work); + unsigned long delta = round_jiffies_relative(HZ); - clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state); + /* Always handle the link updating to make sure link state is + * updated when it is triggered by mbx. 
+ */ + hclge_update_link_status(hdev); - if (hdev->hw_stats.stats_timer >= HCLGE_STATS_TIMER_INTERVAL) { - hclge_update_stats_for_all(hdev); - hdev->hw_stats.stats_timer = 0; + if (time_is_after_jiffies(hdev->last_serv_processed + HZ)) { + delta = jiffies - hdev->last_serv_processed; + + if (delta < round_jiffies_relative(HZ)) { + delta = round_jiffies_relative(HZ) - delta; + goto out; + } } - hclge_update_port_info(hdev); - hclge_update_link_status(hdev); + hdev->serv_processed_cnt++; hclge_update_vport_alive(hdev); + + if (test_bit(HCLGE_STATE_DOWN, &hdev->state)) { + hdev->last_serv_processed = jiffies; + goto out; + } + + if (!(hdev->serv_processed_cnt % HCLGE_STATS_TIMER_INTERVAL)) + hclge_update_stats_for_all(hdev); + + hclge_update_port_info(hdev); hclge_sync_vlan_filter(hdev); - if (hdev->fd_arfs_expire_timer >= HCLGE_FD_ARFS_EXPIRE_TIMER_INTERVAL) { + if (!(hdev->serv_processed_cnt % HCLGE_ARFS_EXPIRE_INTERVAL)) hclge_rfs_filter_expire(hdev); - hdev->fd_arfs_expire_timer = 0; - } - hclge_task_schedule(hdev, round_jiffies_relative(HZ)); + hdev->last_serv_processed = jiffies; + +out: + hclge_task_schedule(hdev, delta); +} + +static void hclge_service_task(struct work_struct *work) +{ + struct hclge_dev *hdev = + container_of(work, struct hclge_dev, service_task.work); + + hclge_reset_service_task(hdev); + hclge_mailbox_service_task(hdev); + hclge_periodic_service_task(hdev); + + /* Handle reset and mbx again in case periodical task delays the + * handling by calling hclge_task_schedule() in + * hclge_periodic_service_task(). + */ + hclge_reset_service_task(hdev); + hclge_mailbox_service_task(hdev); } struct hclge_vport *hclge_get_vport(struct hnae3_handle *handle) @@ -4079,7 +4086,7 @@ static int hclge_put_vector(struct hnae3_handle *handle, int vector) vector_id = hclge_get_vector_index(hdev, vector); if (vector_id < 0) { dev_err(&hdev->pdev->dev, - "Get vector index fail. vector_id =%d\n", vector_id); + "Get vector index fail. vector = %d\n", vector); return vector_id; } @@ -4654,7 +4661,7 @@ static int hclge_map_ring_to_vector(struct hnae3_handle *handle, int vector, vector_id = hclge_get_vector_index(hdev, vector); if (vector_id < 0) { dev_err(&hdev->pdev->dev, - "Get vector index fail. vector_id =%d\n", vector_id); + "failed to get vector index. 
vector=%d\n", vector); return vector_id; } @@ -6562,7 +6569,7 @@ static int hclge_set_serdes_loopback(struct hclge_dev *hdev, bool en, hclge_cfg_mac_mode(hdev, en); - ret = hclge_mac_phy_link_status_wait(hdev, en, FALSE); + ret = hclge_mac_phy_link_status_wait(hdev, en, false); if (ret) dev_err(&hdev->pdev->dev, "serdes loopback config mac mode timeout\n"); @@ -6620,7 +6627,7 @@ static int hclge_set_phy_loopback(struct hclge_dev *hdev, bool en) hclge_cfg_mac_mode(hdev, en); - ret = hclge_mac_phy_link_status_wait(hdev, en, TRUE); + ret = hclge_mac_phy_link_status_wait(hdev, en, true); if (ret) dev_err(&hdev->pdev->dev, "phy loopback config mac mode timeout\n"); @@ -6734,6 +6741,19 @@ static void hclge_reset_tqp_stats(struct hnae3_handle *handle) } } +static void hclge_flush_link_update(struct hclge_dev *hdev) +{ +#define HCLGE_FLUSH_LINK_TIMEOUT 100000 + + unsigned long last = hdev->serv_processed_cnt; + int i = 0; + + while (test_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state) && + i++ < HCLGE_FLUSH_LINK_TIMEOUT && + last == hdev->serv_processed_cnt) + usleep_range(1, 1); +} + static void hclge_set_timer_task(struct hnae3_handle *handle, bool enable) { struct hclge_vport *vport = hclge_get_vport(handle); @@ -6742,12 +6762,12 @@ static void hclge_set_timer_task(struct hnae3_handle *handle, bool enable) if (enable) { hclge_task_schedule(hdev, round_jiffies_relative(HZ)); } else { - /* Set the DOWN flag here to disable the service to be - * scheduled again - */ + /* Set the DOWN flag here to disable link updating */ set_bit(HCLGE_STATE_DOWN, &hdev->state); - cancel_delayed_work_sync(&hdev->service_task); - clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state); + + /* flush memory to make sure DOWN is seen by service task */ + smp_mb__before_atomic(); + hclge_flush_link_update(hdev); } } @@ -7483,7 +7503,6 @@ void hclge_uninit_vport_mac_table(struct hclge_dev *hdev) struct hclge_vport *vport; int i; - mutex_lock(&hdev->vport_cfg_mutex); for (i = 0; i < hdev->num_alloc_vport; i++) { vport = &hdev->vport[i]; list_for_each_entry_safe(mac, tmp, &vport->uc_mac_list, node) { @@ -7496,7 +7515,6 @@ void hclge_uninit_vport_mac_table(struct hclge_dev *hdev) kfree(mac); } } - mutex_unlock(&hdev->vport_cfg_mutex); } static int hclge_get_mac_ethertype_cmd_status(struct hclge_dev *hdev, @@ -8257,7 +8275,6 @@ void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev) struct hclge_vport *vport; int i; - mutex_lock(&hdev->vport_cfg_mutex); for (i = 0; i < hdev->num_alloc_vport; i++) { vport = &hdev->vport[i]; list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { @@ -8265,7 +8282,6 @@ void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev) kfree(vlan); } } - mutex_unlock(&hdev->vport_cfg_mutex); } static void hclge_restore_vlan_table(struct hnae3_handle *handle) @@ -8277,7 +8293,6 @@ static void hclge_restore_vlan_table(struct hnae3_handle *handle) u16 state, vlan_id; int i; - mutex_lock(&hdev->vport_cfg_mutex); for (i = 0; i < hdev->num_alloc_vport; i++) { vport = &hdev->vport[i]; vlan_proto = vport->port_base_vlan_cfg.vlan_info.vlan_proto; @@ -8303,8 +8318,6 @@ static void hclge_restore_vlan_table(struct hnae3_handle *handle) break; } } - - mutex_unlock(&hdev->vport_cfg_mutex); } int hclge_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) @@ -9256,6 +9269,7 @@ static void hclge_state_init(struct hclge_dev *hdev) set_bit(HCLGE_STATE_DOWN, &hdev->state); clear_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state); clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); + clear_bit(HCLGE_STATE_RST_FAIL, 
&hdev->state); clear_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state); clear_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state); } @@ -9269,38 +9283,57 @@ static void hclge_state_uninit(struct hclge_dev *hdev) del_timer_sync(&hdev->reset_timer); if (hdev->service_task.work.func) cancel_delayed_work_sync(&hdev->service_task); - if (hdev->rst_service_task.func) - cancel_work_sync(&hdev->rst_service_task); - if (hdev->mbx_service_task.func) - cancel_work_sync(&hdev->mbx_service_task); } static void hclge_flr_prepare(struct hnae3_ae_dev *ae_dev) { -#define HCLGE_FLR_WAIT_MS 100 -#define HCLGE_FLR_WAIT_CNT 50 - struct hclge_dev *hdev = ae_dev->priv; - int cnt = 0; +#define HCLGE_FLR_RETRY_WAIT_MS 500 +#define HCLGE_FLR_RETRY_CNT 5 - clear_bit(HNAE3_FLR_DOWN, &hdev->flr_state); - clear_bit(HNAE3_FLR_DONE, &hdev->flr_state); - set_bit(HNAE3_FLR_RESET, &hdev->default_reset_request); - hclge_reset_event(hdev->pdev, NULL); + struct hclge_dev *hdev = ae_dev->priv; + int retry_cnt = 0; + int ret; - while (!test_bit(HNAE3_FLR_DOWN, &hdev->flr_state) && - cnt++ < HCLGE_FLR_WAIT_CNT) - msleep(HCLGE_FLR_WAIT_MS); +retry: + down(&hdev->reset_sem); + set_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); + hdev->reset_type = HNAE3_FLR_RESET; + ret = hclge_reset_prepare(hdev); + if (ret) { + dev_err(&hdev->pdev->dev, "fail to prepare FLR, ret=%d\n", + ret); + if (hdev->reset_pending || + retry_cnt++ < HCLGE_FLR_RETRY_CNT) { + dev_err(&hdev->pdev->dev, + "reset_pending:0x%lx, retry_cnt:%d\n", + hdev->reset_pending, retry_cnt); + clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); + up(&hdev->reset_sem); + msleep(HCLGE_FLR_RETRY_WAIT_MS); + goto retry; + } + } - if (!test_bit(HNAE3_FLR_DOWN, &hdev->flr_state)) - dev_err(&hdev->pdev->dev, - "flr wait down timeout: %d\n", cnt); + /* disable misc vector before FLR done */ + hclge_enable_vector(&hdev->misc_vector, false); + set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); + hdev->rst_stats.flr_rst_cnt++; } static void hclge_flr_done(struct hnae3_ae_dev *ae_dev) { struct hclge_dev *hdev = ae_dev->priv; + int ret; + + hclge_enable_vector(&hdev->misc_vector, true); + + ret = hclge_reset_rebuild(hdev); + if (ret) + dev_err(&hdev->pdev->dev, "fail to rebuild, ret=%d\n", ret); - set_bit(HNAE3_FLR_DONE, &hdev->flr_state); + hdev->reset_type = HNAE3_NONE_RESET; + clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); + up(&hdev->reset_sem); } static void hclge_clear_resetting_state(struct hclge_dev *hdev) @@ -9342,21 +9375,17 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) hdev->mps = ETH_FRAME_LEN + ETH_FCS_LEN + 2 * VLAN_HLEN; mutex_init(&hdev->vport_lock); - mutex_init(&hdev->vport_cfg_mutex); spin_lock_init(&hdev->fd_rule_lock); + sema_init(&hdev->reset_sem, 1); ret = hclge_pci_init(hdev); - if (ret) { - dev_err(&pdev->dev, "PCI init failed\n"); + if (ret) goto out; - } /* Firmware command queue initialize */ ret = hclge_cmd_queue_init(hdev); - if (ret) { - dev_err(&pdev->dev, "Cmd queue init failed, ret = %d.\n", ret); + if (ret) goto err_pci_uninit; - } /* Firmware command initialize */ ret = hclge_cmd_init(hdev); @@ -9364,11 +9393,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) goto err_cmd_uninit; ret = hclge_get_cap(hdev); - if (ret) { - dev_err(&pdev->dev, "get hw capability error, ret = %d.\n", - ret); + if (ret) goto err_cmd_uninit; - } ret = hclge_configure(hdev); if (ret) { @@ -9383,12 +9409,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) } ret = hclge_misc_irq_init(hdev); - if (ret) { - dev_err(&pdev->dev, - "Misc IRQ(vector0) init 
error, ret = %d.\n", - ret); + if (ret) goto err_msi_uninit; - } ret = hclge_alloc_tqps(hdev); if (ret) { @@ -9397,31 +9419,22 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) } ret = hclge_alloc_vport(hdev); - if (ret) { - dev_err(&pdev->dev, "Allocate vport error, ret = %d.\n", ret); + if (ret) goto err_msi_irq_uninit; - } ret = hclge_map_tqp(hdev); - if (ret) { - dev_err(&pdev->dev, "Map tqp error, ret = %d.\n", ret); + if (ret) goto err_msi_irq_uninit; - } if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER) { ret = hclge_mac_mdio_config(hdev); - if (ret) { - dev_err(&hdev->pdev->dev, - "mdio config fail ret=%d\n", ret); + if (ret) goto err_msi_irq_uninit; - } } ret = hclge_init_umv_space(hdev); - if (ret) { - dev_err(&pdev->dev, "umv space init error, ret=%d.\n", ret); + if (ret) goto err_mdiobus_unreg; - } ret = hclge_mac_init(hdev); if (ret) { @@ -9477,8 +9490,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) timer_setup(&hdev->reset_timer, hclge_reset_timer, 0); INIT_DELAYED_WORK(&hdev->service_task, hclge_service_task); - INIT_WORK(&hdev->rst_service_task, hclge_reset_service_task); - INIT_WORK(&hdev->mbx_service_task, hclge_mailbox_service_task); /* Setup affinity after service timer setup because add_timer_on * is called in affinity notify. @@ -9512,6 +9523,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) dev_info(&hdev->pdev->dev, "%s driver initialization finished.\n", HCLGE_DRIVER_NAME); + hclge_task_schedule(hdev, round_jiffies_relative(HZ)); + return 0; err_mdiobus_unreg: @@ -9534,7 +9547,7 @@ out: static void hclge_stats_clear(struct hclge_dev *hdev) { - memset(&hdev->hw_stats, 0, sizeof(hdev->hw_stats)); + memset(&hdev->mac_stats, 0, sizeof(hdev->mac_stats)); } static int hclge_set_mac_spoofchk(struct hclge_dev *hdev, int vf, bool enable) @@ -9895,7 +9908,6 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) mutex_destroy(&hdev->vport_lock); hclge_uninit_vport_mac_table(hdev); hclge_uninit_vport_vlan_table(hdev); - mutex_destroy(&hdev->vport_cfg_mutex); ae_dev->priv = NULL; } @@ -10157,10 +10169,8 @@ static int hclge_get_dfx_reg_bd_num(struct hclge_dev *hdev, int *bd_num_list, u32 type_num) { -#define HCLGE_DFX_REG_BD_NUM 4 - u32 entries_per_desc, desc_index, index, offset, i; - struct hclge_desc desc[HCLGE_DFX_REG_BD_NUM]; + struct hclge_desc desc[HCLGE_GET_DFX_REG_TYPE_CNT]; int ret; ret = hclge_query_bd_num_cmd_send(hdev, desc); @@ -10273,10 +10283,8 @@ static int hclge_get_dfx_reg(struct hclge_dev *hdev, void *data) buf_len = sizeof(*desc_src) * bd_num_max; desc_src = kzalloc(buf_len, GFP_KERNEL); - if (!desc_src) { - dev_err(&hdev->pdev->dev, "%s kzalloc failed\n", __func__); + if (!desc_src) return -ENOMEM; - } for (i = 0; i < dfx_reg_type_num; i++) { bd_num = bd_num_list[i]; @@ -10611,6 +10619,12 @@ static int hclge_init(void) { pr_info("%s is initializing\n", HCLGE_NAME); + hclge_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, HCLGE_NAME); + if (!hclge_wq) { + pr_err("%s: failed to create workqueue\n", HCLGE_NAME); + return -ENOMEM; + } + hnae3_register_ae_algo(&ae_algo); return 0; @@ -10619,6 +10633,7 @@ static int hclge_init(void) static void hclge_exit(void) { hnae3_unregister_ae_algo(&ae_algo); + destroy_workqueue(hclge_wq); } module_init(hclge_init); module_exit(hclge_exit); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index ebb4c6e9aed3..f78cbb4cc85e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ 
b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -139,6 +139,8 @@ #define HCLGE_PHY_MDIX_STATUS_B 6 #define HCLGE_PHY_SPEED_DUP_RESOLVE_B 11 +#define HCLGE_GET_DFX_REG_TYPE_CNT 4 + /* Factor used to calculate offset and bitmap of VF num */ #define HCLGE_VF_NUM_PER_CMD 64 @@ -208,13 +210,14 @@ enum HCLGE_DEV_STATE { HCLGE_STATE_NIC_REGISTERED, HCLGE_STATE_ROCE_REGISTERED, HCLGE_STATE_SERVICE_INITED, - HCLGE_STATE_SERVICE_SCHED, HCLGE_STATE_RST_SERVICE_SCHED, HCLGE_STATE_RST_HANDLING, HCLGE_STATE_MBX_SERVICE_SCHED, HCLGE_STATE_MBX_HANDLING, HCLGE_STATE_STATISTICS_UPDATING, HCLGE_STATE_CMD_DISABLE, + HCLGE_STATE_LINK_UPDATING, + HCLGE_STATE_RST_FAIL, HCLGE_STATE_MAX }; @@ -454,11 +457,7 @@ struct hclge_mac_stats { u64 mac_rx_ctrl_pkt_num; }; -#define HCLGE_STATS_TIMER_INTERVAL (60 * 5) -struct hclge_hw_stats { - struct hclge_mac_stats mac_stats; - u32 stats_timer; -}; +#define HCLGE_STATS_TIMER_INTERVAL 300UL struct hclge_vlan_type_cfg { u16 rx_ot_fst_vlan_type; @@ -549,7 +548,7 @@ struct key_info { /* assigned by firmware, the real filter number for each pf may be less */ #define MAX_FD_FILTER_NUM 4096 -#define HCLGE_FD_ARFS_EXPIRE_TIMER_INTERVAL 5 +#define HCLGE_ARFS_EXPIRE_INTERVAL 5UL enum HCLGE_FD_ACTIVE_RULE_TYPE { HCLGE_FD_RULE_NONE, @@ -712,7 +711,7 @@ struct hclge_dev { struct hnae3_ae_dev *ae_dev; struct hclge_hw hw; struct hclge_misc_vector misc_vector; - struct hclge_hw_stats hw_stats; + struct hclge_mac_stats mac_stats; unsigned long state; unsigned long flr_state; unsigned long last_reset_time; @@ -723,6 +722,7 @@ struct hclge_dev { unsigned long reset_request; /* reset has been requested */ unsigned long reset_pending; /* client rst is pending to be served */ struct hclge_rst_stats rst_stats; + struct semaphore reset_sem; /* protect reset process */ u32 fw_version; u16 num_vmdq_vport; /* Num vmdq vport this PF has set up */ u16 num_tqps; /* Num task queue pairs of this PF */ @@ -774,8 +774,6 @@ struct hclge_dev { unsigned long service_timer_previous; struct timer_list reset_timer; struct delayed_work service_task; - struct work_struct rst_service_task; - struct work_struct mbx_service_task; bool cur_promisc; int num_alloc_vfs; /* Actual number of VFs allocated */ @@ -811,7 +809,8 @@ struct hclge_dev { struct hlist_head fd_rule_list; spinlock_t fd_rule_lock; /* protect fd_rule_list and fd_bmap */ u16 hclge_fd_rule_num; - u16 fd_arfs_expire_timer; + unsigned long serv_processed_cnt; + unsigned long last_serv_processed; unsigned long fd_bmap[BITS_TO_LONGS(MAX_FD_FILTER_NUM)]; enum HCLGE_FD_ACTIVE_RULE_TYPE fd_active_type; u8 fd_en; @@ -825,8 +824,6 @@ struct hclge_dev { u16 share_umv_size; struct mutex umv_mutex; /* protect share_umv_size */ - struct mutex vport_cfg_mutex; /* Protect stored vf table */ - DECLARE_KFIFO(mac_tnl_log, struct hclge_mac_tnl_stats, HCLGE_MAC_TNL_LOG_SIZE); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index 0b433ebe6a2d..a3c0822191a9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -86,10 +86,12 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len, int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport) { struct hclge_dev *hdev = vport->back; - enum hnae3_reset_type reset_type; + u16 reset_type; u8 msg_data[2]; u8 dest_vfid; + BUILD_BUG_ON(HNAE3_MAX_RESET > U16_MAX); + dest_vfid = (u8)vport->vport_id; if (hdev->reset_type == HNAE3_FUNC_RESET) @@ 
-635,7 +637,6 @@ static void hclge_handle_link_change_event(struct hclge_dev *hdev, #define LINK_STATUS_OFFSET 1 #define LINK_FAIL_CODE_OFFSET 2 - clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state); hclge_task_schedule(hdev, 0); if (!req->msg[LINK_STATUS_OFFSET]) @@ -798,13 +799,11 @@ void hclge_mbx_handler(struct hclge_dev *hdev) hclge_get_link_mode(vport, req); break; case HCLGE_MBX_GET_VF_FLR_STATUS: - mutex_lock(&hdev->vport_cfg_mutex); hclge_rm_vport_all_mac_table(vport, true, HCLGE_MAC_ADDR_UC); hclge_rm_vport_all_mac_table(vport, true, HCLGE_MAC_ADDR_MC); hclge_rm_vport_all_vlan_table(vport, true); - mutex_unlock(&hdev->vport_cfg_mutex); break; case HCLGE_MBX_GET_MEDIA_TYPE: ret = hclge_get_vf_media_type(vport, req); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c index af2245e3bb95..f38d236ebf4f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c @@ -443,7 +443,7 @@ void hclgevf_cmd_uninit(struct hclgevf_dev *hdev) { spin_lock_bh(&hdev->hw.cmq.csq.lock); spin_lock(&hdev->hw.cmq.crq.lock); - clear_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state); + set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state); hclgevf_cmd_uninit_regs(&hdev->hw); spin_unlock(&hdev->hw.cmq.crq.lock); spin_unlock_bh(&hdev->hw.cmq.csq.lock); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 25d78a5aaa34..d6597206e692 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -16,6 +16,8 @@ static int hclgevf_reset_hdev(struct hclgevf_dev *hdev); static struct hnae3_ae_algo ae_algovf; +static struct workqueue_struct *hclgevf_wq; + static const struct pci_device_id ae_algovf_pci_tbl[] = { {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_VF), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_DCB_PFC_VF), 0}, @@ -440,6 +442,9 @@ void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state) struct hnae3_client *rclient; struct hnae3_client *client; + if (test_and_set_bit(HCLGEVF_STATE_LINK_UPDATING, &hdev->state)) + return; + client = handle->client; rclient = hdev->roce_client; @@ -452,6 +457,8 @@ void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state) rclient->ops->link_status_change(rhandle, !!link_state); hdev->hw.mac.link = link_state; } + + clear_bit(HCLGEVF_STATE_LINK_UPDATING, &hdev->state); } static void hclgevf_update_link_mode(struct hclgevf_dev *hdev) @@ -1309,14 +1316,13 @@ static int hclgevf_set_vlan_filter(struct hnae3_handle *handle, msg_data[0] = is_kill; memcpy(&msg_data[1], &vlan_id, sizeof(vlan_id)); memcpy(&msg_data[3], &proto, sizeof(proto)); - ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_VLAN, - HCLGE_MBX_VLAN_FILTER, msg_data, - HCLGEVF_VLAN_MBX_MSG_LEN, true, NULL, 0); - /* when remove hw vlan filter failed, record the vlan id, * and try to remove it from hw later, to be consistence * with stack. 
*/ + ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_VLAN, + HCLGE_MBX_VLAN_FILTER, msg_data, + HCLGEVF_VLAN_MBX_MSG_LEN, true, NULL, 0); if (is_kill && ret) set_bit(vlan_id, hdev->vlan_del_fail_bmap); @@ -1404,32 +1410,6 @@ static int hclgevf_notify_client(struct hclgevf_dev *hdev, return ret; } -static void hclgevf_flr_done(struct hnae3_ae_dev *ae_dev) -{ - struct hclgevf_dev *hdev = ae_dev->priv; - - set_bit(HNAE3_FLR_DONE, &hdev->flr_state); -} - -static int hclgevf_flr_poll_timeout(struct hclgevf_dev *hdev, - unsigned long delay_us, - unsigned long wait_cnt) -{ - unsigned long cnt = 0; - - while (!test_bit(HNAE3_FLR_DONE, &hdev->flr_state) && - cnt++ < wait_cnt) - usleep_range(delay_us, delay_us * 2); - - if (!test_bit(HNAE3_FLR_DONE, &hdev->flr_state)) { - dev_err(&hdev->pdev->dev, - "flr wait timeout\n"); - return -ETIMEDOUT; - } - - return 0; -} - static int hclgevf_reset_wait(struct hclgevf_dev *hdev) { #define HCLGEVF_RESET_WAIT_US 20000 @@ -1440,11 +1420,7 @@ static int hclgevf_reset_wait(struct hclgevf_dev *hdev) u32 val; int ret; - if (hdev->reset_type == HNAE3_FLR_RESET) - return hclgevf_flr_poll_timeout(hdev, - HCLGEVF_RESET_WAIT_US, - HCLGEVF_RESET_WAIT_CNT); - else if (hdev->reset_type == HNAE3_VF_RESET) + if (hdev->reset_type == HNAE3_VF_RESET) ret = readl_poll_timeout(hdev->hw.io_base + HCLGEVF_VF_RST_ING, val, !(val & HCLGEVF_VF_RST_ING_BIT), @@ -1516,7 +1492,8 @@ static int hclgevf_reset_stack(struct hclgevf_dev *hdev) /* clear handshake status with IMP */ hclgevf_reset_handshake(hdev, false); - return 0; + /* bring up the nic to enable TX/RX again */ + return hclgevf_notify_client(hdev, HNAE3_UP_CLIENT); } static int hclgevf_reset_prepare_wait(struct hclgevf_dev *hdev) @@ -1525,18 +1502,10 @@ static int hclgevf_reset_prepare_wait(struct hclgevf_dev *hdev) int ret = 0; - switch (hdev->reset_type) { - case HNAE3_VF_FUNC_RESET: + if (hdev->reset_type == HNAE3_VF_FUNC_RESET) { ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_RESET, 0, NULL, 0, true, NULL, sizeof(u8)); hdev->rst_stats.vf_func_rst_cnt++; - break; - case HNAE3_FLR_RESET: - set_bit(HNAE3_FLR_DOWN, &hdev->flr_state); - hdev->rst_stats.flr_rst_cnt++; - break; - default: - break; } set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state); @@ -1591,11 +1560,12 @@ static void hclgevf_reset_err_handle(struct hclgevf_dev *hdev) set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state); hclgevf_reset_task_schedule(hdev); } else { + set_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state); hclgevf_dump_rst_info(hdev); } } -static int hclgevf_reset(struct hclgevf_dev *hdev) +static int hclgevf_reset_prepare(struct hclgevf_dev *hdev) { struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); int ret; @@ -1605,61 +1575,64 @@ static int hclgevf_reset(struct hclgevf_dev *hdev) */ ae_dev->reset_type = hdev->reset_type; hdev->rst_stats.rst_cnt++; - rtnl_lock(); + rtnl_lock(); /* bring down the nic to stop any ongoing TX/RX */ ret = hclgevf_notify_client(hdev, HNAE3_DOWN_CLIENT); - if (ret) - goto err_reset_lock; - rtnl_unlock(); - - ret = hclgevf_reset_prepare_wait(hdev); if (ret) - goto err_reset; + return ret; - /* check if VF could successfully fetch the hardware reset completion - * status from the hardware - */ - ret = hclgevf_reset_wait(hdev); - if (ret) { - /* can't do much in this situation, will disable VF */ - dev_err(&hdev->pdev->dev, - "VF failed(=%d) to fetch H/W reset completion status\n", - ret); - goto err_reset; - } + return hclgevf_reset_prepare_wait(hdev); +} + +static int hclgevf_reset_rebuild(struct hclgevf_dev *hdev) +{ + struct 
hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + int ret; hdev->rst_stats.hw_rst_done_cnt++; rtnl_lock(); - /* now, re-initialize the nic client and ae device */ ret = hclgevf_reset_stack(hdev); + rtnl_unlock(); if (ret) { dev_err(&hdev->pdev->dev, "failed to reset VF stack\n"); - goto err_reset_lock; + return ret; } - /* bring up the nic to enable TX/RX again */ - ret = hclgevf_notify_client(hdev, HNAE3_UP_CLIENT); - if (ret) - goto err_reset_lock; - - rtnl_unlock(); - hdev->last_reset_time = jiffies; ae_dev->reset_type = HNAE3_NONE_RESET; hdev->rst_stats.rst_done_cnt++; hdev->rst_stats.rst_fail_cnt = 0; + clear_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state); + + return 0; +} + +static void hclgevf_reset(struct hclgevf_dev *hdev) +{ + if (hclgevf_reset_prepare(hdev)) + goto err_reset; + + /* check if VF could successfully fetch the hardware reset completion + * status from the hardware + */ + if (hclgevf_reset_wait(hdev)) { + /* can't do much in this situation, will disable VF */ + dev_err(&hdev->pdev->dev, + "failed to fetch H/W reset completion status\n"); + goto err_reset; + } + + if (hclgevf_reset_rebuild(hdev)) + goto err_reset; + + return; - return ret; -err_reset_lock: - rtnl_unlock(); err_reset: hclgevf_reset_err_handle(hdev); - - return ret; } static enum hnae3_reset_type hclgevf_get_reset_level(struct hclgevf_dev *hdev, @@ -1722,25 +1695,60 @@ static void hclgevf_set_def_reset_request(struct hnae3_ae_dev *ae_dev, set_bit(rst_type, &hdev->default_reset_request); } +static void hclgevf_enable_vector(struct hclgevf_misc_vector *vector, bool en) +{ + writel(en ? 1 : 0, vector->addr); +} + static void hclgevf_flr_prepare(struct hnae3_ae_dev *ae_dev) { -#define HCLGEVF_FLR_WAIT_MS 100 -#define HCLGEVF_FLR_WAIT_CNT 50 +#define HCLGEVF_FLR_RETRY_WAIT_MS 500 +#define HCLGEVF_FLR_RETRY_CNT 5 + struct hclgevf_dev *hdev = ae_dev->priv; - int cnt = 0; + int retry_cnt = 0; + int ret; - clear_bit(HNAE3_FLR_DOWN, &hdev->flr_state); - clear_bit(HNAE3_FLR_DONE, &hdev->flr_state); - set_bit(HNAE3_FLR_RESET, &hdev->default_reset_request); - hclgevf_reset_event(hdev->pdev, NULL); +retry: + down(&hdev->reset_sem); + set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state); + hdev->reset_type = HNAE3_FLR_RESET; + ret = hclgevf_reset_prepare(hdev); + if (ret) { + dev_err(&hdev->pdev->dev, "fail to prepare FLR, ret=%d\n", + ret); + if (hdev->reset_pending || + retry_cnt++ < HCLGEVF_FLR_RETRY_CNT) { + dev_err(&hdev->pdev->dev, + "reset_pending:0x%lx, retry_cnt:%d\n", + hdev->reset_pending, retry_cnt); + clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state); + up(&hdev->reset_sem); + msleep(HCLGEVF_FLR_RETRY_WAIT_MS); + goto retry; + } + } - while (!test_bit(HNAE3_FLR_DOWN, &hdev->flr_state) && - cnt++ < HCLGEVF_FLR_WAIT_CNT) - msleep(HCLGEVF_FLR_WAIT_MS); + /* disable misc vector before FLR done */ + hclgevf_enable_vector(&hdev->misc_vector, false); + hdev->rst_stats.flr_rst_cnt++; +} - if (!test_bit(HNAE3_FLR_DOWN, &hdev->flr_state)) - dev_err(&hdev->pdev->dev, - "flr wait down timeout: %d\n", cnt); +static void hclgevf_flr_done(struct hnae3_ae_dev *ae_dev) +{ + struct hclgevf_dev *hdev = ae_dev->priv; + int ret; + + hclgevf_enable_vector(&hdev->misc_vector, true); + + ret = hclgevf_reset_rebuild(hdev); + if (ret) + dev_warn(&hdev->pdev->dev, "fail to rebuild, ret=%d\n", + ret); + + hdev->reset_type = HNAE3_NONE_RESET; + clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state); + up(&hdev->reset_sem); } static u32 hclgevf_get_fw_version(struct hnae3_handle *handle) @@ -1767,62 +1775,37 @@ static void 
hclgevf_get_misc_vector(struct hclgevf_dev *hdev) void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev) { - if (!test_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state) && - !test_bit(HCLGEVF_STATE_REMOVING, &hdev->state)) { - set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state); - schedule_work(&hdev->rst_service_task); - } + if (!test_bit(HCLGEVF_STATE_REMOVING, &hdev->state) && + !test_and_set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, + &hdev->state)) + mod_delayed_work(hclgevf_wq, &hdev->service_task, 0); } void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev) { - if (!test_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state) && - !test_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state)) { - set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state); - schedule_work(&hdev->mbx_service_task); - } + if (!test_bit(HCLGEVF_STATE_REMOVING, &hdev->state) && + !test_and_set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, + &hdev->state)) + mod_delayed_work(hclgevf_wq, &hdev->service_task, 0); } -static void hclgevf_task_schedule(struct hclgevf_dev *hdev) +static void hclgevf_task_schedule(struct hclgevf_dev *hdev, + unsigned long delay) { - if (!test_bit(HCLGEVF_STATE_DOWN, &hdev->state) && - !test_and_set_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state)) - schedule_work(&hdev->service_task); + if (!test_bit(HCLGEVF_STATE_REMOVING, &hdev->state) && + !test_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state)) + mod_delayed_work(hclgevf_wq, &hdev->service_task, delay); } -static void hclgevf_deferred_task_schedule(struct hclgevf_dev *hdev) -{ - /* if we have any pending mailbox event then schedule the mbx task */ - if (hdev->mbx_event_pending) - hclgevf_mbx_task_schedule(hdev); - - if (test_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state)) - hclgevf_reset_task_schedule(hdev); -} - -static void hclgevf_service_timer(struct timer_list *t) -{ - struct hclgevf_dev *hdev = from_timer(hdev, t, service_timer); - - mod_timer(&hdev->service_timer, jiffies + - HCLGEVF_GENERAL_TASK_INTERVAL * HZ); - - hdev->stats_timer++; - hclgevf_task_schedule(hdev); -} - -static void hclgevf_reset_service_task(struct work_struct *work) +static void hclgevf_reset_service_task(struct hclgevf_dev *hdev) { #define HCLGEVF_MAX_RESET_ATTEMPTS_CNT 3 - struct hclgevf_dev *hdev = - container_of(work, struct hclgevf_dev, rst_service_task); - int ret; - - if (test_and_set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) + if (!test_and_clear_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state)) return; - clear_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state); + down(&hdev->reset_sem); + set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state); if (test_and_clear_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state)) { @@ -1836,12 +1819,8 @@ static void hclgevf_reset_service_task(struct work_struct *work) hdev->last_reset_time = jiffies; while ((hdev->reset_type = hclgevf_get_reset_level(hdev, &hdev->reset_pending)) - != HNAE3_NONE_RESET) { - ret = hclgevf_reset(hdev); - if (ret) - dev_err(&hdev->pdev->dev, - "VF stack reset failed %d.\n", ret); - } + != HNAE3_NONE_RESET) + hclgevf_reset(hdev); } else if (test_and_clear_bit(HCLGEVF_RESET_REQUESTED, &hdev->reset_state)) { /* we could be here when either of below happens: @@ -1882,42 +1861,29 @@ static void hclgevf_reset_service_task(struct work_struct *work) hclgevf_reset_task_schedule(hdev); } + hdev->reset_type = HNAE3_NONE_RESET; clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state); + up(&hdev->reset_sem); } -static void hclgevf_mailbox_service_task(struct work_struct *work) +static void hclgevf_mailbox_service_task(struct 
hclgevf_dev *hdev) { - struct hclgevf_dev *hdev; - - hdev = container_of(work, struct hclgevf_dev, mbx_service_task); + if (!test_and_clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state)) + return; if (test_and_set_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state)) return; - clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state); - hclgevf_mbx_async_handler(hdev); clear_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state); } -static void hclgevf_keep_alive_timer(struct timer_list *t) -{ - struct hclgevf_dev *hdev = from_timer(hdev, t, keep_alive_timer); - - schedule_work(&hdev->keep_alive_task); - mod_timer(&hdev->keep_alive_timer, jiffies + - HCLGEVF_KEEP_ALIVE_TASK_INTERVAL * HZ); -} - -static void hclgevf_keep_alive_task(struct work_struct *work) +static void hclgevf_keep_alive(struct hclgevf_dev *hdev) { - struct hclgevf_dev *hdev; u8 respmsg; int ret; - hdev = container_of(work, struct hclgevf_dev, keep_alive_task); - if (test_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state)) return; @@ -1928,19 +1894,32 @@ static void hclgevf_keep_alive_task(struct work_struct *work) "VF sends keep alive cmd failed(=%d)\n", ret); } -static void hclgevf_service_task(struct work_struct *work) +static void hclgevf_periodic_service_task(struct hclgevf_dev *hdev) { - struct hnae3_handle *handle; - struct hclgevf_dev *hdev; + unsigned long delta = round_jiffies_relative(HZ); + struct hnae3_handle *handle = &hdev->nic; - hdev = container_of(work, struct hclgevf_dev, service_task); - handle = &hdev->nic; + if (time_is_after_jiffies(hdev->last_serv_processed + HZ)) { + delta = jiffies - hdev->last_serv_processed; - if (hdev->stats_timer >= HCLGEVF_STATS_TIMER_INTERVAL) { - hclgevf_tqps_update_stats(handle); - hdev->stats_timer = 0; + if (delta < round_jiffies_relative(HZ)) { + delta = round_jiffies_relative(HZ) - delta; + goto out; + } } + hdev->serv_processed_cnt++; + if (!(hdev->serv_processed_cnt % HCLGEVF_KEEP_ALIVE_TASK_INTERVAL)) + hclgevf_keep_alive(hdev); + + if (test_bit(HCLGEVF_STATE_DOWN, &hdev->state)) { + hdev->last_serv_processed = jiffies; + goto out; + } + + if (!(hdev->serv_processed_cnt % HCLGEVF_STATS_TIMER_INTERVAL)) + hclgevf_tqps_update_stats(handle); + /* request the link status from the PF. PF would be able to tell VF * about such updates in future so we might remove this later */ @@ -1950,9 +1929,27 @@ static void hclgevf_service_task(struct work_struct *work) hclgevf_sync_vlan_filter(hdev); - hclgevf_deferred_task_schedule(hdev); + hdev->last_serv_processed = jiffies; - clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state); +out: + hclgevf_task_schedule(hdev, delta); +} + +static void hclgevf_service_task(struct work_struct *work) +{ + struct hclgevf_dev *hdev = container_of(work, struct hclgevf_dev, + service_task.work); + + hclgevf_reset_service_task(hdev); + hclgevf_mailbox_service_task(hdev); + hclgevf_periodic_service_task(hdev); + + /* Handle reset and mbx again in case periodical task delays the + * handling by calling hclgevf_task_schedule() in + * hclgevf_periodic_service_task() + */ + hclgevf_reset_service_task(hdev); + hclgevf_mailbox_service_task(hdev); } static void hclgevf_clear_event_cause(struct hclgevf_dev *hdev, u32 regclr) @@ -2010,11 +2007,6 @@ static enum hclgevf_evt_cause hclgevf_check_evt_cause(struct hclgevf_dev *hdev, return HCLGEVF_VECTOR0_EVENT_OTHER; } -static void hclgevf_enable_vector(struct hclgevf_misc_vector *vector, bool en) -{ - writel(en ? 
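
The scheduling helpers and the combined service task above fold the old per-purpose work items and timers into one self-rescheduling delayed work on a driver workqueue. A minimal sketch of that pattern; example_wq, example_dev and the helper names are illustrative and assumed to be set up elsewhere.

#include <linux/kernel.h>
#include <linux/timer.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;	/* created at module init */

struct example_dev {
	struct delayed_work service_task;	/* INIT_DELAYED_WORK'd at probe */
};

static void example_service_task(struct work_struct *work)
{
	struct example_dev *edev = container_of(work, struct example_dev,
						service_task.work);

	/* reset handling, mailbox handling and periodic work all run here */

	/* re-arm roughly one second from now */
	queue_delayed_work(example_wq, &edev->service_task,
			   round_jiffies_relative(HZ));
}

static void example_task_schedule_now(struct example_dev *edev)
{
	/* pull the next run forward so it executes immediately */
	mod_delayed_work(example_wq, &edev->service_task, 0);
}
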
1 : 0, vector->addr); -} - static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data) { enum hclgevf_evt_cause event_cause; @@ -2189,16 +2181,31 @@ static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev) false); } +static void hclgevf_flush_link_update(struct hclgevf_dev *hdev) +{ +#define HCLGEVF_FLUSH_LINK_TIMEOUT 100000 + + unsigned long last = hdev->serv_processed_cnt; + int i = 0; + + while (test_bit(HCLGEVF_STATE_LINK_UPDATING, &hdev->state) && + i++ < HCLGEVF_FLUSH_LINK_TIMEOUT && + last == hdev->serv_processed_cnt) + usleep_range(1, 1); +} + static void hclgevf_set_timer_task(struct hnae3_handle *handle, bool enable) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); if (enable) { - mod_timer(&hdev->service_timer, jiffies + HZ); + hclgevf_task_schedule(hdev, 0); } else { - del_timer_sync(&hdev->service_timer); - cancel_work_sync(&hdev->service_task); - clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state); + set_bit(HCLGEVF_STATE_DOWN, &hdev->state); + + /* flush memory to make sure DOWN is seen by service task */ + smp_mb__before_atomic(); + hclgevf_flush_link_update(hdev); } } @@ -2245,16 +2252,12 @@ static int hclgevf_set_alive(struct hnae3_handle *handle, bool alive) static int hclgevf_client_start(struct hnae3_handle *handle) { - struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); int ret; ret = hclgevf_set_alive(handle, true); if (ret) return ret; - mod_timer(&hdev->keep_alive_timer, jiffies + - HCLGEVF_KEEP_ALIVE_TASK_INTERVAL * HZ); - return 0; } @@ -2267,27 +2270,18 @@ static void hclgevf_client_stop(struct hnae3_handle *handle) if (ret) dev_warn(&hdev->pdev->dev, "%s failed %d\n", __func__, ret); - - del_timer_sync(&hdev->keep_alive_timer); - cancel_work_sync(&hdev->keep_alive_task); } static void hclgevf_state_init(struct hclgevf_dev *hdev) { - /* setup tasks for the MBX */ - INIT_WORK(&hdev->mbx_service_task, hclgevf_mailbox_service_task); clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state); clear_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state); + clear_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state); - /* setup tasks for service timer */ - timer_setup(&hdev->service_timer, hclgevf_service_timer, 0); - - INIT_WORK(&hdev->service_task, hclgevf_service_task); - clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state); - - INIT_WORK(&hdev->rst_service_task, hclgevf_reset_service_task); + INIT_DELAYED_WORK(&hdev->service_task, hclgevf_service_task); mutex_init(&hdev->mbx_resp.mbx_mutex); + sema_init(&hdev->reset_sem, 1); /* bring the device down */ set_bit(HCLGEVF_STATE_DOWN, &hdev->state); @@ -2298,18 +2292,8 @@ static void hclgevf_state_uninit(struct hclgevf_dev *hdev) set_bit(HCLGEVF_STATE_DOWN, &hdev->state); set_bit(HCLGEVF_STATE_REMOVING, &hdev->state); - if (hdev->keep_alive_timer.function) - del_timer_sync(&hdev->keep_alive_timer); - if (hdev->keep_alive_task.func) - cancel_work_sync(&hdev->keep_alive_task); - if (hdev->service_timer.function) - del_timer_sync(&hdev->service_timer); - if (hdev->service_task.func) - cancel_work_sync(&hdev->service_task); - if (hdev->mbx_service_task.func) - cancel_work_sync(&hdev->mbx_service_task); - if (hdev->rst_service_task.func) - cancel_work_sync(&hdev->rst_service_task); + if (hdev->service_task.work.func) + cancel_delayed_work_sync(&hdev->service_task); mutex_destroy(&hdev->mbx_resp.mbx_mutex); } @@ -2383,8 +2367,10 @@ static int hclgevf_misc_irq_init(struct hclgevf_dev *hdev) hclgevf_get_misc_vector(hdev); + snprintf(hdev->misc_vector.name, HNAE3_INT_NAME_LEN, "%s-misc-%s", + HCLGEVF_NAME, 
pci_name(hdev->pdev)); ret = request_irq(hdev->misc_vector.vector_irq, hclgevf_misc_irq_handle, - 0, "hclgevf_cmd", hdev); + 0, hdev->misc_vector.name, hdev); if (ret) { dev_err(&hdev->pdev->dev, "VF failed to request misc irq(%d)\n", hdev->misc_vector.vector_irq); @@ -2611,11 +2597,11 @@ static int hclgevf_query_vf_resource(struct hclgevf_dev *hdev) if (hnae3_dev_roce_supported(hdev)) { hdev->roce_base_msix_offset = - hnae3_get_field(__le16_to_cpu(req->msixcap_localid_ba_rocee), + hnae3_get_field(le16_to_cpu(req->msixcap_localid_ba_rocee), HCLGEVF_MSIX_OFT_ROCEE_M, HCLGEVF_MSIX_OFT_ROCEE_S); hdev->num_roce_msix = - hnae3_get_field(__le16_to_cpu(req->vf_intr_vector_number), + hnae3_get_field(le16_to_cpu(req->vf_intr_vector_number), HCLGEVF_VEC_NUM_M, HCLGEVF_VEC_NUM_S); /* nic's msix numbers is always equals to the roce's. */ @@ -2628,7 +2614,7 @@ static int hclgevf_query_vf_resource(struct hclgevf_dev *hdev) hdev->roce_base_msix_offset; } else { hdev->num_msi = - hnae3_get_field(__le16_to_cpu(req->vf_intr_vector_number), + hnae3_get_field(le16_to_cpu(req->vf_intr_vector_number), HCLGEVF_VEC_NUM_M, HCLGEVF_VEC_NUM_S); hdev->num_nic_msix = hdev->num_msi; @@ -2725,16 +2711,12 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) int ret; ret = hclgevf_pci_init(hdev); - if (ret) { - dev_err(&pdev->dev, "PCI initialization failed\n"); + if (ret) return ret; - } ret = hclgevf_cmd_queue_init(hdev); - if (ret) { - dev_err(&pdev->dev, "Cmd queue init failed: %d\n", ret); + if (ret) goto err_cmd_queue_init; - } ret = hclgevf_cmd_init(hdev); if (ret) @@ -2742,11 +2724,8 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) /* Get vf resource */ ret = hclgevf_query_vf_resource(hdev); - if (ret) { - dev_err(&hdev->pdev->dev, - "Query vf status error, ret = %d.\n", ret); + if (ret) goto err_cmd_init; - } ret = hclgevf_init_msi(hdev); if (ret) { @@ -2756,13 +2735,11 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) hclgevf_state_init(hdev); hdev->reset_level = HNAE3_VF_FUNC_RESET; + hdev->reset_type = HNAE3_NONE_RESET; ret = hclgevf_misc_irq_init(hdev); - if (ret) { - dev_err(&pdev->dev, "failed(%d) to init Misc IRQ(vector0)\n", - ret); + if (ret) goto err_misc_irq_init; - } set_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state); @@ -2779,10 +2756,8 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) } ret = hclgevf_set_handle_info(hdev); - if (ret) { - dev_err(&pdev->dev, "failed(%d) to set handle info\n", ret); + if (ret) goto err_config; - } ret = hclgevf_config_gro(hdev, true); if (ret) @@ -2807,6 +2782,8 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) dev_info(&hdev->pdev->dev, "finished initializing %s driver\n", HCLGEVF_DRIVER_NAME); + hclgevf_task_schedule(hdev, round_jiffies_relative(HZ)); + return 0; err_config: @@ -2838,7 +2815,6 @@ static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev) static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev) { struct pci_dev *pdev = ae_dev->pdev; - struct hclgevf_dev *hdev; int ret; ret = hclgevf_alloc_hdev(ae_dev); @@ -2853,10 +2829,6 @@ static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev) return ret; } - hdev = ae_dev->priv; - timer_setup(&hdev->keep_alive_timer, hclgevf_keep_alive_timer, 0); - INIT_WORK(&hdev->keep_alive_task, hclgevf_keep_alive_task); - return 0; } @@ -3213,6 +3185,12 @@ static int hclgevf_init(void) { pr_info("%s is initializing\n", HCLGEVF_NAME); + hclgevf_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, HCLGEVF_NAME); + if (!hclgevf_wq) { + pr_err("%s: failed to create workqueue\n", HCLGEVF_NAME); + 
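
The hclgevf_init()/hclgevf_exit() changes around this point add a dedicated workqueue created with WQ_MEM_RECLAIM so reset and mailbox work can still make progress under memory pressure. A sketch of that lifecycle with hypothetical example_* names:

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;

static int __init example_module_init(void)
{
	/* WQ_MEM_RECLAIM reserves a rescuer thread so at least one work
	 * item can run even when new workers cannot be allocated.
	 */
	example_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, "example");
	if (!example_wq)
		return -ENOMEM;

	return 0;
}

static void __exit example_module_exit(void)
{
	destroy_workqueue(example_wq);
}

module_init(example_module_init);
module_exit(example_module_exit);
MODULE_LICENSE("GPL");
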
return -ENOMEM; + } + hnae3_register_ae_algo(&ae_algovf); return 0; @@ -3221,6 +3199,7 @@ static int hclgevf_init(void) static void hclgevf_exit(void) { hnae3_unregister_ae_algo(&ae_algovf); + destroy_workqueue(hclgevf_wq); } module_init(hclgevf_init); module_exit(hclgevf_exit); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 2f4c81bf4169..fee8d97f323c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -142,12 +142,13 @@ enum hclgevf_states { HCLGEVF_STATE_REMOVING, HCLGEVF_STATE_NIC_REGISTERED, /* task states */ - HCLGEVF_STATE_SERVICE_SCHED, HCLGEVF_STATE_RST_SERVICE_SCHED, HCLGEVF_STATE_RST_HANDLING, HCLGEVF_STATE_MBX_SERVICE_SCHED, HCLGEVF_STATE_MBX_HANDLING, HCLGEVF_STATE_CMD_DISABLE, + HCLGEVF_STATE_LINK_UPDATING, + HCLGEVF_STATE_RST_FAIL, }; struct hclgevf_mac { @@ -220,6 +221,7 @@ struct hclgevf_rss_cfg { struct hclgevf_misc_vector { u8 __iomem *addr; int vector_irq; + char name[HNAE3_INT_NAME_LEN]; }; struct hclgevf_rst_stats { @@ -251,6 +253,7 @@ struct hclgevf_dev { unsigned long reset_state; /* requested, pending */ struct hclgevf_rst_stats rst_stats; u32 reset_attempts; + struct semaphore reset_sem; /* protect reset process */ u32 fw_version; u16 num_tqps; /* num task queue pairs of this PF */ @@ -283,12 +286,7 @@ struct hclgevf_dev { struct hclgevf_mbx_resp_status mbx_resp; /* mailbox response */ struct hclgevf_mbx_arq_ring arq; /* mailbox async rx queue */ - struct timer_list service_timer; - struct timer_list keep_alive_timer; - struct work_struct service_task; - struct work_struct keep_alive_task; - struct work_struct rst_service_task; - struct work_struct mbx_service_task; + struct delayed_work service_task; struct hclgevf_tqp *htqp; @@ -298,7 +296,8 @@ struct hclgevf_dev { struct hnae3_client *nic_client; struct hnae3_client *roce_client; u32 flag; - u32 stats_timer; + unsigned long serv_processed_cnt; + unsigned long last_serv_processed; }; static inline bool hclgevf_is_reset_pending(struct hclgevf_dev *hdev) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 2411ad270c98..02a14f5e7fe3 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -766,7 +766,7 @@ static void hinic_set_rx_mode(struct net_device *netdev) queue_work(nic_dev->workq, &rx_mode_work->work); } -static void hinic_tx_timeout(struct net_device *netdev) +static void hinic_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct hinic_dev *nic_dev = netdev_priv(netdev); diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c index 92929750f832..bef676d93339 100644 --- a/drivers/net/ethernet/i825xx/82596.c +++ b/drivers/net/ethernet/i825xx/82596.c @@ -363,7 +363,7 @@ static netdev_tx_t i596_start_xmit(struct sk_buff *skb, struct net_device *dev); static irqreturn_t i596_interrupt(int irq, void *dev_id); static int i596_close(struct net_device *dev); static void i596_add_cmd(struct net_device *dev, struct i596_cmd *cmd); -static void i596_tx_timeout (struct net_device *dev); +static void i596_tx_timeout (struct net_device *dev, unsigned int txqueue); static void print_eth(unsigned char *buf, char *str); static void set_multicast_list(struct net_device *dev); @@ -1019,7 +1019,7 @@ err_irq_dev: return res; } -static void i596_tx_timeout (struct net_device *dev) +static 
void i596_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct i596_private *lp = dev->ml_priv; int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/i825xx/ether1.c b/drivers/net/ethernet/i825xx/ether1.c index bb3b8adbe4f0..a0bfb509e002 100644 --- a/drivers/net/ethernet/i825xx/ether1.c +++ b/drivers/net/ethernet/i825xx/ether1.c @@ -66,7 +66,7 @@ static netdev_tx_t ether1_sendpacket(struct sk_buff *skb, static irqreturn_t ether1_interrupt(int irq, void *dev_id); static int ether1_close(struct net_device *dev); static void ether1_setmulticastlist(struct net_device *dev); -static void ether1_timeout(struct net_device *dev); +static void ether1_timeout(struct net_device *dev, unsigned int txqueue); /* ------------------------------------------------------------------------- */ @@ -650,7 +650,7 @@ ether1_open (struct net_device *dev) } static void -ether1_timeout(struct net_device *dev) +ether1_timeout(struct net_device *dev, unsigned int txqueue) { printk(KERN_WARNING "%s: transmit timeout, network cable problem?\n", dev->name); diff --git a/drivers/net/ethernet/i825xx/lib82596.c b/drivers/net/ethernet/i825xx/lib82596.c index f9742af7f142..b03757e169e4 100644 --- a/drivers/net/ethernet/i825xx/lib82596.c +++ b/drivers/net/ethernet/i825xx/lib82596.c @@ -351,7 +351,7 @@ static netdev_tx_t i596_start_xmit(struct sk_buff *skb, struct net_device *dev); static irqreturn_t i596_interrupt(int irq, void *dev_id); static int i596_close(struct net_device *dev); static void i596_add_cmd(struct net_device *dev, struct i596_cmd *cmd); -static void i596_tx_timeout (struct net_device *dev); +static void i596_tx_timeout (struct net_device *dev, unsigned int txqueue); static void print_eth(unsigned char *buf, char *str); static void set_multicast_list(struct net_device *dev); static inline void ca(struct net_device *dev); @@ -936,7 +936,7 @@ out_remove_rx_bufs: return -EAGAIN; } -static void i596_tx_timeout (struct net_device *dev) +static void i596_tx_timeout (struct net_device *dev, unsigned int txqueue) { struct i596_private *lp = netdev_priv(dev); diff --git a/drivers/net/ethernet/i825xx/sni_82596.c b/drivers/net/ethernet/i825xx/sni_82596.c index 6436a98c5953..22f5887578b2 100644 --- a/drivers/net/ethernet/i825xx/sni_82596.c +++ b/drivers/net/ethernet/i825xx/sni_82596.c @@ -91,10 +91,10 @@ static int sni_82596_probe(struct platform_device *dev) idprom = platform_get_resource(dev, IORESOURCE_MEM, 2); if (!res || !ca || !options || !idprom) return -ENODEV; - mpu_addr = ioremap_nocache(res->start, 4); + mpu_addr = ioremap(res->start, 4); if (!mpu_addr) return -ENOMEM; - ca_addr = ioremap_nocache(ca->start, 4); + ca_addr = ioremap(ca->start, 4); if (!ca_addr) goto probe_failed_free_mpu; @@ -110,7 +110,7 @@ static int sni_82596_probe(struct platform_device *dev) netdevice->base_addr = res->start; netdevice->irq = platform_get_irq(dev, 0); - eth_addr = ioremap_nocache(idprom->start, 0x10); + eth_addr = ioremap(idprom->start, 0x10); if (!eth_addr) goto probe_failed; diff --git a/drivers/net/ethernet/i825xx/sun3_82586.c b/drivers/net/ethernet/i825xx/sun3_82586.c index 1a86184d44c0..4564ee02c95f 100644 --- a/drivers/net/ethernet/i825xx/sun3_82586.c +++ b/drivers/net/ethernet/i825xx/sun3_82586.c @@ -125,7 +125,7 @@ static netdev_tx_t sun3_82586_send_packet(struct sk_buff *, struct net_device *); static struct net_device_stats *sun3_82586_get_stats(struct net_device *dev); static void set_multicast_list(struct net_device *dev); -static void sun3_82586_timeout(struct net_device *dev); +static 
void sun3_82586_timeout(struct net_device *dev, unsigned int txqueue); #if 0 static void sun3_82586_dump(struct net_device *,void *); #endif @@ -965,7 +965,7 @@ static void startrecv586(struct net_device *dev) WAIT_4_SCB_CMD_RUC(); /* wait for accept cmd. (no timeout!!) */ } -static void sun3_82586_timeout(struct net_device *dev) +static void sun3_82586_timeout(struct net_device *dev, unsigned int txqueue) { struct priv *p = netdev_priv(dev); #ifndef NO_NOPCOMMANDS diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 13e30eba5349..0273fb7a9d01 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -2786,7 +2786,7 @@ out: return; } -static void ehea_tx_watchdog(struct net_device *dev) +static void ehea_tx_watchdog(struct net_device *dev, unsigned int txqueue) { struct ehea_port *port = netdev_priv(dev); diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 2e40425d8a34..b7fc17756c51 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -776,7 +776,7 @@ static void emac_reset_work(struct work_struct *work) mutex_unlock(&dev->link_lock); } -static void emac_tx_timeout(struct net_device *ndev) +static void emac_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct emac_instance *dev = netdev_priv(ndev); diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 830791ab4619..c75239d8820f 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2282,7 +2282,7 @@ err: return -ret; } -static void ibmvnic_tx_timeout(struct net_device *dev) +static void ibmvnic_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct ibmvnic_adapter *adapter = netdev_priv(dev); diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index a65d5a9ba7db..1b8d015ebfb0 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -2316,7 +2316,7 @@ static void e100_down(struct nic *nic) e100_rx_clean_list(nic); } -static void e100_tx_timeout(struct net_device *netdev) +static void e100_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct nic *nic = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index aca97b084003..2bced34c19ba 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -134,7 +134,7 @@ static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd); static void e1000_enter_82542_rst(struct e1000_adapter *adapter); static void e1000_leave_82542_rst(struct e1000_adapter *adapter); -static void e1000_tx_timeout(struct net_device *dev); +static void e1000_tx_timeout(struct net_device *dev, unsigned int txqueue); static void e1000_reset_task(struct work_struct *work); static void e1000_smartspeed(struct e1000_adapter *adapter); static int e1000_82547_fifo_workaround(struct e1000_adapter *adapter, @@ -3488,7 +3488,7 @@ exit: * e1000_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void e1000_tx_timeout(struct net_device *netdev) +static void e1000_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct e1000_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 
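
All of the .ndo_tx_timeout updates in these hunks convert handlers to the two-argument prototype, where the core passes the index of the stalled TX queue so drivers no longer need to scan every queue themselves. A hedged sketch of a handler and ops table using the new signature; the example_* names are illustrative.

#include <linux/netdevice.h>

static void example_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
	netdev_warn(netdev, "TX timeout on queue %u\n", txqueue);
	/* a real driver would dump ring state for txqueue and schedule
	 * a reset here
	 */
}

static const struct net_device_ops example_netdev_ops = {
	.ndo_tx_timeout	= example_tx_timeout,
};
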
7c5b18d87b49..db4ea58bac82 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -4721,7 +4721,7 @@ int e1000e_close(struct net_device *netdev) e1000_free_irq(adapter); /* Link status message must follow this format */ - pr_info("%s NIC Link is Down\n", netdev->name); + netdev_info(netdev, "NIC Link is Down\n"); } napi_disable(&adapter->napi); @@ -5071,12 +5071,13 @@ static void e1000_print_link_info(struct e1000_adapter *adapter) u32 ctrl = er32(CTRL); /* Link status message must follow this format for user tools */ - pr_info("%s NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n", - adapter->netdev->name, adapter->link_speed, - adapter->link_duplex == FULL_DUPLEX ? "Full" : "Half", - (ctrl & E1000_CTRL_TFCE) && (ctrl & E1000_CTRL_RFCE) ? "Rx/Tx" : - (ctrl & E1000_CTRL_RFCE) ? "Rx" : - (ctrl & E1000_CTRL_TFCE) ? "Tx" : "None"); + netdev_info(adapter->netdev, + "NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n", + adapter->link_speed, + adapter->link_duplex == FULL_DUPLEX ? "Full" : "Half", + (ctrl & E1000_CTRL_TFCE) && (ctrl & E1000_CTRL_RFCE) ? "Rx/Tx" : + (ctrl & E1000_CTRL_RFCE) ? "Rx" : + (ctrl & E1000_CTRL_TFCE) ? "Tx" : "None"); } static bool e1000e_has_link(struct e1000_adapter *adapter) @@ -5319,7 +5320,7 @@ static void e1000_watchdog_task(struct work_struct *work) adapter->link_speed = 0; adapter->link_duplex = 0; /* Link status message must follow this format */ - pr_info("%s NIC Link is Down\n", adapter->netdev->name); + netdev_info(netdev, "NIC Link is Down\n"); netif_carrier_off(netdev); netif_stop_queue(netdev); if (!test_bit(__E1000_DOWN, &adapter->state)) @@ -5940,7 +5941,7 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, * e1000_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void e1000_tx_timeout(struct net_device *netdev) +static void e1000_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct e1000_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 68baee04dc58..0637ccadee79 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -696,21 +696,24 @@ static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev) /** * fm10k_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure + * @txqueue: the index of the Tx queue that timed out **/ -static void fm10k_tx_timeout(struct net_device *netdev) +static void fm10k_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct fm10k_intfc *interface = netdev_priv(netdev); + struct fm10k_ring *tx_ring; bool real_tx_hang = false; - int i; - -#define TX_TIMEO_LIMIT 16000 - for (i = 0; i < interface->num_tx_queues; i++) { - struct fm10k_ring *tx_ring = interface->tx_ring[i]; - if (check_for_tx_hang(tx_ring) && fm10k_check_tx_hang(tx_ring)) - real_tx_hang = true; + if (txqueue >= interface->num_tx_queues) { + WARN(1, "invalid Tx queue index %d", txqueue); + return; } + tx_ring = interface->tx_ring[txqueue]; + if (check_for_tx_hang(tx_ring) && fm10k_check_tx_hang(tx_ring)) + real_tx_hang = true; + +#define TX_TIMEO_LIMIT 16000 if (real_tx_hang) { fm10k_tx_timeout_reset(interface); } else { diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index d4055037af89..45b90eb11adb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ 
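
The e1000e hunks above switch the link messages from pr_info() with a hand-built "%s" interface prefix to netdev_info(), which adds the driver and interface name automatically. A small illustrative helper; the function name and parameters are assumptions, not driver API.

#include <linux/netdevice.h>

static void example_report_link_up(struct net_device *netdev, int speed_mbps,
				   bool full_duplex)
{
	/* netdev_info() emits "<driver> <ifname>: ..." for us */
	netdev_info(netdev, "NIC Link is Up %d Mbps %s Duplex\n",
		    speed_mbps, full_duplex ? "Full" : "Half");
}
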
b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1113,7 +1113,7 @@ i40e_status i40e_read_pba_string(struct i40e_hw *hw, u8 *pba_num, */ pba_size--; if (pba_num_size < (((u32)pba_size * 2) + 1)) { - hw_dbg(hw, "Buffer to small for PBA data.\n"); + hw_dbg(hw, "Buffer too small for PBA data.\n"); return I40E_ERR_PARAM; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 2c5af6d4a6b1..8c3e753bfb9d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -301,43 +301,24 @@ void i40e_service_event_schedule(struct i40e_pf *pf) * device is munged, not just the one netdev port, so go for the full * reset. **/ -static void i40e_tx_timeout(struct net_device *netdev) +static void i40e_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; struct i40e_ring *tx_ring = NULL; - unsigned int i, hung_queue = 0; + unsigned int i; u32 head, val; pf->tx_timeout_count++; - /* find the stopped queue the same way the stack does */ - for (i = 0; i < netdev->num_tx_queues; i++) { - struct netdev_queue *q; - unsigned long trans_start; - - q = netdev_get_tx_queue(netdev, i); - trans_start = q->trans_start; - if (netif_xmit_stopped(q) && - time_after(jiffies, - (trans_start + netdev->watchdog_timeo))) { - hung_queue = i; - break; - } - } - - if (i == netdev->num_tx_queues) { - netdev_info(netdev, "tx_timeout: no netdev hung queue found\n"); - } else { - /* now that we have an index, find the tx_ring struct */ - for (i = 0; i < vsi->num_queue_pairs; i++) { - if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) { - if (hung_queue == - vsi->tx_rings[i]->queue_index) { - tx_ring = vsi->tx_rings[i]; - break; - } + /* with txqueue index, find the tx_ring struct */ + for (i = 0; i < vsi->num_queue_pairs; i++) { + if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) { + if (txqueue == + vsi->tx_rings[i]->queue_index) { + tx_ring = vsi->tx_rings[i]; + break; } } } @@ -363,14 +344,14 @@ static void i40e_tx_timeout(struct net_device *netdev) val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0); netdev_info(netdev, "tx_timeout: VSI_seid: %d, Q %d, NTC: 0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x, INT: 0x%x\n", - vsi->seid, hung_queue, tx_ring->next_to_clean, + vsi->seid, txqueue, tx_ring->next_to_clean, head, tx_ring->next_to_use, readl(tx_ring->tail), val); } pf->tx_timeout_last_recovery = jiffies; - netdev_info(netdev, "tx_timeout recovery level %d, hung_queue %d\n", - pf->tx_timeout_recovery_level, hung_queue); + netdev_info(netdev, "tx_timeout recovery level %d, txqueue %d\n", + pf->tx_timeout_recovery_level, txqueue); switch (pf->tx_timeout_recovery_level) { case 1: diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index f73cd917c44f..42058fad6a3c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -269,7 +269,7 @@ static bool i40e_alloc_buffer_zc(struct i40e_ring *rx_ring, bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom); - xsk_umem_discard_addr(umem); + xsk_umem_release_addr(umem); return true; } @@ -306,7 +306,7 @@ static bool i40e_alloc_buffer_slow_zc(struct i40e_ring *rx_ring, bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom); - xsk_umem_discard_addr_rq(umem); + xsk_umem_release_addr_rq(umem); return true; } diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c 
b/drivers/net/ethernet/intel/iavf/iavf_main.c index 8e16be960e96..62fe56ddcb6e 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -159,7 +159,7 @@ void iavf_schedule_reset(struct iavf_adapter *adapter) * iavf_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void iavf_tx_timeout(struct net_device *netdev) +static void iavf_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct iavf_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index 7cb829132d28..59544b0fc086 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -17,7 +17,8 @@ ice-y := ice_main.o \ ice_lib.o \ ice_txrx_lib.o \ ice_txrx.o \ - ice_flex_pipe.o \ + ice_flex_pipe.o \ + ice_flow.o \ ice_ethtool.o ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index f972dce8aebb..cb10abb14e11 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -174,6 +174,8 @@ struct ice_sw { struct ice_pf *pf; u16 sw_id; /* switch ID for this switch */ u16 bridge_mode; /* VEB/VEPA/Port Virtualizer */ + struct ice_vsi *dflt_vsi; /* default VSI for this switch */ + u8 dflt_vsi_ena:1; /* true if above dflt_vsi is enabled */ }; enum ice_state { @@ -275,6 +277,7 @@ struct ice_vsi { u8 current_isup:1; /* Sync 'link up' logging */ u8 stat_offsets_loaded:1; u8 vlan_ena:1; + u16 num_vlan; /* queue information */ u8 tx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ @@ -462,12 +465,13 @@ static inline void ice_set_ring_xdp(struct ice_ring *ring) static inline struct xdp_umem *ice_xsk_umem(struct ice_ring *ring) { struct xdp_umem **umems = ring->vsi->xsk_umems; - int qid = ring->q_index; + u16 qid = ring->q_index; if (ice_ring_is_xdp(ring)) qid -= ring->vsi->num_xdp_txq; - if (!umems || !umems[qid] || !ice_is_xdp_ena_vsi(ring->vsi)) + if (qid >= ring->vsi->num_xsk_umems || !umems || !umems[qid] || + !ice_is_xdp_ena_vsi(ring->vsi)) return NULL; return umems[qid]; diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 5421fc413f94..4459bc564b11 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -232,6 +232,13 @@ struct ice_aqc_get_sw_cfg_resp { */ #define ICE_AQC_RES_TYPE_VSI_LIST_REP 0x03 #define ICE_AQC_RES_TYPE_VSI_LIST_PRUNE 0x04 +#define ICE_AQC_RES_TYPE_HASH_PROF_BLDR_PROFID 0x60 +#define ICE_AQC_RES_TYPE_HASH_PROF_BLDR_TCAM 0x61 + +#define ICE_AQC_RES_TYPE_FLAG_SCAN_BOTTOM BIT(12) +#define ICE_AQC_RES_TYPE_FLAG_IGNORE_INDEX BIT(13) + +#define ICE_AQC_RES_TYPE_FLAG_DEDICATED 0x00 /* Allocate Resources command (indirect 0x0208) * Free Resources command (indirect 0x0209) @@ -1849,6 +1856,7 @@ enum ice_adminq_opc { /* package commands */ ice_aqc_opc_download_pkg = 0x0C40, + ice_aqc_opc_update_pkg = 0x0C42, ice_aqc_opc_get_pkg_info_list = 0x0C43, /* debug commands */ diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 77d6a0291e97..d8e975cceb21 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -93,7 +93,8 @@ static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena) * @vsi: the VSI being 
configured * @v_idx: index of the vector in the VSI struct * - * We allocate one q_vector. If allocation fails we return -ENOMEM. + * We allocate one q_vector and set default value for ITR setting associated + * with this q_vector. If allocation fails we return -ENOMEM. */ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx) { @@ -108,6 +109,8 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx) q_vector->vsi = vsi; q_vector->v_idx = v_idx; + q_vector->tx.itr_setting = ICE_DFLT_TX_ITR; + q_vector->rx.itr_setting = ICE_DFLT_RX_ITR; if (vsi->type == ICE_VSI_VF) goto out; /* only set affinity_mask if the CPU is online */ @@ -299,6 +302,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring) if (ring->vsi->type == ICE_VSI_PF) { if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) + /* coverity[check_return] */ xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->q_index); @@ -323,7 +327,9 @@ int ice_setup_rx_ctx(struct ice_ring *ring) dev_info(&vsi->back->pdev->dev, "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n", ring->q_index); } else { + ring->zca.free = NULL; if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) + /* coverity[check_return] */ xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->q_index); @@ -674,10 +680,6 @@ void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector) if (q_vector->num_ring_rx) { struct ice_ring_container *rc = &q_vector->rx; - /* if this value is set then don't overwrite with default */ - if (!rc->itr_setting) - rc->itr_setting = ICE_DFLT_RX_ITR; - rc->target_itr = ITR_TO_REG(rc->itr_setting); rc->next_update = jiffies + 1; rc->current_itr = rc->target_itr; @@ -688,10 +690,6 @@ void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector) if (q_vector->num_ring_tx) { struct ice_ring_container *rc = &q_vector->tx; - /* if this value is set then don't overwrite with default */ - if (!rc->itr_setting) - rc->itr_setting = ICE_DFLT_TX_ITR; - rc->target_itr = ITR_TO_REG(rc->itr_setting); rc->next_update = jiffies + 1; rc->current_itr = rc->target_itr; diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index fb1d930470c7..0207e28c2682 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -4,28 +4,10 @@ #include "ice_common.h" #include "ice_sched.h" #include "ice_adminq_cmd.h" +#include "ice_flow.h" #define ICE_PF_RESET_WAIT_COUNT 200 -#define ICE_PROG_FLEX_ENTRY(hw, rxdid, mdid, idx) \ - wr32((hw), GLFLXP_RXDID_FLX_WRD_##idx(rxdid), \ - ((ICE_RX_OPC_MDID << \ - GLFLXP_RXDID_FLX_WRD_##idx##_RXDID_OPCODE_S) & \ - GLFLXP_RXDID_FLX_WRD_##idx##_RXDID_OPCODE_M) | \ - (((mdid) << GLFLXP_RXDID_FLX_WRD_##idx##_PROT_MDID_S) & \ - GLFLXP_RXDID_FLX_WRD_##idx##_PROT_MDID_M)) - -#define ICE_PROG_FLG_ENTRY(hw, rxdid, flg_0, flg_1, flg_2, flg_3, idx) \ - wr32((hw), GLFLXP_RXDID_FLAGS(rxdid, idx), \ - (((flg_0) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S) & \ - GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M) | \ - (((flg_1) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_S) & \ - GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_M) | \ - (((flg_2) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_S) & \ - GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_M) | \ - (((flg_3) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_S) & \ - GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_M)) - /** * ice_set_mac_type - Sets MAC type * @hw: pointer to the HW structure @@ -348,88 +330,6 @@ ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse, } /** - * ice_init_flex_flags - * @hw: pointer to the hardware structure - * @prof_id: Rx Descriptor Builder 
profile ID - * - * Function to initialize Rx flex flags - */ -static void ice_init_flex_flags(struct ice_hw *hw, enum ice_rxdid prof_id) -{ - u8 idx = 0; - - /* Flex-flag fields (0-2) are programmed with FLG64 bits with layout: - * flexiflags0[5:0] - TCP flags, is_packet_fragmented, is_packet_UDP_GRE - * flexiflags1[3:0] - Not used for flag programming - * flexiflags2[7:0] - Tunnel and VLAN types - * 2 invalid fields in last index - */ - switch (prof_id) { - /* Rx flex flags are currently programmed for the NIC profiles only. - * Different flag bit programming configurations can be added per - * profile as needed. - */ - case ICE_RXDID_FLEX_NIC: - case ICE_RXDID_FLEX_NIC_2: - ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_PKT_FRG, - ICE_FLG_UDP_GRE, ICE_FLG_PKT_DSI, - ICE_FLG_FIN, idx++); - /* flex flag 1 is not used for flexi-flag programming, skipping - * these four FLG64 bits. - */ - ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_SYN, ICE_FLG_RST, - ICE_FLG_PKT_DSI, ICE_FLG_PKT_DSI, idx++); - ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_PKT_DSI, - ICE_FLG_PKT_DSI, ICE_FLG_EVLAN_x8100, - ICE_FLG_EVLAN_x9100, idx++); - ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_VLAN_x8100, - ICE_FLG_TNL_VLAN, ICE_FLG_TNL_MAC, - ICE_FLG_TNL0, idx++); - ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_TNL1, ICE_FLG_TNL2, - ICE_FLG_PKT_DSI, ICE_FLG_PKT_DSI, idx); - break; - - default: - ice_debug(hw, ICE_DBG_INIT, - "Flag programming for profile ID %d not supported\n", - prof_id); - } -} - -/** - * ice_init_flex_flds - * @hw: pointer to the hardware structure - * @prof_id: Rx Descriptor Builder profile ID - * - * Function to initialize flex descriptors - */ -static void ice_init_flex_flds(struct ice_hw *hw, enum ice_rxdid prof_id) -{ - enum ice_flex_rx_mdid mdid; - - switch (prof_id) { - case ICE_RXDID_FLEX_NIC: - case ICE_RXDID_FLEX_NIC_2: - ICE_PROG_FLEX_ENTRY(hw, prof_id, ICE_RX_MDID_HASH_LOW, 0); - ICE_PROG_FLEX_ENTRY(hw, prof_id, ICE_RX_MDID_HASH_HIGH, 1); - ICE_PROG_FLEX_ENTRY(hw, prof_id, ICE_RX_MDID_FLOW_ID_LOWER, 2); - - mdid = (prof_id == ICE_RXDID_FLEX_NIC_2) ? 
- ICE_RX_MDID_SRC_VSI : ICE_RX_MDID_FLOW_ID_HIGH; - - ICE_PROG_FLEX_ENTRY(hw, prof_id, mdid, 3); - - ice_init_flex_flags(hw, prof_id); - break; - - default: - ice_debug(hw, ICE_DBG_INIT, - "Field init for profile ID %d not supported\n", - prof_id); - } -} - -/** * ice_init_fltr_mgmt_struct - initializes filter management list and locks * @hw: pointer to the HW struct */ @@ -882,9 +782,6 @@ enum ice_status ice_init_hw(struct ice_hw *hw) if (status) goto err_unroll_fltr_mgmt_struct; - - ice_init_flex_flds(hw, ICE_RXDID_FLEX_NIC); - ice_init_flex_flds(hw, ICE_RXDID_FLEX_NIC_2); status = ice_init_hw_tbls(hw); if (status) goto err_unroll_fltr_mgmt_struct; @@ -1601,6 +1498,114 @@ void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res) } /** + * ice_aq_alloc_free_res - command to allocate/free resources + * @hw: pointer to the HW struct + * @num_entries: number of resource entries in buffer + * @buf: Indirect buffer to hold data parameters and response + * @buf_size: size of buffer for indirect commands + * @opc: pass in the command opcode + * @cd: pointer to command details structure or NULL + * + * Helper function to allocate/free resources using the admin queue commands + */ +enum ice_status +ice_aq_alloc_free_res(struct ice_hw *hw, u16 num_entries, + struct ice_aqc_alloc_free_res_elem *buf, u16 buf_size, + enum ice_adminq_opc opc, struct ice_sq_cd *cd) +{ + struct ice_aqc_alloc_free_res_cmd *cmd; + struct ice_aq_desc desc; + + cmd = &desc.params.sw_res_ctrl; + + if (!buf) + return ICE_ERR_PARAM; + + if (buf_size < (num_entries * sizeof(buf->elem[0]))) + return ICE_ERR_PARAM; + + ice_fill_dflt_direct_cmd_desc(&desc, opc); + + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + + cmd->num_entries = cpu_to_le16(num_entries); + + return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); +} + +/** + * ice_alloc_hw_res - allocate resource + * @hw: pointer to the HW struct + * @type: type of resource + * @num: number of resources to allocate + * @btm: allocate from bottom + * @res: pointer to array that will receive the resources + */ +enum ice_status +ice_alloc_hw_res(struct ice_hw *hw, u16 type, u16 num, bool btm, u16 *res) +{ + struct ice_aqc_alloc_free_res_elem *buf; + enum ice_status status; + u16 buf_len; + + buf_len = struct_size(buf, elem, num - 1); + buf = kzalloc(buf_len, GFP_KERNEL); + if (!buf) + return ICE_ERR_NO_MEMORY; + + /* Prepare buffer to allocate resource. */ + buf->num_elems = cpu_to_le16(num); + buf->res_type = cpu_to_le16(type | ICE_AQC_RES_TYPE_FLAG_DEDICATED | + ICE_AQC_RES_TYPE_FLAG_IGNORE_INDEX); + if (btm) + buf->res_type |= cpu_to_le16(ICE_AQC_RES_TYPE_FLAG_SCAN_BOTTOM); + + status = ice_aq_alloc_free_res(hw, 1, buf, buf_len, + ice_aqc_opc_alloc_res, NULL); + if (status) + goto ice_alloc_res_exit; + + memcpy(res, buf->elem, sizeof(buf->elem) * num); + +ice_alloc_res_exit: + kfree(buf); + return status; +} + +/** + * ice_free_hw_res - free allocated HW resource + * @hw: pointer to the HW struct + * @type: type of resource to free + * @num: number of resources + * @res: pointer to array that contains the resources to free + */ +enum ice_status +ice_free_hw_res(struct ice_hw *hw, u16 type, u16 num, u16 *res) +{ + struct ice_aqc_alloc_free_res_elem *buf; + enum ice_status status; + u16 buf_len; + + buf_len = struct_size(buf, elem, num - 1); + buf = kzalloc(buf_len, GFP_KERNEL); + if (!buf) + return ICE_ERR_NO_MEMORY; + + /* Prepare buffer to free resource. 
*/ + buf->num_elems = cpu_to_le16(num); + buf->res_type = cpu_to_le16(type); + memcpy(buf->elem, res, sizeof(buf->elem) * num); + + status = ice_aq_alloc_free_res(hw, num, buf, buf_len, + ice_aqc_opc_free_res, NULL); + if (status) + ice_debug(hw, ICE_DBG_SW, "CQ CMD Buffer:\n"); + + kfree(buf); + return status; +} + +/** * ice_get_num_per_func - determine number of resources per PF * @hw: pointer to the HW structure * @max: value to be evenly split between each PF @@ -3510,7 +3515,10 @@ enum ice_status ice_replay_vsi(struct ice_hw *hw, u16 vsi_handle) if (status) return status; } - + /* Replay per VSI all RSS configurations */ + status = ice_replay_rss_cfg(hw, vsi_handle); + if (status) + return status; /* Replay per VSI all filters */ status = ice_replay_vsi_all_fltr(hw, vsi_handle); return status; diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index b22aa561e253..b5c013fdaaf9 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -34,10 +34,18 @@ enum ice_status ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res, enum ice_aq_res_access_type access, u32 timeout); void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res); +enum ice_status +ice_alloc_hw_res(struct ice_hw *hw, u16 type, u16 num, bool btm, u16 *res); +enum ice_status +ice_free_hw_res(struct ice_hw *hw, u16 type, u16 num, u16 *res); enum ice_status ice_init_nvm(struct ice_hw *hw); enum ice_status ice_read_sr_buf(struct ice_hw *hw, u16 offset, u16 *words, u16 *data); enum ice_status +ice_aq_alloc_free_res(struct ice_hw *hw, u16 num_entries, + struct ice_aqc_alloc_free_res_elem *buf, u16 buf_size, + enum ice_adminq_opc opc, struct ice_sq_cd *cd); +enum ice_status ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, struct ice_aq_desc *desc, void *buf, u16 buf_size, struct ice_sq_cd *cd); diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index d3d3ec29def9..0664e5b8d130 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -396,6 +396,12 @@ dcb_error: prev_cfg->etscfg.tcbwtable[0] = ICE_TC_MAX_BW; prev_cfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS; memcpy(&prev_cfg->etsrec, &prev_cfg->etscfg, sizeof(prev_cfg->etsrec)); + /* Coverity warns the return code of ice_pf_dcb_cfg() is not checked + * here as is done for other calls to that function. That check is + * not necessary since this is in this function's error cleanup path. + * Suppress the Coverity warning with the following comment... 
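
ice_alloc_hw_res() and ice_free_hw_res() above size their indirect admin-queue buffers with struct_size(), passing "num - 1" because the element array already declares one entry. A hedged sketch of that sizing idiom using hypothetical example_* types:

#include <linux/overflow.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <asm/byteorder.h>

struct example_res_elem {
	__le16 e;
};

struct example_res_buf {
	__le16 num_elems;
	__le16 res_type;
	struct example_res_elem elem[1];	/* one element built in */
};

static struct example_res_buf *example_alloc_res_buf(u16 num)
{
	struct example_res_buf *buf;

	/* struct_size() is sizeof(*buf) plus (num - 1) trailing elements,
	 * computed with overflow checking; only sizeof() is taken of buf,
	 * so using it before assignment is fine.
	 */
	buf = kzalloc(struct_size(buf, elem, num - 1), GFP_KERNEL);
	if (buf)
		buf->num_elems = cpu_to_le16(num);

	return buf;
}
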
+ */ + /* coverity[check_return] */ ice_pf_dcb_cfg(pf, prev_cfg, false); kfree(prev_cfg); } diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h index f8d5c661d0ba..ce63017c56c7 100644 --- a/drivers/net/ethernet/intel/ice/ice_devids.h +++ b/drivers/net/ethernet/intel/ice/ice_devids.h @@ -11,5 +11,23 @@ #define ICE_DEV_ID_E810C_QSFP 0x1592 /* Intel(R) Ethernet Controller E810-C for SFP */ #define ICE_DEV_ID_E810C_SFP 0x1593 +/* Intel(R) Ethernet Connection E822-C for backplane */ +#define ICE_DEV_ID_E822C_BACKPLANE 0x1890 +/* Intel(R) Ethernet Connection E822-C for QSFP */ +#define ICE_DEV_ID_E822C_QSFP 0x1891 +/* Intel(R) Ethernet Connection E822-C for SFP */ +#define ICE_DEV_ID_E822C_SFP 0x1892 +/* Intel(R) Ethernet Connection E822-C/X557-AT 10GBASE-T */ +#define ICE_DEV_ID_E822C_10G_BASE_T 0x1893 +/* Intel(R) Ethernet Connection E822-C 1GbE */ +#define ICE_DEV_ID_E822C_SGMII 0x1894 +/* Intel(R) Ethernet Connection E822-X for backplane */ +#define ICE_DEV_ID_E822X_BACKPLANE 0x1897 +/* Intel(R) Ethernet Connection E822-L for SFP */ +#define ICE_DEV_ID_E822L_SFP 0x1898 +/* Intel(R) Ethernet Connection E822-L/X557-AT 10GBASE-T */ +#define ICE_DEV_ID_E822L_10G_BASE_T 0x1899 +/* Intel(R) Ethernet Connection E822-L 1GbE */ +#define ICE_DEV_ID_E822L_SGMII 0x189A #endif /* _ICE_DEVIDS_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 9ebd93e79aeb..90c6a3ca20c9 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -4,6 +4,7 @@ /* ethtool support for ice */ #include "ice.h" +#include "ice_flow.h" #include "ice_lib.h" #include "ice_dcb_lib.h" @@ -283,12 +284,15 @@ out: */ static bool ice_active_vfs(struct ice_pf *pf) { - struct ice_vf *vf = pf->vf; int i; - for (i = 0; i < pf->num_alloc_vfs; i++, vf++) + ice_for_each_vf(pf, i) { + struct ice_vf *vf = &pf->vf[i]; + if (test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) return true; + } + return false; } @@ -2531,6 +2535,243 @@ done: } /** + * ice_parse_hdrs - parses headers from RSS hash input + * @nfc: ethtool rxnfc command + * + * This function parses the rxnfc command and returns intended + * header types for RSS configuration + */ +static u32 ice_parse_hdrs(struct ethtool_rxnfc *nfc) +{ + u32 hdrs = ICE_FLOW_SEG_HDR_NONE; + + switch (nfc->flow_type) { + case TCP_V4_FLOW: + hdrs |= ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV4; + break; + case UDP_V4_FLOW: + hdrs |= ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV4; + break; + case SCTP_V4_FLOW: + hdrs |= ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV4; + break; + case TCP_V6_FLOW: + hdrs |= ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV6; + break; + case UDP_V6_FLOW: + hdrs |= ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV6; + break; + case SCTP_V6_FLOW: + hdrs |= ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV6; + break; + default: + break; + } + return hdrs; +} + +#define ICE_FLOW_HASH_FLD_IPV4_SA BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) +#define ICE_FLOW_HASH_FLD_IPV6_SA BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA) +#define ICE_FLOW_HASH_FLD_IPV4_DA BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) +#define ICE_FLOW_HASH_FLD_IPV6_DA BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA) +#define ICE_FLOW_HASH_FLD_TCP_SRC_PORT BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT) +#define ICE_FLOW_HASH_FLD_TCP_DST_PORT BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT) +#define ICE_FLOW_HASH_FLD_UDP_SRC_PORT BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT) +#define ICE_FLOW_HASH_FLD_UDP_DST_PORT 
BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_DST_PORT) +#define ICE_FLOW_HASH_FLD_SCTP_SRC_PORT \ + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT) +#define ICE_FLOW_HASH_FLD_SCTP_DST_PORT \ + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT) + +/** + * ice_parse_hash_flds - parses hash fields from RSS hash input + * @nfc: ethtool rxnfc command + * + * This function parses the rxnfc command and returns intended + * hash fields for RSS configuration + */ +static u64 ice_parse_hash_flds(struct ethtool_rxnfc *nfc) +{ + u64 hfld = ICE_HASH_INVALID; + + if (nfc->data & RXH_IP_SRC || nfc->data & RXH_IP_DST) { + switch (nfc->flow_type) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case SCTP_V4_FLOW: + if (nfc->data & RXH_IP_SRC) + hfld |= ICE_FLOW_HASH_FLD_IPV4_SA; + if (nfc->data & RXH_IP_DST) + hfld |= ICE_FLOW_HASH_FLD_IPV4_DA; + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + case SCTP_V6_FLOW: + if (nfc->data & RXH_IP_SRC) + hfld |= ICE_FLOW_HASH_FLD_IPV6_SA; + if (nfc->data & RXH_IP_DST) + hfld |= ICE_FLOW_HASH_FLD_IPV6_DA; + break; + default: + break; + } + } + + if (nfc->data & RXH_L4_B_0_1 || nfc->data & RXH_L4_B_2_3) { + switch (nfc->flow_type) { + case TCP_V4_FLOW: + case TCP_V6_FLOW: + if (nfc->data & RXH_L4_B_0_1) + hfld |= ICE_FLOW_HASH_FLD_TCP_SRC_PORT; + if (nfc->data & RXH_L4_B_2_3) + hfld |= ICE_FLOW_HASH_FLD_TCP_DST_PORT; + break; + case UDP_V4_FLOW: + case UDP_V6_FLOW: + if (nfc->data & RXH_L4_B_0_1) + hfld |= ICE_FLOW_HASH_FLD_UDP_SRC_PORT; + if (nfc->data & RXH_L4_B_2_3) + hfld |= ICE_FLOW_HASH_FLD_UDP_DST_PORT; + break; + case SCTP_V4_FLOW: + case SCTP_V6_FLOW: + if (nfc->data & RXH_L4_B_0_1) + hfld |= ICE_FLOW_HASH_FLD_SCTP_SRC_PORT; + if (nfc->data & RXH_L4_B_2_3) + hfld |= ICE_FLOW_HASH_FLD_SCTP_DST_PORT; + break; + default: + break; + } + } + + return hfld; +} + +/** + * ice_set_rss_hash_opt - Enable/Disable flow types for RSS hash + * @vsi: the VSI being configured + * @nfc: ethtool rxnfc command + * + * Returns Success if the flow input set is supported. + */ +static int +ice_set_rss_hash_opt(struct ice_vsi *vsi, struct ethtool_rxnfc *nfc) +{ + struct ice_pf *pf = vsi->back; + enum ice_status status; + struct device *dev; + u64 hashed_flds; + u32 hdrs; + + dev = ice_pf_to_dev(pf); + if (ice_is_safe_mode(pf)) { + dev_dbg(dev, "Advanced RSS disabled. Package download failed, vsi num = %d\n", + vsi->vsi_num); + return -EINVAL; + } + + hashed_flds = ice_parse_hash_flds(nfc); + if (hashed_flds == ICE_HASH_INVALID) { + dev_dbg(dev, "Invalid hash fields, vsi num = %d\n", + vsi->vsi_num); + return -EINVAL; + } + + hdrs = ice_parse_hdrs(nfc); + if (hdrs == ICE_FLOW_SEG_HDR_NONE) { + dev_dbg(dev, "Header type is not valid, vsi num = %d\n", + vsi->vsi_num); + return -EINVAL; + } + + status = ice_add_rss_cfg(&pf->hw, vsi->idx, hashed_flds, hdrs); + if (status) { + dev_dbg(dev, "ice_add_rss_cfg failed, vsi num = %d, error = %d\n", + vsi->vsi_num, status); + return -EINVAL; + } + + return 0; +} + +/** + * ice_get_rss_hash_opt - Retrieve hash fields for a given flow-type + * @vsi: the VSI being configured + * @nfc: ethtool rxnfc command + */ +static void +ice_get_rss_hash_opt(struct ice_vsi *vsi, struct ethtool_rxnfc *nfc) +{ + struct ice_pf *pf = vsi->back; + struct device *dev; + u64 hash_flds; + u32 hdrs; + + dev = ice_pf_to_dev(pf); + + nfc->data = 0; + if (ice_is_safe_mode(pf)) { + dev_dbg(dev, "Advanced RSS disabled. 
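
ice_parse_hash_flds() above translates the ethtool RXH_* request bits into a 64-bit field mask that ice_set_rss_hash_opt() then programs. A simplified sketch of that translation; the EXAMPLE_FLD_* bit positions and the function name are hypothetical.

#include <linux/bits.h>
#include <linux/ethtool.h>
#include <linux/types.h>

#define EXAMPLE_FLD_IP_SA	BIT_ULL(0)	/* hypothetical bit positions */
#define EXAMPLE_FLD_IP_DA	BIT_ULL(1)
#define EXAMPLE_FLD_L4_SPORT	BIT_ULL(2)
#define EXAMPLE_FLD_L4_DPORT	BIT_ULL(3)

static u64 example_parse_hash_flds(const struct ethtool_rxnfc *nfc)
{
	u64 hfld = 0;

	if (nfc->data & RXH_IP_SRC)
		hfld |= EXAMPLE_FLD_IP_SA;
	if (nfc->data & RXH_IP_DST)
		hfld |= EXAMPLE_FLD_IP_DA;
	if (nfc->data & RXH_L4_B_0_1)
		hfld |= EXAMPLE_FLD_L4_SPORT;
	if (nfc->data & RXH_L4_B_2_3)
		hfld |= EXAMPLE_FLD_L4_DPORT;

	return hfld;
}
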
Package download failed, vsi num = %d\n", + vsi->vsi_num); + return; + } + + hdrs = ice_parse_hdrs(nfc); + if (hdrs == ICE_FLOW_SEG_HDR_NONE) { + dev_dbg(dev, "Header type is not valid, vsi num = %d\n", + vsi->vsi_num); + return; + } + + hash_flds = ice_get_rss_cfg(&pf->hw, vsi->idx, hdrs); + if (hash_flds == ICE_HASH_INVALID) { + dev_dbg(dev, "No hash fields found for the given header type, vsi num = %d\n", + vsi->vsi_num); + return; + } + + if (hash_flds & ICE_FLOW_HASH_FLD_IPV4_SA || + hash_flds & ICE_FLOW_HASH_FLD_IPV6_SA) + nfc->data |= (u64)RXH_IP_SRC; + + if (hash_flds & ICE_FLOW_HASH_FLD_IPV4_DA || + hash_flds & ICE_FLOW_HASH_FLD_IPV6_DA) + nfc->data |= (u64)RXH_IP_DST; + + if (hash_flds & ICE_FLOW_HASH_FLD_TCP_SRC_PORT || + hash_flds & ICE_FLOW_HASH_FLD_UDP_SRC_PORT || + hash_flds & ICE_FLOW_HASH_FLD_SCTP_SRC_PORT) + nfc->data |= (u64)RXH_L4_B_0_1; + + if (hash_flds & ICE_FLOW_HASH_FLD_TCP_DST_PORT || + hash_flds & ICE_FLOW_HASH_FLD_UDP_DST_PORT || + hash_flds & ICE_FLOW_HASH_FLD_SCTP_DST_PORT) + nfc->data |= (u64)RXH_L4_B_2_3; +} + +/** + * ice_set_rxnfc - command to set Rx flow rules. + * @netdev: network interface device structure + * @cmd: ethtool rxnfc command + * + * Returns 0 for success and negative values for errors + */ +static int ice_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + + switch (cmd->cmd) { + case ETHTOOL_SRXFH: + return ice_set_rss_hash_opt(vsi, cmd); + default: + break; + } + return -EOPNOTSUPP; +} + +/** * ice_get_rxnfc - command to get Rx flow classification rules * @netdev: network interface device structure * @cmd: ethtool rxnfc command @@ -2551,6 +2792,10 @@ ice_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, cmd->data = vsi->rss_size; ret = 0; break; + case ETHTOOL_GRXFH: + ice_get_rss_hash_opt(vsi, cmd); + ret = 0; + break; default: break; } @@ -3585,6 +3830,53 @@ ice_set_q_coalesce(struct ice_vsi *vsi, struct ethtool_coalesce *ec, int q_num) } /** + * ice_is_coalesce_param_invalid - check for unsupported coalesce parameters + * @netdev: pointer to the netdev associated with this query + * @ec: ethtool structure to fill with driver's coalesce settings + * + * Print netdev info if driver doesn't support one of the parameters + * and return error. When any parameters will be implemented, remove only + * this parameter from param array. 
+ */ +static int +ice_is_coalesce_param_invalid(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + struct ice_ethtool_not_used { + u32 value; + const char *name; + } param[] = { + {ec->stats_block_coalesce_usecs, "stats-block-usecs"}, + {ec->rate_sample_interval, "sample-interval"}, + {ec->pkt_rate_low, "pkt-rate-low"}, + {ec->pkt_rate_high, "pkt-rate-high"}, + {ec->rx_max_coalesced_frames, "rx-frames"}, + {ec->rx_coalesce_usecs_irq, "rx-usecs-irq"}, + {ec->rx_max_coalesced_frames_irq, "rx-frames-irq"}, + {ec->tx_max_coalesced_frames, "tx-frames"}, + {ec->tx_coalesce_usecs_irq, "tx-usecs-irq"}, + {ec->tx_max_coalesced_frames_irq, "tx-frames-irq"}, + {ec->rx_coalesce_usecs_low, "rx-usecs-low"}, + {ec->rx_max_coalesced_frames_low, "rx-frames-low"}, + {ec->tx_coalesce_usecs_low, "tx-usecs-low"}, + {ec->tx_max_coalesced_frames_low, "tx-frames-low"}, + {ec->rx_max_coalesced_frames_high, "rx-frames-high"}, + {ec->tx_max_coalesced_frames_high, "tx-frames-high"} + }; + int i; + + for (i = 0; i < ARRAY_SIZE(param); i++) { + if (param[i].value) { + netdev_info(netdev, "Setting %s not supported\n", + param[i].name); + return -EINVAL; + } + } + + return 0; +} + +/** * __ice_set_coalesce - set ITR/INTRL values for the device * @netdev: pointer to the netdev associated with this query * @ec: ethtool structure to fill with driver's coalesce settings @@ -3600,6 +3892,9 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; + if (ice_is_coalesce_param_invalid(netdev, ec)) + return -EINVAL; + if (q_num < 0) { int v_idx; @@ -3804,6 +4099,7 @@ static const struct ethtool_ops ice_ethtool_ops = { .set_priv_flags = ice_set_priv_flags, .get_sset_count = ice_get_sset_count, .get_rxnfc = ice_get_rxnfc, + .set_rxnfc = ice_set_rxnfc, .get_ringparam = ice_get_ringparam, .set_ringparam = ice_set_ringparam, .nway_reset = ice_nway_reset, diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index cbd53b586c36..99208946224c 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -3,6 +3,87 @@ #include "ice_common.h" #include "ice_flex_pipe.h" +#include "ice_flow.h" + +static const u32 ice_sect_lkup[ICE_BLK_COUNT][ICE_SECT_COUNT] = { + /* SWITCH */ + { + ICE_SID_XLT0_SW, + ICE_SID_XLT_KEY_BUILDER_SW, + ICE_SID_XLT1_SW, + ICE_SID_XLT2_SW, + ICE_SID_PROFID_TCAM_SW, + ICE_SID_PROFID_REDIR_SW, + ICE_SID_FLD_VEC_SW, + ICE_SID_CDID_KEY_BUILDER_SW, + ICE_SID_CDID_REDIR_SW + }, + + /* ACL */ + { + ICE_SID_XLT0_ACL, + ICE_SID_XLT_KEY_BUILDER_ACL, + ICE_SID_XLT1_ACL, + ICE_SID_XLT2_ACL, + ICE_SID_PROFID_TCAM_ACL, + ICE_SID_PROFID_REDIR_ACL, + ICE_SID_FLD_VEC_ACL, + ICE_SID_CDID_KEY_BUILDER_ACL, + ICE_SID_CDID_REDIR_ACL + }, + + /* FD */ + { + ICE_SID_XLT0_FD, + ICE_SID_XLT_KEY_BUILDER_FD, + ICE_SID_XLT1_FD, + ICE_SID_XLT2_FD, + ICE_SID_PROFID_TCAM_FD, + ICE_SID_PROFID_REDIR_FD, + ICE_SID_FLD_VEC_FD, + ICE_SID_CDID_KEY_BUILDER_FD, + ICE_SID_CDID_REDIR_FD + }, + + /* RSS */ + { + ICE_SID_XLT0_RSS, + ICE_SID_XLT_KEY_BUILDER_RSS, + ICE_SID_XLT1_RSS, + ICE_SID_XLT2_RSS, + ICE_SID_PROFID_TCAM_RSS, + ICE_SID_PROFID_REDIR_RSS, + ICE_SID_FLD_VEC_RSS, + ICE_SID_CDID_KEY_BUILDER_RSS, + ICE_SID_CDID_REDIR_RSS + }, + + /* PE */ + { + ICE_SID_XLT0_PE, + ICE_SID_XLT_KEY_BUILDER_PE, + ICE_SID_XLT1_PE, + ICE_SID_XLT2_PE, + ICE_SID_PROFID_TCAM_PE, + ICE_SID_PROFID_REDIR_PE, + ICE_SID_FLD_VEC_PE, + 
ICE_SID_CDID_KEY_BUILDER_PE, + ICE_SID_CDID_REDIR_PE + } +}; + +/** + * ice_sect_id - returns section ID + * @blk: block type + * @sect: section type + * + * This helper function returns the proper section ID given a block type and a + * section type. + */ +static u32 ice_sect_id(enum ice_block blk, enum ice_sect sect) +{ + return ice_sect_lkup[blk][sect]; +} /** * ice_pkg_val_buf @@ -158,6 +239,176 @@ ice_pkg_enum_section(struct ice_seg *ice_seg, struct ice_pkg_enum *state, return state->sect; } +/* Key creation */ + +#define ICE_DC_KEY 0x1 /* don't care */ +#define ICE_DC_KEYINV 0x1 +#define ICE_NM_KEY 0x0 /* never match */ +#define ICE_NM_KEYINV 0x0 +#define ICE_0_KEY 0x1 /* match 0 */ +#define ICE_0_KEYINV 0x0 +#define ICE_1_KEY 0x0 /* match 1 */ +#define ICE_1_KEYINV 0x1 + +/** + * ice_gen_key_word - generate 16-bits of a key/mask word + * @val: the value + * @valid: valid bits mask (change only the valid bits) + * @dont_care: don't care mask + * @nvr_mtch: never match mask + * @key: pointer to an array of where the resulting key portion + * @key_inv: pointer to an array of where the resulting key invert portion + * + * This function generates 16-bits from a 8-bit value, an 8-bit don't care mask + * and an 8-bit never match mask. The 16-bits of output are divided into 8 bits + * of key and 8 bits of key invert. + * + * '0' = b01, always match a 0 bit + * '1' = b10, always match a 1 bit + * '?' = b11, don't care bit (always matches) + * '~' = b00, never match bit + * + * Input: + * val: b0 1 0 1 0 1 + * dont_care: b0 0 1 1 0 0 + * never_mtch: b0 0 0 0 1 1 + * ------------------------------ + * Result: key: b01 10 11 11 00 00 + */ +static enum ice_status +ice_gen_key_word(u8 val, u8 valid, u8 dont_care, u8 nvr_mtch, u8 *key, + u8 *key_inv) +{ + u8 in_key = *key, in_key_inv = *key_inv; + u8 i; + + /* 'dont_care' and 'nvr_mtch' masks cannot overlap */ + if ((dont_care ^ nvr_mtch) != (dont_care | nvr_mtch)) + return ICE_ERR_CFG; + + *key = 0; + *key_inv = 0; + + /* encode the 8 bits into 8-bit key and 8-bit key invert */ + for (i = 0; i < 8; i++) { + *key >>= 1; + *key_inv >>= 1; + + if (!(valid & 0x1)) { /* change only valid bits */ + *key |= (in_key & 0x1) << 7; + *key_inv |= (in_key_inv & 0x1) << 7; + } else if (dont_care & 0x1) { /* don't care bit */ + *key |= ICE_DC_KEY << 7; + *key_inv |= ICE_DC_KEYINV << 7; + } else if (nvr_mtch & 0x1) { /* never match bit */ + *key |= ICE_NM_KEY << 7; + *key_inv |= ICE_NM_KEYINV << 7; + } else if (val & 0x01) { /* exact 1 match */ + *key |= ICE_1_KEY << 7; + *key_inv |= ICE_1_KEYINV << 7; + } else { /* exact 0 match */ + *key |= ICE_0_KEY << 7; + *key_inv |= ICE_0_KEYINV << 7; + } + + dont_care >>= 1; + nvr_mtch >>= 1; + valid >>= 1; + val >>= 1; + in_key >>= 1; + in_key_inv >>= 1; + } + + return 0; +} + +/** + * ice_bits_max_set - determine if the number of bits set is within a maximum + * @mask: pointer to the byte array which is the mask + * @size: the number of bytes in the mask + * @max: the max number of set bits + * + * This function determines if there are at most 'max' number of bits set in an + * array. Returns true if the number for bits set is <= max or will return false + * otherwise. 
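+ * The scan short-circuits: it stops counting as soon as the running hweight8() total exceeds 'max'.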
+ */ +static bool ice_bits_max_set(const u8 *mask, u16 size, u16 max) +{ + u16 count = 0; + u16 i; + + /* check each byte */ + for (i = 0; i < size; i++) { + /* if 0, go to next byte */ + if (!mask[i]) + continue; + + /* We know there is at least one set bit in this byte because of + * the above check; if we already have found 'max' number of + * bits set, then we can return failure now. + */ + if (count == max) + return false; + + /* count the bits in this byte, checking threshold */ + count += hweight8(mask[i]); + if (count > max) + return false; + } + + return true; +} + +/** + * ice_set_key - generate a variable sized key with multiples of 16-bits + * @key: pointer to where the key will be stored + * @size: the size of the complete key in bytes (must be even) + * @val: array of 8-bit values that makes up the value portion of the key + * @upd: array of 8-bit masks that determine what key portion to update + * @dc: array of 8-bit masks that make up the don't care mask + * @nm: array of 8-bit masks that make up the never match mask + * @off: the offset of the first byte in the key to update + * @len: the number of bytes in the key update + * + * This function generates a key from a value, a don't care mask and a never + * match mask. + * upd, dc, and nm are optional parameters, and can be NULL: + * upd == NULL --> udp mask is all 1's (update all bits) + * dc == NULL --> dc mask is all 0's (no don't care bits) + * nm == NULL --> nm mask is all 0's (no never match bits) + */ +static enum ice_status +ice_set_key(u8 *key, u16 size, u8 *val, u8 *upd, u8 *dc, u8 *nm, u16 off, + u16 len) +{ + u16 half_size; + u16 i; + + /* size must be a multiple of 2 bytes. */ + if (size % 2) + return ICE_ERR_CFG; + + half_size = size / 2; + if (off + len > half_size) + return ICE_ERR_CFG; + + /* Make sure at most one bit is set in the never match mask. Having more + * than one never match mask bit set will cause HW to consume excessive + * power otherwise; this is a power management efficiency check. + */ +#define ICE_NVR_MTCH_BITS_MAX 1 + if (nm && !ice_bits_max_set(nm, len, ICE_NVR_MTCH_BITS_MAX)) + return ICE_ERR_CFG; + + for (i = 0; i < len; i++) + if (ice_gen_key_word(val[i], upd ? upd[i] : 0xff, + dc ? dc[i] : 0, nm ? nm[i] : 0, + key + off + i, key + half_size + off + i)) + return ICE_ERR_CFG; + + return 0; +} + /** * ice_acquire_global_cfg_lock * @hw: pointer to the HW structure @@ -205,6 +456,31 @@ static void ice_release_global_cfg_lock(struct ice_hw *hw) } /** + * ice_acquire_change_lock + * @hw: pointer to the HW structure + * @access: access type (read or write) + * + * This function will request ownership of the change lock. + */ +static enum ice_status +ice_acquire_change_lock(struct ice_hw *hw, enum ice_aq_res_access_type access) +{ + return ice_acquire_res(hw, ICE_CHANGE_LOCK_RES_ID, access, + ICE_CHANGE_LOCK_TIMEOUT); +} + +/** + * ice_release_change_lock + * @hw: pointer to the HW structure + * + * This function will release the change lock using the proper Admin Command. 
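+ * Counterpart of ice_acquire_change_lock(); both operate on ICE_CHANGE_LOCK_RES_ID.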
+ */ +static void ice_release_change_lock(struct ice_hw *hw) +{ + ice_release_res(hw, ICE_CHANGE_LOCK_RES_ID); +} + +/** * ice_aq_download_pkg * @hw: pointer to the hardware structure * @pkg_buf: the package buffer to transfer @@ -253,6 +529,54 @@ ice_aq_download_pkg(struct ice_hw *hw, struct ice_buf_hdr *pkg_buf, } /** + * ice_aq_update_pkg + * @hw: pointer to the hardware structure + * @pkg_buf: the package cmd buffer + * @buf_size: the size of the package cmd buffer + * @last_buf: last buffer indicator + * @error_offset: returns error offset + * @error_info: returns error information + * @cd: pointer to command details structure or NULL + * + * Update Package (0x0C42) + */ +static enum ice_status +ice_aq_update_pkg(struct ice_hw *hw, struct ice_buf_hdr *pkg_buf, u16 buf_size, + bool last_buf, u32 *error_offset, u32 *error_info, + struct ice_sq_cd *cd) +{ + struct ice_aqc_download_pkg *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + if (error_offset) + *error_offset = 0; + if (error_info) + *error_info = 0; + + cmd = &desc.params.download_pkg; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_update_pkg); + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + + if (last_buf) + cmd->flags |= ICE_AQC_DOWNLOAD_PKG_LAST_BUF; + + status = ice_aq_send_cmd(hw, &desc, pkg_buf, buf_size, cd); + if (status == ICE_ERR_AQ_ERROR) { + /* Read error from buffer only when the FW returned an error */ + struct ice_aqc_download_pkg_resp *resp; + + resp = (struct ice_aqc_download_pkg_resp *)pkg_buf; + if (error_offset) + *error_offset = le32_to_cpu(resp->error_offset); + if (error_info) + *error_info = le32_to_cpu(resp->error_info); + } + + return status; +} + +/** * ice_find_seg_in_pkg * @hw: pointer to the hardware structure * @seg_type: the segment type to search for (i.e., SEGMENT_TYPE_CPK) @@ -287,6 +611,44 @@ ice_find_seg_in_pkg(struct ice_hw *hw, u32 seg_type, } /** + * ice_update_pkg + * @hw: pointer to the hardware structure + * @bufs: pointer to an array of buffers + * @count: the number of buffers in the array + * + * Obtains change lock and updates package. + */ +static enum ice_status +ice_update_pkg(struct ice_hw *hw, struct ice_buf *bufs, u32 count) +{ + enum ice_status status; + u32 offset, info, i; + + status = ice_acquire_change_lock(hw, ICE_RES_WRITE); + if (status) + return status; + + for (i = 0; i < count; i++) { + struct ice_buf_hdr *bh = (struct ice_buf_hdr *)(bufs + i); + bool last = ((i + 1) == count); + + status = ice_aq_update_pkg(hw, bh, le16_to_cpu(bh->data_end), + last, &offset, &info, NULL); + + if (status) { + ice_debug(hw, ICE_DBG_PKG, + "Update pkg failed: err %d off %d inf %d\n", + status, offset, info); + break; + } + } + + ice_release_change_lock(hw); + + return status; +} + +/** * ice_dwnld_cfg_bufs * @hw: pointer to the hardware structure * @bufs: pointer to an array of buffers @@ -767,6 +1129,169 @@ enum ice_status ice_copy_and_init_pkg(struct ice_hw *hw, const u8 *buf, u32 len) return status; } +/** + * ice_pkg_buf_alloc + * @hw: pointer to the HW structure + * + * Allocates a package buffer and returns a pointer to the buffer header. + * Note: all package contents must be in Little Endian form. 
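+ * The build structure is devm-allocated; release it with ice_pkg_buf_free().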
+ */ +static struct ice_buf_build *ice_pkg_buf_alloc(struct ice_hw *hw) +{ + struct ice_buf_build *bld; + struct ice_buf_hdr *buf; + + bld = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*bld), GFP_KERNEL); + if (!bld) + return NULL; + + buf = (struct ice_buf_hdr *)bld; + buf->data_end = cpu_to_le16(offsetof(struct ice_buf_hdr, + section_entry)); + return bld; +} + +/** + * ice_pkg_buf_free + * @hw: pointer to the HW structure + * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc()) + * + * Frees a package buffer + */ +static void ice_pkg_buf_free(struct ice_hw *hw, struct ice_buf_build *bld) +{ + devm_kfree(ice_hw_to_dev(hw), bld); +} + +/** + * ice_pkg_buf_reserve_section + * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc()) + * @count: the number of sections to reserve + * + * Reserves one or more section table entries in a package buffer. This routine + * can be called multiple times as long as they are made before calling + * ice_pkg_buf_alloc_section(). Once ice_pkg_buf_alloc_section() + * is called once, the number of sections that can be allocated will not be able + * to be increased; not using all reserved sections is fine, but this will + * result in some wasted space in the buffer. + * Note: all package contents must be in Little Endian form. + */ +static enum ice_status +ice_pkg_buf_reserve_section(struct ice_buf_build *bld, u16 count) +{ + struct ice_buf_hdr *buf; + u16 section_count; + u16 data_end; + + if (!bld) + return ICE_ERR_PARAM; + + buf = (struct ice_buf_hdr *)&bld->buf; + + /* already an active section, can't increase table size */ + section_count = le16_to_cpu(buf->section_count); + if (section_count > 0) + return ICE_ERR_CFG; + + if (bld->reserved_section_table_entries + count > ICE_MAX_S_COUNT) + return ICE_ERR_CFG; + bld->reserved_section_table_entries += count; + + data_end = le16_to_cpu(buf->data_end) + + (count * sizeof(buf->section_entry[0])); + buf->data_end = cpu_to_le16(data_end); + + return 0; +} + +/** + * ice_pkg_buf_alloc_section + * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc()) + * @type: the section type value + * @size: the size of the section to reserve (in bytes) + * + * Reserves memory in the buffer for a section's content and updates the + * buffers' status accordingly. This routine returns a pointer to the first + * byte of the section start within the buffer, which is used to fill in the + * section contents. + * Note: all package contents must be in Little Endian form. 
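+ * Returns NULL if the (4-byte aligned) section would exceed ICE_MAX_S_DATA_END or if no reserved section table entries remain.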
+ */ +static void * +ice_pkg_buf_alloc_section(struct ice_buf_build *bld, u32 type, u16 size) +{ + struct ice_buf_hdr *buf; + u16 sect_count; + u16 data_end; + + if (!bld || !type || !size) + return NULL; + + buf = (struct ice_buf_hdr *)&bld->buf; + + /* check for enough space left in buffer */ + data_end = le16_to_cpu(buf->data_end); + + /* section start must align on 4 byte boundary */ + data_end = ALIGN(data_end, 4); + + if ((data_end + size) > ICE_MAX_S_DATA_END) + return NULL; + + /* check for more available section table entries */ + sect_count = le16_to_cpu(buf->section_count); + if (sect_count < bld->reserved_section_table_entries) { + void *section_ptr = ((u8 *)buf) + data_end; + + buf->section_entry[sect_count].offset = cpu_to_le16(data_end); + buf->section_entry[sect_count].size = cpu_to_le16(size); + buf->section_entry[sect_count].type = cpu_to_le32(type); + + data_end += size; + buf->data_end = cpu_to_le16(data_end); + + buf->section_count = cpu_to_le16(sect_count + 1); + return section_ptr; + } + + /* no free section table entries */ + return NULL; +} + +/** + * ice_pkg_buf_get_active_sections + * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc()) + * + * Returns the number of active sections. Before using the package buffer + * in an update package command, the caller should make sure that there is at + * least one active section - otherwise, the buffer is not legal and should + * not be used. + * Note: all package contents must be in Little Endian form. + */ +static u16 ice_pkg_buf_get_active_sections(struct ice_buf_build *bld) +{ + struct ice_buf_hdr *buf; + + if (!bld) + return 0; + + buf = (struct ice_buf_hdr *)&bld->buf; + return le16_to_cpu(buf->section_count); +} + +/** + * ice_pkg_buf + * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc()) + * + * Return a pointer to the buffer's header + */ +static struct ice_buf *ice_pkg_buf(struct ice_buf_build *bld) +{ + if (!bld) + return NULL; + + return &bld->buf; +} + /* PTG Management */ /** @@ -951,6 +1476,48 @@ enum ice_sid_all { ICE_SID_OFF_COUNT, }; +/* Characteristic handling */ + +/** + * ice_match_prop_lst - determine if properties of two lists match + * @list1: first properties list + * @list2: second properties list + * + * Count, cookies and the order must match in order to be considered equivalent. + */ +static bool +ice_match_prop_lst(struct list_head *list1, struct list_head *list2) +{ + struct ice_vsig_prof *tmp1; + struct ice_vsig_prof *tmp2; + u16 chk_count = 0; + u16 count = 0; + + /* compare counts */ + list_for_each_entry(tmp1, list1, list) + count++; + list_for_each_entry(tmp2, list2, list) + chk_count++; + if (!count || count != chk_count) + return false; + + tmp1 = list_first_entry(list1, struct ice_vsig_prof, list); + tmp2 = list_first_entry(list2, struct ice_vsig_prof, list); + + /* profile cookies must compare, and in the exact same order to take + * into account priority + */ + while (count--) { + if (tmp2->profile_cookie != tmp1->profile_cookie) + return false; + + tmp1 = list_next_entry(tmp1, list); + tmp2 = list_next_entry(tmp2, list); + } + + return true; +} + /* VSIG Management */ /** @@ -999,6 +1566,117 @@ static u16 ice_vsig_alloc_val(struct ice_hw *hw, enum ice_block blk, u16 vsig) } /** + * ice_vsig_alloc - Finds a free entry and allocates a new VSIG + * @hw: pointer to the hardware structure + * @blk: HW block + * + * This function will iterate through the VSIG list and mark the first + * unused entry for the new VSIG entry as used and return that value. 
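+ * Entry 0 is skipped; ICE_DEFAULT_VSIG is returned when the table is full.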
+ */ +static u16 ice_vsig_alloc(struct ice_hw *hw, enum ice_block blk) +{ + u16 i; + + for (i = 1; i < ICE_MAX_VSIGS; i++) + if (!hw->blk[blk].xlt2.vsig_tbl[i].in_use) + return ice_vsig_alloc_val(hw, blk, i); + + return ICE_DEFAULT_VSIG; +} + +/** + * ice_find_dup_props_vsig - find VSI group with a specified set of properties + * @hw: pointer to the hardware structure + * @blk: HW block + * @chs: characteristic list + * @vsig: returns the VSIG with the matching profiles, if found + * + * Each VSIG is associated with a characteristic set; i.e. all VSIs under + * a group have the same characteristic set. To check if there exists a VSIG + * which has the same characteristics as the input characteristics; this + * function will iterate through the XLT2 list and return the VSIG that has a + * matching configuration. In order to make sure that priorities are accounted + * for, the list must match exactly, including the order in which the + * characteristics are listed. + */ +static enum ice_status +ice_find_dup_props_vsig(struct ice_hw *hw, enum ice_block blk, + struct list_head *chs, u16 *vsig) +{ + struct ice_xlt2 *xlt2 = &hw->blk[blk].xlt2; + u16 i; + + for (i = 0; i < xlt2->count; i++) + if (xlt2->vsig_tbl[i].in_use && + ice_match_prop_lst(chs, &xlt2->vsig_tbl[i].prop_lst)) { + *vsig = ICE_VSIG_VALUE(i, hw->pf_id); + return 0; + } + + return ICE_ERR_DOES_NOT_EXIST; +} + +/** + * ice_vsig_free - free VSI group + * @hw: pointer to the hardware structure + * @blk: HW block + * @vsig: VSIG to remove + * + * The function will remove all VSIs associated with the input VSIG and move + * them to the DEFAULT_VSIG and mark the VSIG available. + */ +static enum ice_status +ice_vsig_free(struct ice_hw *hw, enum ice_block blk, u16 vsig) +{ + struct ice_vsig_prof *dtmp, *del; + struct ice_vsig_vsi *vsi_cur; + u16 idx; + + idx = vsig & ICE_VSIG_IDX_M; + if (idx >= ICE_MAX_VSIGS) + return ICE_ERR_PARAM; + + if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use) + return ICE_ERR_DOES_NOT_EXIST; + + hw->blk[blk].xlt2.vsig_tbl[idx].in_use = false; + + vsi_cur = hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi; + /* If the VSIG has at least 1 VSI then iterate through the + * list and remove the VSIs before deleting the group. 
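+ * Each VSI is reassigned to ICE_DEFAULT_VSIG, marked as changed and unlinked from the chain.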
+ */ + if (vsi_cur) { + /* remove all vsis associated with this VSIG XLT2 entry */ + do { + struct ice_vsig_vsi *tmp = vsi_cur->next_vsi; + + vsi_cur->vsig = ICE_DEFAULT_VSIG; + vsi_cur->changed = 1; + vsi_cur->next_vsi = NULL; + vsi_cur = tmp; + } while (vsi_cur); + + /* NULL terminate head of VSI list */ + hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi = NULL; + } + + /* free characteristic list */ + list_for_each_entry_safe(del, dtmp, + &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst, + list) { + list_del(&del->list); + devm_kfree(ice_hw_to_dev(hw), del); + } + + /* if VSIG characteristic list was cleared for reset + * re-initialize the list head + */ + INIT_LIST_HEAD(&hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst); + + return 0; +} + +/** * ice_vsig_remove_vsi - remove VSI from VSIG * @hw: pointer to the hardware structure * @blk: HW block @@ -1117,6 +1795,215 @@ ice_vsig_add_mv_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 vsig) return 0; } +/** + * ice_find_prof_id - find profile ID for a given field vector + * @hw: pointer to the hardware structure + * @blk: HW block + * @fv: field vector to search for + * @prof_id: receives the profile ID + */ +static enum ice_status +ice_find_prof_id(struct ice_hw *hw, enum ice_block blk, + struct ice_fv_word *fv, u8 *prof_id) +{ + struct ice_es *es = &hw->blk[blk].es; + u16 off, i; + + for (i = 0; i < es->count; i++) { + off = i * es->fvw; + + if (memcmp(&es->t[off], fv, es->fvw * sizeof(*fv))) + continue; + + *prof_id = i; + return 0; + } + + return ICE_ERR_DOES_NOT_EXIST; +} + +/** + * ice_prof_id_rsrc_type - get profile ID resource type for a block type + * @blk: the block type + * @rsrc_type: pointer to variable to receive the resource type + */ +static bool ice_prof_id_rsrc_type(enum ice_block blk, u16 *rsrc_type) +{ + switch (blk) { + case ICE_BLK_RSS: + *rsrc_type = ICE_AQC_RES_TYPE_HASH_PROF_BLDR_PROFID; + break; + default: + return false; + } + return true; +} + +/** + * ice_tcam_ent_rsrc_type - get TCAM entry resource type for a block type + * @blk: the block type + * @rsrc_type: pointer to variable to receive the resource type + */ +static bool ice_tcam_ent_rsrc_type(enum ice_block blk, u16 *rsrc_type) +{ + switch (blk) { + case ICE_BLK_RSS: + *rsrc_type = ICE_AQC_RES_TYPE_HASH_PROF_BLDR_TCAM; + break; + default: + return false; + } + return true; +} + +/** + * ice_alloc_tcam_ent - allocate hardware TCAM entry + * @hw: pointer to the HW struct + * @blk: the block to allocate the TCAM for + * @tcam_idx: pointer to variable to receive the TCAM entry + * + * This function allocates a new entry in a Profile ID TCAM for a specific + * block. + */ +static enum ice_status +ice_alloc_tcam_ent(struct ice_hw *hw, enum ice_block blk, u16 *tcam_idx) +{ + u16 res_type; + + if (!ice_tcam_ent_rsrc_type(blk, &res_type)) + return ICE_ERR_PARAM; + + return ice_alloc_hw_res(hw, res_type, 1, true, tcam_idx); +} + +/** + * ice_free_tcam_ent - free hardware TCAM entry + * @hw: pointer to the HW struct + * @blk: the block from which to free the TCAM entry + * @tcam_idx: the TCAM entry to free + * + * This function frees an entry in a Profile ID TCAM for a specific block. 
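+ * The entry index is returned to the shared resource pool via ice_free_hw_res().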
+ */ +static enum ice_status +ice_free_tcam_ent(struct ice_hw *hw, enum ice_block blk, u16 tcam_idx) +{ + u16 res_type; + + if (!ice_tcam_ent_rsrc_type(blk, &res_type)) + return ICE_ERR_PARAM; + + return ice_free_hw_res(hw, res_type, 1, &tcam_idx); +} + +/** + * ice_alloc_prof_id - allocate profile ID + * @hw: pointer to the HW struct + * @blk: the block to allocate the profile ID for + * @prof_id: pointer to variable to receive the profile ID + * + * This function allocates a new profile ID, which also corresponds to a Field + * Vector (Extraction Sequence) entry. + */ +static enum ice_status +ice_alloc_prof_id(struct ice_hw *hw, enum ice_block blk, u8 *prof_id) +{ + enum ice_status status; + u16 res_type; + u16 get_prof; + + if (!ice_prof_id_rsrc_type(blk, &res_type)) + return ICE_ERR_PARAM; + + status = ice_alloc_hw_res(hw, res_type, 1, false, &get_prof); + if (!status) + *prof_id = (u8)get_prof; + + return status; +} + +/** + * ice_free_prof_id - free profile ID + * @hw: pointer to the HW struct + * @blk: the block from which to free the profile ID + * @prof_id: the profile ID to free + * + * This function frees a profile ID, which also corresponds to a Field Vector. + */ +static enum ice_status +ice_free_prof_id(struct ice_hw *hw, enum ice_block blk, u8 prof_id) +{ + u16 tmp_prof_id = (u16)prof_id; + u16 res_type; + + if (!ice_prof_id_rsrc_type(blk, &res_type)) + return ICE_ERR_PARAM; + + return ice_free_hw_res(hw, res_type, 1, &tmp_prof_id); +} + +/** + * ice_prof_inc_ref - increment reference count for profile + * @hw: pointer to the HW struct + * @blk: the block from which to free the profile ID + * @prof_id: the profile ID for which to increment the reference count + */ +static enum ice_status +ice_prof_inc_ref(struct ice_hw *hw, enum ice_block blk, u8 prof_id) +{ + if (prof_id > hw->blk[blk].es.count) + return ICE_ERR_PARAM; + + hw->blk[blk].es.ref_count[prof_id]++; + + return 0; +} + +/** + * ice_write_es - write an extraction sequence to hardware + * @hw: pointer to the HW struct + * @blk: the block in which to write the extraction sequence + * @prof_id: the profile ID to write + * @fv: pointer to the extraction sequence to write - NULL to clear extraction + */ +static void +ice_write_es(struct ice_hw *hw, enum ice_block blk, u8 prof_id, + struct ice_fv_word *fv) +{ + u16 off; + + off = prof_id * hw->blk[blk].es.fvw; + if (!fv) { + memset(&hw->blk[blk].es.t[off], 0, + hw->blk[blk].es.fvw * sizeof(*fv)); + hw->blk[blk].es.written[prof_id] = false; + } else { + memcpy(&hw->blk[blk].es.t[off], fv, + hw->blk[blk].es.fvw * sizeof(*fv)); + } +} + +/** + * ice_prof_dec_ref - decrement reference count for profile + * @hw: pointer to the HW struct + * @blk: the block from which to free the profile ID + * @prof_id: the profile ID for which to decrement the reference count + */ +static enum ice_status +ice_prof_dec_ref(struct ice_hw *hw, enum ice_block blk, u8 prof_id) +{ + if (prof_id > hw->blk[blk].es.count) + return ICE_ERR_PARAM; + + if (hw->blk[blk].es.ref_count[prof_id] > 0) { + if (!--hw->blk[blk].es.ref_count[prof_id]) { + ice_write_es(hw, blk, prof_id, NULL); + return ice_free_prof_id(hw, blk, prof_id); + } + } + + return 0; +} + /* Block / table section IDs */ static const u32 ice_blk_sids[ICE_BLK_COUNT][ICE_SID_OFF_COUNT] = { /* SWITCH */ @@ -1374,16 +2261,85 @@ void ice_fill_blk_tbls(struct ice_hw *hw) } /** + * ice_free_prof_map - free profile map + * @hw: pointer to the hardware structure + * @blk_idx: HW block index + */ +static void ice_free_prof_map(struct ice_hw *hw, u8 
blk_idx) +{ + struct ice_es *es = &hw->blk[blk_idx].es; + struct ice_prof_map *del, *tmp; + + mutex_lock(&es->prof_map_lock); + list_for_each_entry_safe(del, tmp, &es->prof_map, list) { + list_del(&del->list); + devm_kfree(ice_hw_to_dev(hw), del); + } + INIT_LIST_HEAD(&es->prof_map); + mutex_unlock(&es->prof_map_lock); +} + +/** + * ice_free_flow_profs - free flow profile entries + * @hw: pointer to the hardware structure + * @blk_idx: HW block index + */ +static void ice_free_flow_profs(struct ice_hw *hw, u8 blk_idx) +{ + struct ice_flow_prof *p, *tmp; + + mutex_lock(&hw->fl_profs_locks[blk_idx]); + list_for_each_entry_safe(p, tmp, &hw->fl_profs[blk_idx], l_entry) { + list_del(&p->l_entry); + devm_kfree(ice_hw_to_dev(hw), p); + } + mutex_unlock(&hw->fl_profs_locks[blk_idx]); + + /* if driver is in reset and tables are being cleared + * re-initialize the flow profile list heads + */ + INIT_LIST_HEAD(&hw->fl_profs[blk_idx]); +} + +/** + * ice_free_vsig_tbl - free complete VSIG table entries + * @hw: pointer to the hardware structure + * @blk: the HW block on which to free the VSIG table entries + */ +static void ice_free_vsig_tbl(struct ice_hw *hw, enum ice_block blk) +{ + u16 i; + + if (!hw->blk[blk].xlt2.vsig_tbl) + return; + + for (i = 1; i < ICE_MAX_VSIGS; i++) + if (hw->blk[blk].xlt2.vsig_tbl[i].in_use) + ice_vsig_free(hw, blk, i); +} + +/** * ice_free_hw_tbls - free hardware table memory * @hw: pointer to the hardware structure */ void ice_free_hw_tbls(struct ice_hw *hw) { + struct ice_rss_cfg *r, *rt; u8 i; for (i = 0; i < ICE_BLK_COUNT; i++) { - hw->blk[i].is_list_init = false; + if (hw->blk[i].is_list_init) { + struct ice_es *es = &hw->blk[i].es; + + ice_free_prof_map(hw, i); + mutex_destroy(&es->prof_map_lock); + ice_free_flow_profs(hw, i); + mutex_destroy(&hw->fl_profs_locks[i]); + + hw->blk[i].is_list_init = false; + } + ice_free_vsig_tbl(hw, (enum ice_block)i); devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt1.ptypes); devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt1.ptg_tbl); devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt1.t); @@ -1397,10 +2353,26 @@ void ice_free_hw_tbls(struct ice_hw *hw) devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.written); } + list_for_each_entry_safe(r, rt, &hw->rss_list_head, l_entry) { + list_del(&r->l_entry); + devm_kfree(ice_hw_to_dev(hw), r); + } + mutex_destroy(&hw->rss_locks); memset(hw->blk, 0, sizeof(hw->blk)); } /** + * ice_init_flow_profs - init flow profile locks and list heads + * @hw: pointer to the hardware structure + * @blk_idx: HW block index + */ +static void ice_init_flow_profs(struct ice_hw *hw, u8 blk_idx) +{ + mutex_init(&hw->fl_profs_locks[blk_idx]); + INIT_LIST_HEAD(&hw->fl_profs[blk_idx]); +} + +/** * ice_clear_hw_tbls - clear HW tables and flow profiles * @hw: pointer to the hardware structure */ @@ -1415,6 +2387,13 @@ void ice_clear_hw_tbls(struct ice_hw *hw) struct ice_xlt2 *xlt2 = &hw->blk[i].xlt2; struct ice_es *es = &hw->blk[i].es; + if (hw->blk[i].is_list_init) { + ice_free_prof_map(hw, i); + ice_free_flow_profs(hw, i); + } + + ice_free_vsig_tbl(hw, (enum ice_block)i); + memset(xlt1->ptypes, 0, xlt1->count * sizeof(*xlt1->ptypes)); memset(xlt1->ptg_tbl, 0, ICE_MAX_PTGS * sizeof(*xlt1->ptg_tbl)); @@ -1443,6 +2422,8 @@ enum ice_status ice_init_hw_tbls(struct ice_hw *hw) { u8 i; + mutex_init(&hw->rss_locks); + INIT_LIST_HEAD(&hw->rss_list_head); for (i = 0; i < ICE_BLK_COUNT; i++) { struct ice_prof_redir *prof_redir = &hw->blk[i].prof_redir; struct ice_prof_tcam *prof = &hw->blk[i].prof; @@ -1454,6 +2435,9 @@ enum ice_status 
ice_init_hw_tbls(struct ice_hw *hw) if (hw->blk[i].is_list_init) continue; + ice_init_flow_profs(hw, i); + mutex_init(&es->prof_map_lock); + INIT_LIST_HEAD(&es->prof_map); hw->blk[i].is_list_init = true; hw->blk[i].overwrite = blk_sizes[i].overwrite; @@ -1547,3 +2531,1580 @@ err: ice_free_hw_tbls(hw); return ICE_ERR_NO_MEMORY; } + +/** + * ice_prof_gen_key - generate profile ID key + * @hw: pointer to the HW struct + * @blk: the block in which to write profile ID to + * @ptg: packet type group (PTG) portion of key + * @vsig: VSIG portion of key + * @cdid: CDID portion of key + * @flags: flag portion of key + * @vl_msk: valid mask + * @dc_msk: don't care mask + * @nm_msk: never match mask + * @key: output of profile ID key + */ +static enum ice_status +ice_prof_gen_key(struct ice_hw *hw, enum ice_block blk, u8 ptg, u16 vsig, + u8 cdid, u16 flags, u8 vl_msk[ICE_TCAM_KEY_VAL_SZ], + u8 dc_msk[ICE_TCAM_KEY_VAL_SZ], u8 nm_msk[ICE_TCAM_KEY_VAL_SZ], + u8 key[ICE_TCAM_KEY_SZ]) +{ + struct ice_prof_id_key inkey; + + inkey.xlt1 = ptg; + inkey.xlt2_cdid = cpu_to_le16(vsig); + inkey.flags = cpu_to_le16(flags); + + switch (hw->blk[blk].prof.cdid_bits) { + case 0: + break; + case 2: +#define ICE_CD_2_M 0xC000U +#define ICE_CD_2_S 14 + inkey.xlt2_cdid &= ~cpu_to_le16(ICE_CD_2_M); + inkey.xlt2_cdid |= cpu_to_le16(BIT(cdid) << ICE_CD_2_S); + break; + case 4: +#define ICE_CD_4_M 0xF000U +#define ICE_CD_4_S 12 + inkey.xlt2_cdid &= ~cpu_to_le16(ICE_CD_4_M); + inkey.xlt2_cdid |= cpu_to_le16(BIT(cdid) << ICE_CD_4_S); + break; + case 8: +#define ICE_CD_8_M 0xFF00U +#define ICE_CD_8_S 16 + inkey.xlt2_cdid &= ~cpu_to_le16(ICE_CD_8_M); + inkey.xlt2_cdid |= cpu_to_le16(BIT(cdid) << ICE_CD_8_S); + break; + default: + ice_debug(hw, ICE_DBG_PKG, "Error in profile config\n"); + break; + } + + return ice_set_key(key, ICE_TCAM_KEY_SZ, (u8 *)&inkey, vl_msk, dc_msk, + nm_msk, 0, ICE_TCAM_KEY_SZ / 2); +} + +/** + * ice_tcam_write_entry - write TCAM entry + * @hw: pointer to the HW struct + * @blk: the block in which to write profile ID to + * @idx: the entry index to write to + * @prof_id: profile ID + * @ptg: packet type group (PTG) portion of key + * @vsig: VSIG portion of key + * @cdid: CDID portion of key + * @flags: flag portion of key + * @vl_msk: valid mask + * @dc_msk: don't care mask + * @nm_msk: never match mask + */ +static enum ice_status +ice_tcam_write_entry(struct ice_hw *hw, enum ice_block blk, u16 idx, + u8 prof_id, u8 ptg, u16 vsig, u8 cdid, u16 flags, + u8 vl_msk[ICE_TCAM_KEY_VAL_SZ], + u8 dc_msk[ICE_TCAM_KEY_VAL_SZ], + u8 nm_msk[ICE_TCAM_KEY_VAL_SZ]) +{ + struct ice_prof_tcam_entry; + enum ice_status status; + + status = ice_prof_gen_key(hw, blk, ptg, vsig, cdid, flags, vl_msk, + dc_msk, nm_msk, hw->blk[blk].prof.t[idx].key); + if (!status) { + hw->blk[blk].prof.t[idx].addr = cpu_to_le16(idx); + hw->blk[blk].prof.t[idx].prof_id = prof_id; + } + + return status; +} + +/** + * ice_vsig_get_ref - returns number of VSIs belong to a VSIG + * @hw: pointer to the hardware structure + * @blk: HW block + * @vsig: VSIG to query + * @refs: pointer to variable to receive the reference count + */ +static enum ice_status +ice_vsig_get_ref(struct ice_hw *hw, enum ice_block blk, u16 vsig, u16 *refs) +{ + u16 idx = vsig & ICE_VSIG_IDX_M; + struct ice_vsig_vsi *ptr; + + *refs = 0; + + if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use) + return ICE_ERR_DOES_NOT_EXIST; + + ptr = hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi; + while (ptr) { + (*refs)++; + ptr = ptr->next_vsi; + } + + return 0; +} + +/** + * ice_has_prof_vsig - check to 
see if VSIG has a specific profile + * @hw: pointer to the hardware structure + * @blk: HW block + * @vsig: VSIG to check against + * @hdl: profile handle + */ +static bool +ice_has_prof_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 hdl) +{ + u16 idx = vsig & ICE_VSIG_IDX_M; + struct ice_vsig_prof *ent; + + list_for_each_entry(ent, &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst, + list) + if (ent->profile_cookie == hdl) + return true; + + ice_debug(hw, ICE_DBG_INIT, + "Characteristic list for VSI group %d not found.\n", + vsig); + return false; +} + +/** + * ice_prof_bld_es - build profile ID extraction sequence changes + * @hw: pointer to the HW struct + * @blk: hardware block + * @bld: the update package buffer build to add to + * @chgs: the list of changes to make in hardware + */ +static enum ice_status +ice_prof_bld_es(struct ice_hw *hw, enum ice_block blk, + struct ice_buf_build *bld, struct list_head *chgs) +{ + u16 vec_size = hw->blk[blk].es.fvw * sizeof(struct ice_fv_word); + struct ice_chs_chg *tmp; + + list_for_each_entry(tmp, chgs, list_entry) + if (tmp->type == ICE_PTG_ES_ADD && tmp->add_prof) { + u16 off = tmp->prof_id * hw->blk[blk].es.fvw; + struct ice_pkg_es *p; + u32 id; + + id = ice_sect_id(blk, ICE_VEC_TBL); + p = (struct ice_pkg_es *) + ice_pkg_buf_alloc_section(bld, id, sizeof(*p) + + vec_size - + sizeof(p->es[0])); + + if (!p) + return ICE_ERR_MAX_LIMIT; + + p->count = cpu_to_le16(1); + p->offset = cpu_to_le16(tmp->prof_id); + + memcpy(p->es, &hw->blk[blk].es.t[off], vec_size); + } + + return 0; +} + +/** + * ice_prof_bld_tcam - build profile ID TCAM changes + * @hw: pointer to the HW struct + * @blk: hardware block + * @bld: the update package buffer build to add to + * @chgs: the list of changes to make in hardware + */ +static enum ice_status +ice_prof_bld_tcam(struct ice_hw *hw, enum ice_block blk, + struct ice_buf_build *bld, struct list_head *chgs) +{ + struct ice_chs_chg *tmp; + + list_for_each_entry(tmp, chgs, list_entry) + if (tmp->type == ICE_TCAM_ADD && tmp->add_tcam_idx) { + struct ice_prof_id_section *p; + u32 id; + + id = ice_sect_id(blk, ICE_PROF_TCAM); + p = (struct ice_prof_id_section *) + ice_pkg_buf_alloc_section(bld, id, sizeof(*p)); + + if (!p) + return ICE_ERR_MAX_LIMIT; + + p->count = cpu_to_le16(1); + p->entry[0].addr = cpu_to_le16(tmp->tcam_idx); + p->entry[0].prof_id = tmp->prof_id; + + memcpy(p->entry[0].key, + &hw->blk[blk].prof.t[tmp->tcam_idx].key, + sizeof(hw->blk[blk].prof.t->key)); + } + + return 0; +} + +/** + * ice_prof_bld_xlt1 - build XLT1 changes + * @blk: hardware block + * @bld: the update package buffer build to add to + * @chgs: the list of changes to make in hardware + */ +static enum ice_status +ice_prof_bld_xlt1(enum ice_block blk, struct ice_buf_build *bld, + struct list_head *chgs) +{ + struct ice_chs_chg *tmp; + + list_for_each_entry(tmp, chgs, list_entry) + if (tmp->type == ICE_PTG_ES_ADD && tmp->add_ptg) { + struct ice_xlt1_section *p; + u32 id; + + id = ice_sect_id(blk, ICE_XLT1); + p = (struct ice_xlt1_section *) + ice_pkg_buf_alloc_section(bld, id, sizeof(*p)); + + if (!p) + return ICE_ERR_MAX_LIMIT; + + p->count = cpu_to_le16(1); + p->offset = cpu_to_le16(tmp->ptype); + p->value[0] = tmp->ptg; + } + + return 0; +} + +/** + * ice_prof_bld_xlt2 - build XLT2 changes + * @blk: hardware block + * @bld: the update package buffer build to add to + * @chgs: the list of changes to make in hardware + */ +static enum ice_status +ice_prof_bld_xlt2(enum ice_block blk, struct ice_buf_build *bld, + struct list_head *chgs) 
+{ + struct ice_chs_chg *tmp; + + list_for_each_entry(tmp, chgs, list_entry) { + struct ice_xlt2_section *p; + u32 id; + + switch (tmp->type) { + case ICE_VSIG_ADD: + case ICE_VSI_MOVE: + case ICE_VSIG_REM: + id = ice_sect_id(blk, ICE_XLT2); + p = (struct ice_xlt2_section *) + ice_pkg_buf_alloc_section(bld, id, sizeof(*p)); + + if (!p) + return ICE_ERR_MAX_LIMIT; + + p->count = cpu_to_le16(1); + p->offset = cpu_to_le16(tmp->vsi); + p->value[0] = cpu_to_le16(tmp->vsig); + break; + default: + break; + } + } + + return 0; +} + +/** + * ice_upd_prof_hw - update hardware using the change list + * @hw: pointer to the HW struct + * @blk: hardware block + * @chgs: the list of changes to make in hardware + */ +static enum ice_status +ice_upd_prof_hw(struct ice_hw *hw, enum ice_block blk, + struct list_head *chgs) +{ + struct ice_buf_build *b; + struct ice_chs_chg *tmp; + enum ice_status status; + u16 pkg_sects; + u16 xlt1 = 0; + u16 xlt2 = 0; + u16 tcam = 0; + u16 es = 0; + u16 sects; + + /* count number of sections we need */ + list_for_each_entry(tmp, chgs, list_entry) { + switch (tmp->type) { + case ICE_PTG_ES_ADD: + if (tmp->add_ptg) + xlt1++; + if (tmp->add_prof) + es++; + break; + case ICE_TCAM_ADD: + tcam++; + break; + case ICE_VSIG_ADD: + case ICE_VSI_MOVE: + case ICE_VSIG_REM: + xlt2++; + break; + default: + break; + } + } + sects = xlt1 + xlt2 + tcam + es; + + if (!sects) + return 0; + + /* Build update package buffer */ + b = ice_pkg_buf_alloc(hw); + if (!b) + return ICE_ERR_NO_MEMORY; + + status = ice_pkg_buf_reserve_section(b, sects); + if (status) + goto error_tmp; + + /* Preserve order of table update: ES, TCAM, PTG, VSIG */ + if (es) { + status = ice_prof_bld_es(hw, blk, b, chgs); + if (status) + goto error_tmp; + } + + if (tcam) { + status = ice_prof_bld_tcam(hw, blk, b, chgs); + if (status) + goto error_tmp; + } + + if (xlt1) { + status = ice_prof_bld_xlt1(blk, b, chgs); + if (status) + goto error_tmp; + } + + if (xlt2) { + status = ice_prof_bld_xlt2(blk, b, chgs); + if (status) + goto error_tmp; + } + + /* After package buffer build check if the section count in buffer is + * non-zero and matches the number of sections detected for package + * update. + */ + pkg_sects = ice_pkg_buf_get_active_sections(b); + if (!pkg_sects || pkg_sects != sects) { + status = ICE_ERR_INVAL_SIZE; + goto error_tmp; + } + + /* update package */ + status = ice_update_pkg(hw, ice_pkg_buf(b), 1); + if (status == ICE_ERR_AQ_ERROR) + ice_debug(hw, ICE_DBG_INIT, "Unable to update HW profile\n"); + +error_tmp: + ice_pkg_buf_free(hw, b); + return status; +} + +/** + * ice_add_prof - add profile + * @hw: pointer to the HW struct + * @blk: hardware block + * @id: profile tracking ID + * @ptypes: array of bitmaps indicating ptypes (ICE_FLOW_PTYPE_MAX bits) + * @es: extraction sequence (length of array is determined by the block) + * + * This function registers a profile, which matches a set of PTGs with a + * particular extraction sequence. While the hardware profile is allocated + * it will not be written until the first call to ice_add_flow that specifies + * the ID value used here. 
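+ * The prof_map_lock is held while the profile map list is searched and updated.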
+ */ +enum ice_status +ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[], + struct ice_fv_word *es) +{ + u32 bytes = DIV_ROUND_UP(ICE_FLOW_PTYPE_MAX, BITS_PER_BYTE); + DECLARE_BITMAP(ptgs_used, ICE_XLT1_CNT); + struct ice_prof_map *prof; + enum ice_status status; + u32 byte = 0; + u8 prof_id; + + bitmap_zero(ptgs_used, ICE_XLT1_CNT); + + mutex_lock(&hw->blk[blk].es.prof_map_lock); + + /* search for existing profile */ + status = ice_find_prof_id(hw, blk, es, &prof_id); + if (status) { + /* allocate profile ID */ + status = ice_alloc_prof_id(hw, blk, &prof_id); + if (status) + goto err_ice_add_prof; + + /* and write new es */ + ice_write_es(hw, blk, prof_id, es); + } + + ice_prof_inc_ref(hw, blk, prof_id); + + /* add profile info */ + prof = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*prof), GFP_KERNEL); + if (!prof) + goto err_ice_add_prof; + + prof->profile_cookie = id; + prof->prof_id = prof_id; + prof->ptg_cnt = 0; + prof->context = 0; + + /* build list of ptgs */ + while (bytes && prof->ptg_cnt < ICE_MAX_PTG_PER_PROFILE) { + u32 bit; + + if (!ptypes[byte]) { + bytes--; + byte++; + continue; + } + + /* Examine 8 bits per byte */ + for_each_set_bit(bit, (unsigned long *)&ptypes[byte], + BITS_PER_BYTE) { + u16 ptype; + u8 ptg; + u8 m; + + ptype = byte * BITS_PER_BYTE + bit; + + /* The package should place all ptypes in a non-zero + * PTG, so the following call should never fail. + */ + if (ice_ptg_find_ptype(hw, blk, ptype, &ptg)) + continue; + + /* If PTG is already added, skip and continue */ + if (test_bit(ptg, ptgs_used)) + continue; + + set_bit(ptg, ptgs_used); + prof->ptg[prof->ptg_cnt] = ptg; + + if (++prof->ptg_cnt >= ICE_MAX_PTG_PER_PROFILE) + break; + + /* nothing left in byte, then exit */ + m = ~((1 << (bit + 1)) - 1); + if (!(ptypes[byte] & m)) + break; + } + + bytes--; + byte++; + } + + list_add(&prof->list, &hw->blk[blk].es.prof_map); + status = 0; + +err_ice_add_prof: + mutex_unlock(&hw->blk[blk].es.prof_map_lock); + return status; +} + +/** + * ice_search_prof_id_low - Search for a profile tracking ID low level + * @hw: pointer to the HW struct + * @blk: hardware block + * @id: profile tracking ID + * + * This will search for a profile tracking ID which was previously added. This + * version assumes that the caller has already acquired the prof map lock. + */ +static struct ice_prof_map * +ice_search_prof_id_low(struct ice_hw *hw, enum ice_block blk, u64 id) +{ + struct ice_prof_map *entry = NULL; + struct ice_prof_map *map; + + list_for_each_entry(map, &hw->blk[blk].es.prof_map, list) + if (map->profile_cookie == id) { + entry = map; + break; + } + + return entry; +} + +/** + * ice_search_prof_id - Search for a profile tracking ID + * @hw: pointer to the HW struct + * @blk: hardware block + * @id: profile tracking ID + * + * This will search for a profile tracking ID which was previously added. 
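+ * Takes and releases the prof_map_lock around the lookup; returns NULL if the ID is not found.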
+ */ +static struct ice_prof_map * +ice_search_prof_id(struct ice_hw *hw, enum ice_block blk, u64 id) +{ + struct ice_prof_map *entry; + + mutex_lock(&hw->blk[blk].es.prof_map_lock); + entry = ice_search_prof_id_low(hw, blk, id); + mutex_unlock(&hw->blk[blk].es.prof_map_lock); + + return entry; +} + +/** + * ice_vsig_prof_id_count - count profiles in a VSIG + * @hw: pointer to the HW struct + * @blk: hardware block + * @vsig: VSIG to remove the profile from + */ +static u16 +ice_vsig_prof_id_count(struct ice_hw *hw, enum ice_block blk, u16 vsig) +{ + u16 idx = vsig & ICE_VSIG_IDX_M, count = 0; + struct ice_vsig_prof *p; + + list_for_each_entry(p, &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst, + list) + count++; + + return count; +} + +/** + * ice_rel_tcam_idx - release a TCAM index + * @hw: pointer to the HW struct + * @blk: hardware block + * @idx: the index to release + */ +static enum ice_status +ice_rel_tcam_idx(struct ice_hw *hw, enum ice_block blk, u16 idx) +{ + /* Masks to invoke a never match entry */ + u8 vl_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; + u8 dc_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFE, 0xFF, 0xFF, 0xFF, 0xFF }; + u8 nm_msk[ICE_TCAM_KEY_VAL_SZ] = { 0x01, 0x00, 0x00, 0x00, 0x00 }; + enum ice_status status; + + /* write the TCAM entry */ + status = ice_tcam_write_entry(hw, blk, idx, 0, 0, 0, 0, 0, vl_msk, + dc_msk, nm_msk); + if (status) + return status; + + /* release the TCAM entry */ + status = ice_free_tcam_ent(hw, blk, idx); + + return status; +} + +/** + * ice_rem_prof_id - remove one profile from a VSIG + * @hw: pointer to the HW struct + * @blk: hardware block + * @prof: pointer to profile structure to remove + */ +static enum ice_status +ice_rem_prof_id(struct ice_hw *hw, enum ice_block blk, + struct ice_vsig_prof *prof) +{ + enum ice_status status; + u16 i; + + for (i = 0; i < prof->tcam_count; i++) + if (prof->tcam[i].in_use) { + prof->tcam[i].in_use = false; + status = ice_rel_tcam_idx(hw, blk, + prof->tcam[i].tcam_idx); + if (status) + return ICE_ERR_HW_TABLE; + } + + return 0; +} + +/** + * ice_rem_vsig - remove VSIG + * @hw: pointer to the HW struct + * @blk: hardware block + * @vsig: the VSIG to remove + * @chg: the change list + */ +static enum ice_status +ice_rem_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, + struct list_head *chg) +{ + u16 idx = vsig & ICE_VSIG_IDX_M; + struct ice_vsig_vsi *vsi_cur; + struct ice_vsig_prof *d, *t; + enum ice_status status; + + /* remove TCAM entries */ + list_for_each_entry_safe(d, t, + &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst, + list) { + status = ice_rem_prof_id(hw, blk, d); + if (status) + return status; + + list_del(&d->list); + devm_kfree(ice_hw_to_dev(hw), d); + } + + /* Move all VSIS associated with this VSIG to the default VSIG */ + vsi_cur = hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi; + /* If the VSIG has at least 1 VSI then iterate through the list + * and remove the VSIs before deleting the group. 
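+ * An ICE_VSIG_REM change record is queued for each VSI so the XLT2 update reaches hardware.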
+ */ + if (vsi_cur) + do { + struct ice_vsig_vsi *tmp = vsi_cur->next_vsi; + struct ice_chs_chg *p; + + p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), + GFP_KERNEL); + if (!p) + return ICE_ERR_NO_MEMORY; + + p->type = ICE_VSIG_REM; + p->orig_vsig = vsig; + p->vsig = ICE_DEFAULT_VSIG; + p->vsi = vsi_cur - hw->blk[blk].xlt2.vsis; + + list_add(&p->list_entry, chg); + + vsi_cur = tmp; + } while (vsi_cur); + + return ice_vsig_free(hw, blk, vsig); +} + +/** + * ice_rem_prof_id_vsig - remove a specific profile from a VSIG + * @hw: pointer to the HW struct + * @blk: hardware block + * @vsig: VSIG to remove the profile from + * @hdl: profile handle indicating which profile to remove + * @chg: list to receive a record of changes + */ +static enum ice_status +ice_rem_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 hdl, + struct list_head *chg) +{ + u16 idx = vsig & ICE_VSIG_IDX_M; + struct ice_vsig_prof *p, *t; + enum ice_status status; + + list_for_each_entry_safe(p, t, + &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst, + list) + if (p->profile_cookie == hdl) { + if (ice_vsig_prof_id_count(hw, blk, vsig) == 1) + /* this is the last profile, remove the VSIG */ + return ice_rem_vsig(hw, blk, vsig, chg); + + status = ice_rem_prof_id(hw, blk, p); + if (!status) { + list_del(&p->list); + devm_kfree(ice_hw_to_dev(hw), p); + } + return status; + } + + return ICE_ERR_DOES_NOT_EXIST; +} + +/** + * ice_rem_flow_all - remove all flows with a particular profile + * @hw: pointer to the HW struct + * @blk: hardware block + * @id: profile tracking ID + */ +static enum ice_status +ice_rem_flow_all(struct ice_hw *hw, enum ice_block blk, u64 id) +{ + struct ice_chs_chg *del, *tmp; + enum ice_status status; + struct list_head chg; + u16 i; + + INIT_LIST_HEAD(&chg); + + for (i = 1; i < ICE_MAX_VSIGS; i++) + if (hw->blk[blk].xlt2.vsig_tbl[i].in_use) { + if (ice_has_prof_vsig(hw, blk, i, id)) { + status = ice_rem_prof_id_vsig(hw, blk, i, id, + &chg); + if (status) + goto err_ice_rem_flow_all; + } + } + + status = ice_upd_prof_hw(hw, blk, &chg); + +err_ice_rem_flow_all: + list_for_each_entry_safe(del, tmp, &chg, list_entry) { + list_del(&del->list_entry); + devm_kfree(ice_hw_to_dev(hw), del); + } + + return status; +} + +/** + * ice_rem_prof - remove profile + * @hw: pointer to the HW struct + * @blk: hardware block + * @id: profile tracking ID + * + * This will remove the profile specified by the ID parameter, which was + * previously created through ice_add_prof. If any existing entries + * are associated with this profile, they will be removed as well. 
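+ * The profile's reference count is dropped as well; when it reaches zero the extraction sequence is cleared and the profile ID is freed.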
+ */ +enum ice_status ice_rem_prof(struct ice_hw *hw, enum ice_block blk, u64 id) +{ + struct ice_prof_map *pmap; + enum ice_status status; + + mutex_lock(&hw->blk[blk].es.prof_map_lock); + + pmap = ice_search_prof_id_low(hw, blk, id); + if (!pmap) { + status = ICE_ERR_DOES_NOT_EXIST; + goto err_ice_rem_prof; + } + + /* remove all flows with this profile */ + status = ice_rem_flow_all(hw, blk, pmap->profile_cookie); + if (status) + goto err_ice_rem_prof; + + /* dereference profile, and possibly remove */ + ice_prof_dec_ref(hw, blk, pmap->prof_id); + + list_del(&pmap->list); + devm_kfree(ice_hw_to_dev(hw), pmap); + +err_ice_rem_prof: + mutex_unlock(&hw->blk[blk].es.prof_map_lock); + return status; +} + +/** + * ice_get_prof - get profile + * @hw: pointer to the HW struct + * @blk: hardware block + * @hdl: profile handle + * @chg: change list + */ +static enum ice_status +ice_get_prof(struct ice_hw *hw, enum ice_block blk, u64 hdl, + struct list_head *chg) +{ + struct ice_prof_map *map; + struct ice_chs_chg *p; + u16 i; + + /* Get the details on the profile specified by the handle ID */ + map = ice_search_prof_id(hw, blk, hdl); + if (!map) + return ICE_ERR_DOES_NOT_EXIST; + + for (i = 0; i < map->ptg_cnt; i++) + if (!hw->blk[blk].es.written[map->prof_id]) { + /* add ES to change list */ + p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), + GFP_KERNEL); + if (!p) + goto err_ice_get_prof; + + p->type = ICE_PTG_ES_ADD; + p->ptype = 0; + p->ptg = map->ptg[i]; + p->add_ptg = 0; + + p->add_prof = 1; + p->prof_id = map->prof_id; + + hw->blk[blk].es.written[map->prof_id] = true; + + list_add(&p->list_entry, chg); + } + + return 0; + +err_ice_get_prof: + /* let caller clean up the change list */ + return ICE_ERR_NO_MEMORY; +} + +/** + * ice_get_profs_vsig - get a copy of the list of profiles from a VSIG + * @hw: pointer to the HW struct + * @blk: hardware block + * @vsig: VSIG from which to copy the list + * @lst: output list + * + * This routine makes a copy of the list of profiles in the specified VSIG. 
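+ * On allocation failure the partially copied list is freed and ICE_ERR_NO_MEMORY is returned.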
+ */ +static enum ice_status +ice_get_profs_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, + struct list_head *lst) +{ + struct ice_vsig_prof *ent1, *ent2; + u16 idx = vsig & ICE_VSIG_IDX_M; + + list_for_each_entry(ent1, &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst, + list) { + struct ice_vsig_prof *p; + + /* copy to the input list */ + p = devm_kmemdup(ice_hw_to_dev(hw), ent1, sizeof(*p), + GFP_KERNEL); + if (!p) + goto err_ice_get_profs_vsig; + + list_add_tail(&p->list, lst); + } + + return 0; + +err_ice_get_profs_vsig: + list_for_each_entry_safe(ent1, ent2, lst, list) { + list_del(&ent1->list); + devm_kfree(ice_hw_to_dev(hw), ent1); + } + + return ICE_ERR_NO_MEMORY; +} + +/** + * ice_add_prof_to_lst - add profile entry to a list + * @hw: pointer to the HW struct + * @blk: hardware block + * @lst: the list to be added to + * @hdl: profile handle of entry to add + */ +static enum ice_status +ice_add_prof_to_lst(struct ice_hw *hw, enum ice_block blk, + struct list_head *lst, u64 hdl) +{ + struct ice_prof_map *map; + struct ice_vsig_prof *p; + u16 i; + + map = ice_search_prof_id(hw, blk, hdl); + if (!map) + return ICE_ERR_DOES_NOT_EXIST; + + p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), GFP_KERNEL); + if (!p) + return ICE_ERR_NO_MEMORY; + + p->profile_cookie = map->profile_cookie; + p->prof_id = map->prof_id; + p->tcam_count = map->ptg_cnt; + + for (i = 0; i < map->ptg_cnt; i++) { + p->tcam[i].prof_id = map->prof_id; + p->tcam[i].tcam_idx = ICE_INVALID_TCAM; + p->tcam[i].ptg = map->ptg[i]; + } + + list_add(&p->list, lst); + + return 0; +} + +/** + * ice_move_vsi - move VSI to another VSIG + * @hw: pointer to the HW struct + * @blk: hardware block + * @vsi: the VSI to move + * @vsig: the VSIG to move the VSI to + * @chg: the change list + */ +static enum ice_status +ice_move_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 vsig, + struct list_head *chg) +{ + enum ice_status status; + struct ice_chs_chg *p; + u16 orig_vsig; + + p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), GFP_KERNEL); + if (!p) + return ICE_ERR_NO_MEMORY; + + status = ice_vsig_find_vsi(hw, blk, vsi, &orig_vsig); + if (!status) + status = ice_vsig_add_mv_vsi(hw, blk, vsi, vsig); + + if (status) { + devm_kfree(ice_hw_to_dev(hw), p); + return status; + } + + p->type = ICE_VSI_MOVE; + p->vsi = vsi; + p->orig_vsig = orig_vsig; + p->vsig = vsig; + + list_add(&p->list_entry, chg); + + return 0; +} + +/** + * ice_prof_tcam_ena_dis - add enable or disable TCAM change + * @hw: pointer to the HW struct + * @blk: hardware block + * @enable: true to enable, false to disable + * @vsig: the VSIG of the TCAM entry + * @tcam: pointer the TCAM info structure of the TCAM to disable + * @chg: the change list + * + * This function appends an enable or disable TCAM entry in the change log + */ +static enum ice_status +ice_prof_tcam_ena_dis(struct ice_hw *hw, enum ice_block blk, bool enable, + u16 vsig, struct ice_tcam_inf *tcam, + struct list_head *chg) +{ + enum ice_status status; + struct ice_chs_chg *p; + + /* Default: enable means change the low flag bit to don't care */ + u8 dc_msk[ICE_TCAM_KEY_VAL_SZ] = { 0x01, 0x00, 0x00, 0x00, 0x00 }; + u8 nm_msk[ICE_TCAM_KEY_VAL_SZ] = { 0x00, 0x00, 0x00, 0x00, 0x00 }; + u8 vl_msk[ICE_TCAM_KEY_VAL_SZ] = { 0x01, 0x00, 0x00, 0x00, 0x00 }; + + /* if disabling, free the TCAM */ + if (!enable) { + status = ice_free_tcam_ent(hw, blk, tcam->tcam_idx); + tcam->tcam_idx = 0; + tcam->in_use = 0; + return status; + } + + /* for re-enabling, reallocate a TCAM */ + status = ice_alloc_tcam_ent(hw, blk, 
&tcam->tcam_idx); + if (status) + return status; + + /* add TCAM to change list */ + p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), GFP_KERNEL); + if (!p) + return ICE_ERR_NO_MEMORY; + + status = ice_tcam_write_entry(hw, blk, tcam->tcam_idx, tcam->prof_id, + tcam->ptg, vsig, 0, 0, vl_msk, dc_msk, + nm_msk); + if (status) + goto err_ice_prof_tcam_ena_dis; + + tcam->in_use = 1; + + p->type = ICE_TCAM_ADD; + p->add_tcam_idx = true; + p->prof_id = tcam->prof_id; + p->ptg = tcam->ptg; + p->vsig = 0; + p->tcam_idx = tcam->tcam_idx; + + /* log change */ + list_add(&p->list_entry, chg); + + return 0; + +err_ice_prof_tcam_ena_dis: + devm_kfree(ice_hw_to_dev(hw), p); + return status; +} + +/** + * ice_adj_prof_priorities - adjust profile based on priorities + * @hw: pointer to the HW struct + * @blk: hardware block + * @vsig: the VSIG for which to adjust profile priorities + * @chg: the change list + */ +static enum ice_status +ice_adj_prof_priorities(struct ice_hw *hw, enum ice_block blk, u16 vsig, + struct list_head *chg) +{ + DECLARE_BITMAP(ptgs_used, ICE_XLT1_CNT); + struct ice_vsig_prof *t; + enum ice_status status; + u16 idx; + + bitmap_zero(ptgs_used, ICE_XLT1_CNT); + idx = vsig & ICE_VSIG_IDX_M; + + /* Priority is based on the order in which the profiles are added. The + * newest added profile has highest priority and the oldest added + * profile has the lowest priority. Since the profile property list for + * a VSIG is sorted from newest to oldest, this code traverses the list + * in order and enables the first of each PTG that it finds (that is not + * already enabled); it also disables any duplicate PTGs that it finds + * in the older profiles (that are currently enabled). + */ + + list_for_each_entry(t, &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst, + list) { + u16 i; + + for (i = 0; i < t->tcam_count; i++) { + /* Scan the priorities from newest to oldest. + * Make sure that the newest profiles take priority. 
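+ * A PTG already claimed by a newer profile is disabled here; a PTG that is neither claimed nor enabled yet is enabled.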
+ */ + if (test_bit(t->tcam[i].ptg, ptgs_used) && + t->tcam[i].in_use) { + /* need to mark this PTG as never match, as it + * was already in use and therefore duplicate + * (and lower priority) + */ + status = ice_prof_tcam_ena_dis(hw, blk, false, + vsig, + &t->tcam[i], + chg); + if (status) + return status; + } else if (!test_bit(t->tcam[i].ptg, ptgs_used) && + !t->tcam[i].in_use) { + /* need to enable this PTG, as it in not in use + * and not enabled (highest priority) + */ + status = ice_prof_tcam_ena_dis(hw, blk, true, + vsig, + &t->tcam[i], + chg); + if (status) + return status; + } + + /* keep track of used ptgs */ + set_bit(t->tcam[i].ptg, ptgs_used); + } + } + + return 0; +} + +/** + * ice_add_prof_id_vsig - add profile to VSIG + * @hw: pointer to the HW struct + * @blk: hardware block + * @vsig: the VSIG to which this profile is to be added + * @hdl: the profile handle indicating the profile to add + * @chg: the change list + */ +static enum ice_status +ice_add_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 hdl, + struct list_head *chg) +{ + /* Masks that ignore flags */ + u8 vl_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; + u8 dc_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFF, 0xFF, 0x00, 0x00, 0x00 }; + u8 nm_msk[ICE_TCAM_KEY_VAL_SZ] = { 0x00, 0x00, 0x00, 0x00, 0x00 }; + struct ice_prof_map *map; + struct ice_vsig_prof *t; + struct ice_chs_chg *p; + u16 i; + + /* Get the details on the profile specified by the handle ID */ + map = ice_search_prof_id(hw, blk, hdl); + if (!map) + return ICE_ERR_DOES_NOT_EXIST; + + /* Error, if this VSIG already has this profile */ + if (ice_has_prof_vsig(hw, blk, vsig, hdl)) + return ICE_ERR_ALREADY_EXISTS; + + /* new VSIG profile structure */ + t = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*t), GFP_KERNEL); + if (!t) + return ICE_ERR_NO_MEMORY; + + t->profile_cookie = map->profile_cookie; + t->prof_id = map->prof_id; + t->tcam_count = map->ptg_cnt; + + /* create TCAM entries */ + for (i = 0; i < map->ptg_cnt; i++) { + enum ice_status status; + u16 tcam_idx; + + /* add TCAM to change list */ + p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), GFP_KERNEL); + if (!p) + goto err_ice_add_prof_id_vsig; + + /* allocate the TCAM entry index */ + status = ice_alloc_tcam_ent(hw, blk, &tcam_idx); + if (status) { + devm_kfree(ice_hw_to_dev(hw), p); + goto err_ice_add_prof_id_vsig; + } + + t->tcam[i].ptg = map->ptg[i]; + t->tcam[i].prof_id = map->prof_id; + t->tcam[i].tcam_idx = tcam_idx; + t->tcam[i].in_use = true; + + p->type = ICE_TCAM_ADD; + p->add_tcam_idx = true; + p->prof_id = t->tcam[i].prof_id; + p->ptg = t->tcam[i].ptg; + p->vsig = vsig; + p->tcam_idx = t->tcam[i].tcam_idx; + + /* write the TCAM entry */ + status = ice_tcam_write_entry(hw, blk, t->tcam[i].tcam_idx, + t->tcam[i].prof_id, + t->tcam[i].ptg, vsig, 0, 0, + vl_msk, dc_msk, nm_msk); + if (status) + goto err_ice_add_prof_id_vsig; + + /* log change */ + list_add(&p->list_entry, chg); + } + + /* add profile to VSIG */ + list_add(&t->list, + &hw->blk[blk].xlt2.vsig_tbl[(vsig & ICE_VSIG_IDX_M)].prop_lst); + + return 0; + +err_ice_add_prof_id_vsig: + /* let caller clean up the change list */ + devm_kfree(ice_hw_to_dev(hw), t); + return ICE_ERR_NO_MEMORY; +} + +/** + * ice_create_prof_id_vsig - add a new VSIG with a single profile + * @hw: pointer to the HW struct + * @blk: hardware block + * @vsi: the initial VSI that will be in VSIG + * @hdl: the profile handle of the profile that will be added to the VSIG + * @chg: the change list + */ +static enum ice_status 
+ice_create_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl, + struct list_head *chg) +{ + enum ice_status status; + struct ice_chs_chg *p; + u16 new_vsig; + + p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), GFP_KERNEL); + if (!p) + return ICE_ERR_NO_MEMORY; + + new_vsig = ice_vsig_alloc(hw, blk); + if (!new_vsig) { + status = ICE_ERR_HW_TABLE; + goto err_ice_create_prof_id_vsig; + } + + status = ice_move_vsi(hw, blk, vsi, new_vsig, chg); + if (status) + goto err_ice_create_prof_id_vsig; + + status = ice_add_prof_id_vsig(hw, blk, new_vsig, hdl, chg); + if (status) + goto err_ice_create_prof_id_vsig; + + p->type = ICE_VSIG_ADD; + p->vsi = vsi; + p->orig_vsig = ICE_DEFAULT_VSIG; + p->vsig = new_vsig; + + list_add(&p->list_entry, chg); + + return 0; + +err_ice_create_prof_id_vsig: + /* let caller clean up the change list */ + devm_kfree(ice_hw_to_dev(hw), p); + return status; +} + +/** + * ice_create_vsig_from_lst - create a new VSIG with a list of profiles + * @hw: pointer to the HW struct + * @blk: hardware block + * @vsi: the initial VSI that will be in VSIG + * @lst: the list of profile that will be added to the VSIG + * @chg: the change list + */ +static enum ice_status +ice_create_vsig_from_lst(struct ice_hw *hw, enum ice_block blk, u16 vsi, + struct list_head *lst, struct list_head *chg) +{ + struct ice_vsig_prof *t; + enum ice_status status; + u16 vsig; + + vsig = ice_vsig_alloc(hw, blk); + if (!vsig) + return ICE_ERR_HW_TABLE; + + status = ice_move_vsi(hw, blk, vsi, vsig, chg); + if (status) + return status; + + list_for_each_entry(t, lst, list) { + status = ice_add_prof_id_vsig(hw, blk, vsig, t->profile_cookie, + chg); + if (status) + return status; + } + + return 0; +} + +/** + * ice_find_prof_vsig - find a VSIG with a specific profile handle + * @hw: pointer to the HW struct + * @blk: hardware block + * @hdl: the profile handle of the profile to search for + * @vsig: returns the VSIG with the matching profile + */ +static bool +ice_find_prof_vsig(struct ice_hw *hw, enum ice_block blk, u64 hdl, u16 *vsig) +{ + struct ice_vsig_prof *t; + enum ice_status status; + struct list_head lst; + + INIT_LIST_HEAD(&lst); + + t = kzalloc(sizeof(*t), GFP_KERNEL); + if (!t) + return false; + + t->profile_cookie = hdl; + list_add(&t->list, &lst); + + status = ice_find_dup_props_vsig(hw, blk, &lst, vsig); + + list_del(&t->list); + kfree(t); + + return !status; +} + +/** + * ice_add_prof_id_flow - add profile flow + * @hw: pointer to the HW struct + * @blk: hardware block + * @vsi: the VSI to enable with the profile specified by ID + * @hdl: profile handle + * + * Calling this function will update the hardware tables to enable the + * profile indicated by the ID parameter for the VSIs specified in the VSI + * array. Once successfully called, the flow will be enabled. 
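+ *
+ * For example, ice_flow_assoc_prof() in ice_flow.c enables a flow profile
+ * for a VSI with (error handling omitted):
+ *
+ *	status = ice_add_prof_id_flow(hw, blk,
+ *				      ice_get_hw_vsi_num(hw, vsi_handle),
+ *				      prof->id);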
+ */ +enum ice_status +ice_add_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl) +{ + struct ice_vsig_prof *tmp1, *del1; + struct ice_chs_chg *tmp, *del; + struct list_head union_lst; + enum ice_status status; + struct list_head chg; + u16 vsig; + + INIT_LIST_HEAD(&union_lst); + INIT_LIST_HEAD(&chg); + + /* Get profile */ + status = ice_get_prof(hw, blk, hdl, &chg); + if (status) + return status; + + /* determine if VSI is already part of a VSIG */ + status = ice_vsig_find_vsi(hw, blk, vsi, &vsig); + if (!status && vsig) { + bool only_vsi; + u16 or_vsig; + u16 ref; + + /* found in VSIG */ + or_vsig = vsig; + + /* make sure that there is no overlap/conflict between the new + * characteristics and the existing ones; we don't support that + * scenario + */ + if (ice_has_prof_vsig(hw, blk, vsig, hdl)) { + status = ICE_ERR_ALREADY_EXISTS; + goto err_ice_add_prof_id_flow; + } + + /* last VSI in the VSIG? */ + status = ice_vsig_get_ref(hw, blk, vsig, &ref); + if (status) + goto err_ice_add_prof_id_flow; + only_vsi = (ref == 1); + + /* create a union of the current profiles and the one being + * added + */ + status = ice_get_profs_vsig(hw, blk, vsig, &union_lst); + if (status) + goto err_ice_add_prof_id_flow; + + status = ice_add_prof_to_lst(hw, blk, &union_lst, hdl); + if (status) + goto err_ice_add_prof_id_flow; + + /* search for an existing VSIG with an exact charc match */ + status = ice_find_dup_props_vsig(hw, blk, &union_lst, &vsig); + if (!status) { + /* move VSI to the VSIG that matches */ + status = ice_move_vsi(hw, blk, vsi, vsig, &chg); + if (status) + goto err_ice_add_prof_id_flow; + + /* VSI has been moved out of or_vsig. If the or_vsig had + * only that VSI it is now empty and can be removed. + */ + if (only_vsi) { + status = ice_rem_vsig(hw, blk, or_vsig, &chg); + if (status) + goto err_ice_add_prof_id_flow; + } + } else if (only_vsi) { + /* If the original VSIG only contains one VSI, then it + * will be the requesting VSI. In this case the VSI is + * not sharing entries and we can simply add the new + * profile to the VSIG. 
+ */ + status = ice_add_prof_id_vsig(hw, blk, vsig, hdl, &chg); + if (status) + goto err_ice_add_prof_id_flow; + + /* Adjust priorities */ + status = ice_adj_prof_priorities(hw, blk, vsig, &chg); + if (status) + goto err_ice_add_prof_id_flow; + } else { + /* No match, so we need a new VSIG */ + status = ice_create_vsig_from_lst(hw, blk, vsi, + &union_lst, &chg); + if (status) + goto err_ice_add_prof_id_flow; + + /* Adjust priorities */ + status = ice_adj_prof_priorities(hw, blk, vsig, &chg); + if (status) + goto err_ice_add_prof_id_flow; + } + } else { + /* need to find or add a VSIG */ + /* search for an existing VSIG with an exact charc match */ + if (ice_find_prof_vsig(hw, blk, hdl, &vsig)) { + /* found an exact match */ + /* add or move VSI to the VSIG that matches */ + status = ice_move_vsi(hw, blk, vsi, vsig, &chg); + if (status) + goto err_ice_add_prof_id_flow; + } else { + /* we did not find an exact match */ + /* we need to add a VSIG */ + status = ice_create_prof_id_vsig(hw, blk, vsi, hdl, + &chg); + if (status) + goto err_ice_add_prof_id_flow; + } + } + + /* update hardware */ + if (!status) + status = ice_upd_prof_hw(hw, blk, &chg); + +err_ice_add_prof_id_flow: + list_for_each_entry_safe(del, tmp, &chg, list_entry) { + list_del(&del->list_entry); + devm_kfree(ice_hw_to_dev(hw), del); + } + + list_for_each_entry_safe(del1, tmp1, &union_lst, list) { + list_del(&del1->list); + devm_kfree(ice_hw_to_dev(hw), del1); + } + + return status; +} + +/** + * ice_rem_prof_from_list - remove a profile from list + * @hw: pointer to the HW struct + * @lst: list to remove the profile from + * @hdl: the profile handle indicating the profile to remove + */ +static enum ice_status +ice_rem_prof_from_list(struct ice_hw *hw, struct list_head *lst, u64 hdl) +{ + struct ice_vsig_prof *ent, *tmp; + + list_for_each_entry_safe(ent, tmp, lst, list) + if (ent->profile_cookie == hdl) { + list_del(&ent->list); + devm_kfree(ice_hw_to_dev(hw), ent); + return 0; + } + + return ICE_ERR_DOES_NOT_EXIST; +} + +/** + * ice_rem_prof_id_flow - remove flow + * @hw: pointer to the HW struct + * @blk: hardware block + * @vsi: the VSI from which to remove the profile specified by ID + * @hdl: profile tracking handle + * + * Calling this function will update the hardware tables to remove the + * profile indicated by the ID parameter for the VSIs specified in the VSI + * array. Once successfully called, the flow will be disabled. + */ +enum ice_status +ice_rem_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl) +{ + struct ice_vsig_prof *tmp1, *del1; + struct ice_chs_chg *tmp, *del; + struct list_head chg, copy; + enum ice_status status; + u16 vsig; + + INIT_LIST_HEAD(©); + INIT_LIST_HEAD(&chg); + + /* determine if VSI is already part of a VSIG */ + status = ice_vsig_find_vsi(hw, blk, vsi, &vsig); + if (!status && vsig) { + bool last_profile; + bool only_vsi; + u16 ref; + + /* found in VSIG */ + last_profile = ice_vsig_prof_id_count(hw, blk, vsig) == 1; + status = ice_vsig_get_ref(hw, blk, vsig, &ref); + if (status) + goto err_ice_rem_prof_id_flow; + only_vsi = (ref == 1); + + if (only_vsi) { + /* If the original VSIG only contains one reference, + * which will be the requesting VSI, then the VSI is not + * sharing entries and we can simply remove the specific + * characteristics from the VSIG. + */ + + if (last_profile) { + /* If there are no profiles left for this VSIG, + * then simply remove the the VSIG. 
+ */ + status = ice_rem_vsig(hw, blk, vsig, &chg); + if (status) + goto err_ice_rem_prof_id_flow; + } else { + status = ice_rem_prof_id_vsig(hw, blk, vsig, + hdl, &chg); + if (status) + goto err_ice_rem_prof_id_flow; + + /* Adjust priorities */ + status = ice_adj_prof_priorities(hw, blk, vsig, + &chg); + if (status) + goto err_ice_rem_prof_id_flow; + } + + } else { + /* Make a copy of the VSIG's list of Profiles */ + status = ice_get_profs_vsig(hw, blk, vsig, ©); + if (status) + goto err_ice_rem_prof_id_flow; + + /* Remove specified profile entry from the list */ + status = ice_rem_prof_from_list(hw, ©, hdl); + if (status) + goto err_ice_rem_prof_id_flow; + + if (list_empty(©)) { + status = ice_move_vsi(hw, blk, vsi, + ICE_DEFAULT_VSIG, &chg); + if (status) + goto err_ice_rem_prof_id_flow; + + } else if (!ice_find_dup_props_vsig(hw, blk, ©, + &vsig)) { + /* found an exact match */ + /* add or move VSI to the VSIG that matches */ + /* Search for a VSIG with a matching profile + * list + */ + + /* Found match, move VSI to the matching VSIG */ + status = ice_move_vsi(hw, blk, vsi, vsig, &chg); + if (status) + goto err_ice_rem_prof_id_flow; + } else { + /* since no existing VSIG supports this + * characteristic pattern, we need to create a + * new VSIG and TCAM entries + */ + status = ice_create_vsig_from_lst(hw, blk, vsi, + ©, &chg); + if (status) + goto err_ice_rem_prof_id_flow; + + /* Adjust priorities */ + status = ice_adj_prof_priorities(hw, blk, vsig, + &chg); + if (status) + goto err_ice_rem_prof_id_flow; + } + } + } else { + status = ICE_ERR_DOES_NOT_EXIST; + } + + /* update hardware tables */ + if (!status) + status = ice_upd_prof_hw(hw, blk, &chg); + +err_ice_rem_prof_id_flow: + list_for_each_entry_safe(del, tmp, &chg, list_entry) { + list_del(&del->list_entry); + devm_kfree(ice_hw_to_dev(hw), del); + } + + list_for_each_entry_safe(del1, tmp1, ©, list) { + list_del(&del1->list); + devm_kfree(ice_hw_to_dev(hw), del1); + } + + return status; +} diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h index 37eb282742d1..c7b5e1a6ea2b 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h @@ -18,6 +18,13 @@ #define ICE_PKG_CNT 4 +enum ice_status +ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[], + struct ice_fv_word *es); +enum ice_status +ice_add_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl); +enum ice_status +ice_rem_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl); enum ice_status ice_init_pkg(struct ice_hw *hw, u8 *buff, u32 len); enum ice_status ice_copy_and_init_pkg(struct ice_hw *hw, const u8 *buf, u32 len); @@ -26,4 +33,6 @@ void ice_free_seg(struct ice_hw *hw); void ice_fill_blk_tbls(struct ice_hw *hw); void ice_clear_hw_tbls(struct ice_hw *hw); void ice_free_hw_tbls(struct ice_hw *hw); +enum ice_status +ice_rem_prof(struct ice_hw *hw, enum ice_block blk, u64 id); #endif /* _ICE_FLEX_PIPE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h index 5d5a7eaffa30..0fb3fe3ff3ea 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_type.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h @@ -3,6 +3,9 @@ #ifndef _ICE_FLEX_TYPE_H_ #define _ICE_FLEX_TYPE_H_ + +#define ICE_FV_OFFSET_INVAL 0x1FF + /* Extraction Sequence (Field Vector) Table */ struct ice_fv_word { u8 prot_id; @@ -105,37 +108,57 @@ struct ice_buf_hdr { sizeof(struct ice_buf_hdr) - 
(hd_sz)) / (ent_sz)) /* ice package section IDs */ +#define ICE_SID_XLT0_SW 10 +#define ICE_SID_XLT_KEY_BUILDER_SW 11 #define ICE_SID_XLT1_SW 12 #define ICE_SID_XLT2_SW 13 #define ICE_SID_PROFID_TCAM_SW 14 #define ICE_SID_PROFID_REDIR_SW 15 #define ICE_SID_FLD_VEC_SW 16 +#define ICE_SID_CDID_KEY_BUILDER_SW 17 +#define ICE_SID_CDID_REDIR_SW 18 +#define ICE_SID_XLT0_ACL 20 +#define ICE_SID_XLT_KEY_BUILDER_ACL 21 #define ICE_SID_XLT1_ACL 22 #define ICE_SID_XLT2_ACL 23 #define ICE_SID_PROFID_TCAM_ACL 24 #define ICE_SID_PROFID_REDIR_ACL 25 #define ICE_SID_FLD_VEC_ACL 26 +#define ICE_SID_CDID_KEY_BUILDER_ACL 27 +#define ICE_SID_CDID_REDIR_ACL 28 +#define ICE_SID_XLT0_FD 30 +#define ICE_SID_XLT_KEY_BUILDER_FD 31 #define ICE_SID_XLT1_FD 32 #define ICE_SID_XLT2_FD 33 #define ICE_SID_PROFID_TCAM_FD 34 #define ICE_SID_PROFID_REDIR_FD 35 #define ICE_SID_FLD_VEC_FD 36 +#define ICE_SID_CDID_KEY_BUILDER_FD 37 +#define ICE_SID_CDID_REDIR_FD 38 +#define ICE_SID_XLT0_RSS 40 +#define ICE_SID_XLT_KEY_BUILDER_RSS 41 #define ICE_SID_XLT1_RSS 42 #define ICE_SID_XLT2_RSS 43 #define ICE_SID_PROFID_TCAM_RSS 44 #define ICE_SID_PROFID_REDIR_RSS 45 #define ICE_SID_FLD_VEC_RSS 46 +#define ICE_SID_CDID_KEY_BUILDER_RSS 47 +#define ICE_SID_CDID_REDIR_RSS 48 #define ICE_SID_RXPARSER_BOOST_TCAM 56 +#define ICE_SID_XLT0_PE 80 +#define ICE_SID_XLT_KEY_BUILDER_PE 81 #define ICE_SID_XLT1_PE 82 #define ICE_SID_XLT2_PE 83 #define ICE_SID_PROFID_TCAM_PE 84 #define ICE_SID_PROFID_REDIR_PE 85 #define ICE_SID_FLD_VEC_PE 86 +#define ICE_SID_CDID_KEY_BUILDER_PE 87 +#define ICE_SID_CDID_REDIR_PE 88 /* Label Metadata section IDs */ #define ICE_SID_LBL_FIRST 0x80000010 @@ -152,6 +175,19 @@ enum ice_block { ICE_BLK_COUNT }; +enum ice_sect { + ICE_XLT0 = 0, + ICE_XLT_KB, + ICE_XLT1, + ICE_XLT2, + ICE_PROF_TCAM, + ICE_PROF_REDIR, + ICE_VEC_TBL, + ICE_CDID_KB, + ICE_CDID_REDIR, + ICE_SECT_COUNT +}; + /* package labels */ struct ice_label { __le16 value; @@ -234,6 +270,13 @@ struct ice_prof_redir_section { u8 redir_value[1]; }; +/* package buffer building */ + +struct ice_buf_build { + struct ice_buf buf; + u16 reserved_section_table_entries; +}; + struct ice_pkg_enum { struct ice_buf_table *buf_table; u32 buf_idx; @@ -248,6 +291,12 @@ struct ice_pkg_enum { void *(*handler)(u32 sect_type, void *section, u32 index, u32 *offset); }; +struct ice_pkg_es { + __le16 count; + __le16 offset; + struct ice_fv_word es[1]; +}; + struct ice_es { u32 sid; u16 count; @@ -280,6 +329,35 @@ struct ice_ptg_ptype { u8 ptg; }; +#define ICE_MAX_TCAM_PER_PROFILE 32 +#define ICE_MAX_PTG_PER_PROFILE 32 + +struct ice_prof_map { + struct list_head list; + u64 profile_cookie; + u64 context; + u8 prof_id; + u8 ptg_cnt; + u8 ptg[ICE_MAX_PTG_PER_PROFILE]; +}; + +#define ICE_INVALID_TCAM 0xFFFF + +struct ice_tcam_inf { + u16 tcam_idx; + u8 ptg; + u8 prof_id; + u8 in_use; +}; + +struct ice_vsig_prof { + struct list_head list; + u64 profile_cookie; + u8 prof_id; + u8 tcam_count; + struct ice_tcam_inf tcam[ICE_MAX_TCAM_PER_PROFILE]; +}; + struct ice_vsig_entry { struct list_head prop_lst; struct ice_vsig_vsi *first_vsi; @@ -329,6 +407,13 @@ struct ice_xlt2 { u16 count; }; +/* Profile ID Management */ +struct ice_prof_id_key { + __le16 flags; + u8 xlt1; + __le16 xlt2_cdid; +} __packed; + /* Keys are made up of two values, each one-half the size of the key. 
* For TCAM, the entire key is 80 bits wide (or 2, 40-bit wide values) */ @@ -371,4 +456,31 @@ struct ice_blk_info { u8 is_list_init; }; +enum ice_chg_type { + ICE_TCAM_NONE = 0, + ICE_PTG_ES_ADD, + ICE_TCAM_ADD, + ICE_VSIG_ADD, + ICE_VSIG_REM, + ICE_VSI_MOVE, +}; + +struct ice_chs_chg { + struct list_head list_entry; + enum ice_chg_type type; + + u8 add_ptg; + u8 add_vsig; + u8 add_tcam_idx; + u8 add_prof; + u16 ptype; + u8 ptg; + u8 prof_id; + u16 vsi; + u16 vsig; + u16 orig_vsig; + u16 tcam_idx; +}; + +#define ICE_FLOW_PTYPE_MAX ICE_XLT1_CNT #endif /* _ICE_FLEX_TYPE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c new file mode 100644 index 000000000000..a05ceb59863b --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_flow.c @@ -0,0 +1,1275 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019, Intel Corporation. */ + +#include "ice_common.h" +#include "ice_flow.h" + +/* Describe properties of a protocol header field */ +struct ice_flow_field_info { + enum ice_flow_seg_hdr hdr; + s16 off; /* Offset from start of a protocol header, in bits */ + u16 size; /* Size of fields in bits */ +}; + +#define ICE_FLOW_FLD_INFO(_hdr, _offset_bytes, _size_bytes) { \ + .hdr = _hdr, \ + .off = (_offset_bytes) * BITS_PER_BYTE, \ + .size = (_size_bytes) * BITS_PER_BYTE, \ +} + +/* Table containing properties of supported protocol header fields */ +static const +struct ice_flow_field_info ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = { + /* IPv4 / IPv6 */ + /* ICE_FLOW_FIELD_IDX_IPV4_SA */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV4, 12, sizeof(struct in_addr)), + /* ICE_FLOW_FIELD_IDX_IPV4_DA */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV4, 16, sizeof(struct in_addr)), + /* ICE_FLOW_FIELD_IDX_IPV6_SA */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 8, sizeof(struct in6_addr)), + /* ICE_FLOW_FIELD_IDX_IPV6_DA */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 24, sizeof(struct in6_addr)), + /* Transport */ + /* ICE_FLOW_FIELD_IDX_TCP_SRC_PORT */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_TCP, 0, sizeof(__be16)), + /* ICE_FLOW_FIELD_IDX_TCP_DST_PORT */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_TCP, 2, sizeof(__be16)), + /* ICE_FLOW_FIELD_IDX_UDP_SRC_PORT */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_UDP, 0, sizeof(__be16)), + /* ICE_FLOW_FIELD_IDX_UDP_DST_PORT */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_UDP, 2, sizeof(__be16)), + /* ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_SCTP, 0, sizeof(__be16)), + /* ICE_FLOW_FIELD_IDX_SCTP_DST_PORT */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_SCTP, 2, sizeof(__be16)), + +}; + +/* Bitmaps indicating relevant packet types for a particular protocol header + * + * Packet types for packets with an Outer/First/Single IPv4 header + */ +static const u32 ice_ptypes_ipv4_ofos[] = { + 0x1DC00000, 0x04000800, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for packets with an Innermost/Last IPv4 header */ +static const u32 ice_ptypes_ipv4_il[] = { + 0xE0000000, 0xB807700E, 0x80000003, 0xE01DC03B, + 0x0000000E, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 
0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for packets with an Outer/First/Single IPv6 header */ +static const u32 ice_ptypes_ipv6_ofos[] = { + 0x00000000, 0x00000000, 0x77000000, 0x10002000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for packets with an Innermost/Last IPv6 header */ +static const u32 ice_ptypes_ipv6_il[] = { + 0x00000000, 0x03B80770, 0x000001DC, 0x0EE00000, + 0x00000770, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* UDP Packet types for non-tunneled packets or tunneled + * packets with inner UDP. + */ +static const u32 ice_ptypes_udp_il[] = { + 0x81000000, 0x20204040, 0x04000010, 0x80810102, + 0x00000040, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for packets with an Innermost/Last TCP header */ +static const u32 ice_ptypes_tcp_il[] = { + 0x04000000, 0x80810102, 0x10000040, 0x02040408, + 0x00000102, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for packets with an Innermost/Last SCTP header */ +static const u32 ice_ptypes_sctp_il[] = { + 0x08000000, 0x01020204, 0x20000081, 0x04080810, + 0x00000204, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Manage parameters and info. used during the creation of a flow profile */ +struct ice_flow_prof_params { + enum ice_block blk; + u16 entry_length; /* # of bytes formatted entry will require */ + u8 es_cnt; + struct ice_flow_prof *prof; + + /* For ACL, the es[0] will have the data of ICE_RX_MDID_PKT_FLAGS_15_0 + * This will give us the direction flags. 
+ */ + struct ice_fv_word es[ICE_MAX_FV_WORDS]; + DECLARE_BITMAP(ptypes, ICE_FLOW_PTYPE_MAX); +}; + +#define ICE_FLOW_SEG_HDRS_L3_MASK \ + (ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV6) +#define ICE_FLOW_SEG_HDRS_L4_MASK \ + (ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_SCTP) + +/** + * ice_flow_val_hdrs - validates packet segments for valid protocol headers + * @segs: array of one or more packet segments that describe the flow + * @segs_cnt: number of packet segments provided + */ +static enum ice_status +ice_flow_val_hdrs(struct ice_flow_seg_info *segs, u8 segs_cnt) +{ + u8 i; + + for (i = 0; i < segs_cnt; i++) { + /* Multiple L3 headers */ + if (segs[i].hdrs & ICE_FLOW_SEG_HDRS_L3_MASK && + !is_power_of_2(segs[i].hdrs & ICE_FLOW_SEG_HDRS_L3_MASK)) + return ICE_ERR_PARAM; + + /* Multiple L4 headers */ + if (segs[i].hdrs & ICE_FLOW_SEG_HDRS_L4_MASK && + !is_power_of_2(segs[i].hdrs & ICE_FLOW_SEG_HDRS_L4_MASK)) + return ICE_ERR_PARAM; + } + + return 0; +} + +/** + * ice_flow_proc_seg_hdrs - process protocol headers present in pkt segments + * @params: information about the flow to be processed + * + * This function identifies the packet types associated with the protocol + * headers being present in packet segments of the specified flow profile. + */ +static enum ice_status +ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params) +{ + struct ice_flow_prof *prof; + u8 i; + + memset(params->ptypes, 0xff, sizeof(params->ptypes)); + + prof = params->prof; + + for (i = 0; i < params->prof->segs_cnt; i++) { + const unsigned long *src; + u32 hdrs; + + hdrs = prof->segs[i].hdrs; + + if (hdrs & ICE_FLOW_SEG_HDR_IPV4) { + src = !i ? (const unsigned long *)ice_ptypes_ipv4_ofos : + (const unsigned long *)ice_ptypes_ipv4_il; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + } else if (hdrs & ICE_FLOW_SEG_HDR_IPV6) { + src = !i ? (const unsigned long *)ice_ptypes_ipv6_ofos : + (const unsigned long *)ice_ptypes_ipv6_il; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + } + + if (hdrs & ICE_FLOW_SEG_HDR_UDP) { + src = (const unsigned long *)ice_ptypes_udp_il; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + } else if (hdrs & ICE_FLOW_SEG_HDR_TCP) { + bitmap_and(params->ptypes, params->ptypes, + (const unsigned long *)ice_ptypes_tcp_il, + ICE_FLOW_PTYPE_MAX); + } else if (hdrs & ICE_FLOW_SEG_HDR_SCTP) { + src = (const unsigned long *)ice_ptypes_sctp_il; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + } + } + + return 0; +} + +/** + * ice_flow_xtract_fld - Create an extraction sequence entry for the given field + * @hw: pointer to the HW struct + * @params: information about the flow to be processed + * @seg: packet segment index of the field to be extracted + * @fld: ID of field to be extracted + * + * This function determines the protocol ID, offset, and size of the given + * field. It then allocates one or more extraction sequence entries for the + * given field, and fill the entries with protocol ID and offset information. + */ +static enum ice_status +ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params, + u8 seg, enum ice_flow_field fld) +{ + enum ice_prot_id prot_id = ICE_PROT_ID_INVAL; + u8 fv_words = hw->blk[params->blk].es.fvw; + struct ice_flow_fld_info *flds; + u16 cnt, ese_bits, i; + u16 off; + + flds = params->prof->segs[seg].fields; + + switch (fld) { + case ICE_FLOW_FIELD_IDX_IPV4_SA: + case ICE_FLOW_FIELD_IDX_IPV4_DA: + prot_id = seg == 0 ? 
ICE_PROT_IPV4_OF_OR_S : ICE_PROT_IPV4_IL; + break; + case ICE_FLOW_FIELD_IDX_IPV6_SA: + case ICE_FLOW_FIELD_IDX_IPV6_DA: + prot_id = seg == 0 ? ICE_PROT_IPV6_OF_OR_S : ICE_PROT_IPV6_IL; + break; + case ICE_FLOW_FIELD_IDX_TCP_SRC_PORT: + case ICE_FLOW_FIELD_IDX_TCP_DST_PORT: + prot_id = ICE_PROT_TCP_IL; + break; + case ICE_FLOW_FIELD_IDX_UDP_SRC_PORT: + case ICE_FLOW_FIELD_IDX_UDP_DST_PORT: + prot_id = ICE_PROT_UDP_IL_OR_S; + break; + case ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT: + case ICE_FLOW_FIELD_IDX_SCTP_DST_PORT: + prot_id = ICE_PROT_SCTP_IL; + break; + default: + return ICE_ERR_NOT_IMPL; + } + + /* Each extraction sequence entry is a word in size, and extracts a + * word-aligned offset from a protocol header. + */ + ese_bits = ICE_FLOW_FV_EXTRACT_SZ * BITS_PER_BYTE; + + flds[fld].xtrct.prot_id = prot_id; + flds[fld].xtrct.off = (ice_flds_info[fld].off / ese_bits) * + ICE_FLOW_FV_EXTRACT_SZ; + flds[fld].xtrct.disp = (u8)(ice_flds_info[fld].off % ese_bits); + flds[fld].xtrct.idx = params->es_cnt; + + /* Adjust the next field-entry index after accommodating the number of + * entries this field consumes + */ + cnt = DIV_ROUND_UP(flds[fld].xtrct.disp + ice_flds_info[fld].size, + ese_bits); + + /* Fill in the extraction sequence entries needed for this field */ + off = flds[fld].xtrct.off; + for (i = 0; i < cnt; i++) { + u8 idx; + + /* Make sure the number of extraction sequence required + * does not exceed the block's capability + */ + if (params->es_cnt >= fv_words) + return ICE_ERR_MAX_LIMIT; + + /* some blocks require a reversed field vector layout */ + if (hw->blk[params->blk].es.reverse) + idx = fv_words - params->es_cnt - 1; + else + idx = params->es_cnt; + + params->es[idx].prot_id = prot_id; + params->es[idx].off = off; + params->es_cnt++; + + off += ICE_FLOW_FV_EXTRACT_SZ; + } + + return 0; +} + +/** + * ice_flow_create_xtrct_seq - Create an extraction sequence for given segments + * @hw: pointer to the HW struct + * @params: information about the flow to be processed + * + * This function iterates through all matched fields in the given segments, and + * creates an extraction sequence for the fields. + */ +static enum ice_status +ice_flow_create_xtrct_seq(struct ice_hw *hw, + struct ice_flow_prof_params *params) +{ + struct ice_flow_prof *prof = params->prof; + enum ice_status status = 0; + u8 i; + + for (i = 0; i < prof->segs_cnt; i++) { + u8 j; + + for_each_set_bit(j, (unsigned long *)&prof->segs[i].match, + ICE_FLOW_FIELD_IDX_MAX) { + status = ice_flow_xtract_fld(hw, params, i, + (enum ice_flow_field)j); + if (status) + return status; + } + } + + return status; +} + +/** + * ice_flow_proc_segs - process all packet segments associated with a profile + * @hw: pointer to the HW struct + * @params: information about the flow to be processed + */ +static enum ice_status +ice_flow_proc_segs(struct ice_hw *hw, struct ice_flow_prof_params *params) +{ + enum ice_status status; + + status = ice_flow_proc_seg_hdrs(params); + if (status) + return status; + + status = ice_flow_create_xtrct_seq(hw, params); + if (status) + return status; + + switch (params->blk) { + case ICE_BLK_RSS: + /* Only header information is provided for RSS configuration. + * No further processing is needed. 
+ */ + status = 0; + break; + default: + return ICE_ERR_NOT_IMPL; + } + + return status; +} + +#define ICE_FLOW_FIND_PROF_CHK_FLDS 0x00000001 +#define ICE_FLOW_FIND_PROF_CHK_VSI 0x00000002 +#define ICE_FLOW_FIND_PROF_NOT_CHK_DIR 0x00000004 + +/** + * ice_flow_find_prof_conds - Find a profile matching headers and conditions + * @hw: pointer to the HW struct + * @blk: classification stage + * @dir: flow direction + * @segs: array of one or more packet segments that describe the flow + * @segs_cnt: number of packet segments provided + * @vsi_handle: software VSI handle to check VSI (ICE_FLOW_FIND_PROF_CHK_VSI) + * @conds: additional conditions to be checked (ICE_FLOW_FIND_PROF_CHK_*) + */ +static struct ice_flow_prof * +ice_flow_find_prof_conds(struct ice_hw *hw, enum ice_block blk, + enum ice_flow_dir dir, struct ice_flow_seg_info *segs, + u8 segs_cnt, u16 vsi_handle, u32 conds) +{ + struct ice_flow_prof *p, *prof = NULL; + + mutex_lock(&hw->fl_profs_locks[blk]); + list_for_each_entry(p, &hw->fl_profs[blk], l_entry) + if ((p->dir == dir || conds & ICE_FLOW_FIND_PROF_NOT_CHK_DIR) && + segs_cnt && segs_cnt == p->segs_cnt) { + u8 i; + + /* Check for profile-VSI association if specified */ + if ((conds & ICE_FLOW_FIND_PROF_CHK_VSI) && + ice_is_vsi_valid(hw, vsi_handle) && + !test_bit(vsi_handle, p->vsis)) + continue; + + /* Protocol headers must be checked. Matched fields are + * checked if specified. + */ + for (i = 0; i < segs_cnt; i++) + if (segs[i].hdrs != p->segs[i].hdrs || + ((conds & ICE_FLOW_FIND_PROF_CHK_FLDS) && + segs[i].match != p->segs[i].match)) + break; + + /* A match is found if all segments are matched */ + if (i == segs_cnt) { + prof = p; + break; + } + } + mutex_unlock(&hw->fl_profs_locks[blk]); + + return prof; +} + +/** + * ice_flow_find_prof_id - Look up a profile with given profile ID + * @hw: pointer to the HW struct + * @blk: classification stage + * @prof_id: unique ID to identify this flow profile + */ +static struct ice_flow_prof * +ice_flow_find_prof_id(struct ice_hw *hw, enum ice_block blk, u64 prof_id) +{ + struct ice_flow_prof *p; + + list_for_each_entry(p, &hw->fl_profs[blk], l_entry) + if (p->id == prof_id) + return p; + + return NULL; +} + +/** + * ice_flow_add_prof_sync - Add a flow profile for packet segments and fields + * @hw: pointer to the HW struct + * @blk: classification stage + * @dir: flow direction + * @prof_id: unique ID to identify this flow profile + * @segs: array of one or more packet segments that describe the flow + * @segs_cnt: number of packet segments provided + * @prof: stores the returned flow profile added + * + * Assumption: the caller has acquired the lock to the profile list + */ +static enum ice_status +ice_flow_add_prof_sync(struct ice_hw *hw, enum ice_block blk, + enum ice_flow_dir dir, u64 prof_id, + struct ice_flow_seg_info *segs, u8 segs_cnt, + struct ice_flow_prof **prof) +{ + struct ice_flow_prof_params params; + enum ice_status status; + u8 i; + + if (!prof) + return ICE_ERR_BAD_PTR; + + memset(¶ms, 0, sizeof(params)); + params.prof = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*params.prof), + GFP_KERNEL); + if (!params.prof) + return ICE_ERR_NO_MEMORY; + + /* initialize extraction sequence to all invalid (0xff) */ + for (i = 0; i < ICE_MAX_FV_WORDS; i++) { + params.es[i].prot_id = ICE_PROT_INVALID; + params.es[i].off = ICE_FV_OFFSET_INVAL; + } + + params.blk = blk; + params.prof->id = prof_id; + params.prof->dir = dir; + params.prof->segs_cnt = segs_cnt; + + /* Make a copy of the segments that need to be persistent in the flow 
+ * profile instance + */ + for (i = 0; i < segs_cnt; i++) + memcpy(¶ms.prof->segs[i], &segs[i], sizeof(*segs)); + + status = ice_flow_proc_segs(hw, ¶ms); + if (status) { + ice_debug(hw, ICE_DBG_FLOW, + "Error processing a flow's packet segments\n"); + goto out; + } + + /* Add a HW profile for this flow profile */ + status = ice_add_prof(hw, blk, prof_id, (u8 *)params.ptypes, params.es); + if (status) { + ice_debug(hw, ICE_DBG_FLOW, "Error adding a HW flow profile\n"); + goto out; + } + + INIT_LIST_HEAD(¶ms.prof->entries); + mutex_init(¶ms.prof->entries_lock); + *prof = params.prof; + +out: + if (status) + devm_kfree(ice_hw_to_dev(hw), params.prof); + + return status; +} + +/** + * ice_flow_rem_prof_sync - remove a flow profile + * @hw: pointer to the hardware structure + * @blk: classification stage + * @prof: pointer to flow profile to remove + * + * Assumption: the caller has acquired the lock to the profile list + */ +static enum ice_status +ice_flow_rem_prof_sync(struct ice_hw *hw, enum ice_block blk, + struct ice_flow_prof *prof) +{ + enum ice_status status; + + /* Remove all hardware profiles associated with this flow profile */ + status = ice_rem_prof(hw, blk, prof->id); + if (!status) { + list_del(&prof->l_entry); + mutex_destroy(&prof->entries_lock); + devm_kfree(ice_hw_to_dev(hw), prof); + } + + return status; +} + +/** + * ice_flow_assoc_prof - associate a VSI with a flow profile + * @hw: pointer to the hardware structure + * @blk: classification stage + * @prof: pointer to flow profile + * @vsi_handle: software VSI handle + * + * Assumption: the caller has acquired the lock to the profile list + * and the software VSI handle has been validated + */ +static enum ice_status +ice_flow_assoc_prof(struct ice_hw *hw, enum ice_block blk, + struct ice_flow_prof *prof, u16 vsi_handle) +{ + enum ice_status status = 0; + + if (!test_bit(vsi_handle, prof->vsis)) { + status = ice_add_prof_id_flow(hw, blk, + ice_get_hw_vsi_num(hw, + vsi_handle), + prof->id); + if (!status) + set_bit(vsi_handle, prof->vsis); + else + ice_debug(hw, ICE_DBG_FLOW, + "HW profile add failed, %d\n", + status); + } + + return status; +} + +/** + * ice_flow_disassoc_prof - disassociate a VSI from a flow profile + * @hw: pointer to the hardware structure + * @blk: classification stage + * @prof: pointer to flow profile + * @vsi_handle: software VSI handle + * + * Assumption: the caller has acquired the lock to the profile list + * and the software VSI handle has been validated + */ +static enum ice_status +ice_flow_disassoc_prof(struct ice_hw *hw, enum ice_block blk, + struct ice_flow_prof *prof, u16 vsi_handle) +{ + enum ice_status status = 0; + + if (test_bit(vsi_handle, prof->vsis)) { + status = ice_rem_prof_id_flow(hw, blk, + ice_get_hw_vsi_num(hw, + vsi_handle), + prof->id); + if (!status) + clear_bit(vsi_handle, prof->vsis); + else + ice_debug(hw, ICE_DBG_FLOW, + "HW profile remove failed, %d\n", + status); + } + + return status; +} + +/** + * ice_flow_add_prof - Add a flow profile for packet segments and matched fields + * @hw: pointer to the HW struct + * @blk: classification stage + * @dir: flow direction + * @prof_id: unique ID to identify this flow profile + * @segs: array of one or more packet segments that describe the flow + * @segs_cnt: number of packet segments provided + * @prof: stores the returned flow profile added + */ +static enum ice_status +ice_flow_add_prof(struct ice_hw *hw, enum ice_block blk, enum ice_flow_dir dir, + u64 prof_id, struct ice_flow_seg_info *segs, u8 segs_cnt, + struct 
ice_flow_prof **prof) +{ + enum ice_status status; + + if (segs_cnt > ICE_FLOW_SEG_MAX) + return ICE_ERR_MAX_LIMIT; + + if (!segs_cnt) + return ICE_ERR_PARAM; + + if (!segs) + return ICE_ERR_BAD_PTR; + + status = ice_flow_val_hdrs(segs, segs_cnt); + if (status) + return status; + + mutex_lock(&hw->fl_profs_locks[blk]); + + status = ice_flow_add_prof_sync(hw, blk, dir, prof_id, segs, segs_cnt, + prof); + if (!status) + list_add(&(*prof)->l_entry, &hw->fl_profs[blk]); + + mutex_unlock(&hw->fl_profs_locks[blk]); + + return status; +} + +/** + * ice_flow_rem_prof - Remove a flow profile and all entries associated with it + * @hw: pointer to the HW struct + * @blk: the block for which the flow profile is to be removed + * @prof_id: unique ID of the flow profile to be removed + */ +static enum ice_status +ice_flow_rem_prof(struct ice_hw *hw, enum ice_block blk, u64 prof_id) +{ + struct ice_flow_prof *prof; + enum ice_status status; + + mutex_lock(&hw->fl_profs_locks[blk]); + + prof = ice_flow_find_prof_id(hw, blk, prof_id); + if (!prof) { + status = ICE_ERR_DOES_NOT_EXIST; + goto out; + } + + /* prof becomes invalid after the call */ + status = ice_flow_rem_prof_sync(hw, blk, prof); + +out: + mutex_unlock(&hw->fl_profs_locks[blk]); + + return status; +} + +/** + * ice_flow_set_fld_ext - specifies locations of field from entry's input buffer + * @seg: packet segment the field being set belongs to + * @fld: field to be set + * @type: type of the field + * @val_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of the value to match from + * entry's input buffer + * @mask_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of mask value from entry's + * input buffer + * @last_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of last/upper value from + * entry's input buffer + * + * This helper function stores information of a field being matched, including + * the type of the field and the locations of the value to match, the mask, and + * and the upper-bound value in the start of the input buffer for a flow entry. + * This function should only be used for fixed-size data structures. + * + * This function also opportunistically determines the protocol headers to be + * present based on the fields being set. Some fields cannot be used alone to + * determine the protocol headers present. Sometimes, fields for particular + * protocol headers are not matched. In those cases, the protocol headers + * must be explicitly set. 
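+ *
+ * For example, the RSS path below (ice_flow_set_rss_seg_info()) only needs
+ * the match bit and the implied protocol header, so it goes through the
+ * ice_flow_set_fld() wrapper with all three locations set to
+ * ICE_FLOW_FLD_OFF_INVAL, which is equivalent to (IPv4 source address used
+ * purely as an illustration):
+ *
+ *	ice_flow_set_fld_ext(seg, ICE_FLOW_FIELD_IDX_IPV4_SA,
+ *			     ICE_FLOW_FLD_TYPE_REG, ICE_FLOW_FLD_OFF_INVAL,
+ *			     ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL);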
+ */ +static void +ice_flow_set_fld_ext(struct ice_flow_seg_info *seg, enum ice_flow_field fld, + enum ice_flow_fld_match_type type, u16 val_loc, + u16 mask_loc, u16 last_loc) +{ + u64 bit = BIT_ULL(fld); + + seg->match |= bit; + if (type == ICE_FLOW_FLD_TYPE_RANGE) + seg->range |= bit; + + seg->fields[fld].type = type; + seg->fields[fld].src.val = val_loc; + seg->fields[fld].src.mask = mask_loc; + seg->fields[fld].src.last = last_loc; + + ICE_FLOW_SET_HDRS(seg, ice_flds_info[fld].hdr); +} + +/** + * ice_flow_set_fld - specifies locations of field from entry's input buffer + * @seg: packet segment the field being set belongs to + * @fld: field to be set + * @val_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of the value to match from + * entry's input buffer + * @mask_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of mask value from entry's + * input buffer + * @last_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of last/upper value from + * entry's input buffer + * @range: indicate if field being matched is to be in a range + * + * This function specifies the locations, in the form of byte offsets from the + * start of the input buffer for a flow entry, from where the value to match, + * the mask value, and upper value can be extracted. These locations are then + * stored in the flow profile. When adding a flow entry associated with the + * flow profile, these locations will be used to quickly extract the values and + * create the content of a match entry. This function should only be used for + * fixed-size data structures. + */ +static void +ice_flow_set_fld(struct ice_flow_seg_info *seg, enum ice_flow_field fld, + u16 val_loc, u16 mask_loc, u16 last_loc, bool range) +{ + enum ice_flow_fld_match_type t = range ? + ICE_FLOW_FLD_TYPE_RANGE : ICE_FLOW_FLD_TYPE_REG; + + ice_flow_set_fld_ext(seg, fld, t, val_loc, mask_loc, last_loc); +} + +#define ICE_FLOW_RSS_SEG_HDR_L3_MASKS \ + (ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV6) + +#define ICE_FLOW_RSS_SEG_HDR_L4_MASKS \ + (ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_SCTP) + +#define ICE_FLOW_RSS_SEG_HDR_VAL_MASKS \ + (ICE_FLOW_RSS_SEG_HDR_L3_MASKS | \ + ICE_FLOW_RSS_SEG_HDR_L4_MASKS) + +/** + * ice_flow_set_rss_seg_info - setup packet segments for RSS + * @segs: pointer to the flow field segment(s) + * @hash_fields: fields to be hashed on for the segment(s) + * @flow_hdr: protocol header fields within a packet segment + * + * Helper function to extract fields from hash bitmap and use flow + * header value to set flow field segment for further use in flow + * profile entry or removal. + */ +static enum ice_status +ice_flow_set_rss_seg_info(struct ice_flow_seg_info *segs, u64 hash_fields, + u32 flow_hdr) +{ + u64 val; + u8 i; + + for_each_set_bit(i, (unsigned long *)&hash_fields, + ICE_FLOW_FIELD_IDX_MAX) + ice_flow_set_fld(segs, (enum ice_flow_field)i, + ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL, + ICE_FLOW_FLD_OFF_INVAL, false); + + ICE_FLOW_SET_HDRS(segs, flow_hdr); + + if (segs->hdrs & ~ICE_FLOW_RSS_SEG_HDR_VAL_MASKS) + return ICE_ERR_PARAM; + + val = (u64)(segs->hdrs & ICE_FLOW_RSS_SEG_HDR_L3_MASKS); + if (val && !is_power_of_2(val)) + return ICE_ERR_CFG; + + val = (u64)(segs->hdrs & ICE_FLOW_RSS_SEG_HDR_L4_MASKS); + if (val && !is_power_of_2(val)) + return ICE_ERR_CFG; + + return 0; +} + +/** + * ice_rem_vsi_rss_list - remove VSI from RSS list + * @hw: pointer to the hardware structure + * @vsi_handle: software VSI handle + * + * Remove the VSI from all RSS configurations in the list. 
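+ *
+ * For example, ice_rss_clean() in ice_lib.c calls this after
+ * ice_rem_vsi_rss_cfg() has disassociated the VSI from its flow profiles.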
+ */ +void ice_rem_vsi_rss_list(struct ice_hw *hw, u16 vsi_handle) +{ + struct ice_rss_cfg *r, *tmp; + + if (list_empty(&hw->rss_list_head)) + return; + + mutex_lock(&hw->rss_locks); + list_for_each_entry_safe(r, tmp, &hw->rss_list_head, l_entry) + if (test_and_clear_bit(vsi_handle, r->vsis)) + if (bitmap_empty(r->vsis, ICE_MAX_VSI)) { + list_del(&r->l_entry); + devm_kfree(ice_hw_to_dev(hw), r); + } + mutex_unlock(&hw->rss_locks); +} + +/** + * ice_rem_vsi_rss_cfg - remove RSS configurations associated with VSI + * @hw: pointer to the hardware structure + * @vsi_handle: software VSI handle + * + * This function will iterate through all flow profiles and disassociate + * the VSI from that profile. If the flow profile has no VSIs it will + * be removed. + */ +enum ice_status ice_rem_vsi_rss_cfg(struct ice_hw *hw, u16 vsi_handle) +{ + const enum ice_block blk = ICE_BLK_RSS; + struct ice_flow_prof *p, *t; + enum ice_status status = 0; + + if (!ice_is_vsi_valid(hw, vsi_handle)) + return ICE_ERR_PARAM; + + if (list_empty(&hw->fl_profs[blk])) + return 0; + + mutex_lock(&hw->fl_profs_locks[blk]); + list_for_each_entry_safe(p, t, &hw->fl_profs[blk], l_entry) + if (test_bit(vsi_handle, p->vsis)) { + status = ice_flow_disassoc_prof(hw, blk, p, vsi_handle); + if (status) + break; + + if (bitmap_empty(p->vsis, ICE_MAX_VSI)) { + status = ice_flow_rem_prof_sync(hw, blk, p); + if (status) + break; + } + } + mutex_unlock(&hw->fl_profs_locks[blk]); + + return status; +} + +/** + * ice_rem_rss_list - remove RSS configuration from list + * @hw: pointer to the hardware structure + * @vsi_handle: software VSI handle + * @prof: pointer to flow profile + * + * Assumption: lock has already been acquired for RSS list + */ +static void +ice_rem_rss_list(struct ice_hw *hw, u16 vsi_handle, struct ice_flow_prof *prof) +{ + struct ice_rss_cfg *r, *tmp; + + /* Search for RSS hash fields associated to the VSI that match the + * hash configurations associated to the flow profile. If found + * remove from the RSS entry list of the VSI context and delete entry. 
+ */ + list_for_each_entry_safe(r, tmp, &hw->rss_list_head, l_entry) + if (r->hashed_flds == prof->segs[prof->segs_cnt - 1].match && + r->packet_hdr == prof->segs[prof->segs_cnt - 1].hdrs) { + clear_bit(vsi_handle, r->vsis); + if (bitmap_empty(r->vsis, ICE_MAX_VSI)) { + list_del(&r->l_entry); + devm_kfree(ice_hw_to_dev(hw), r); + } + return; + } +} + +/** + * ice_add_rss_list - add RSS configuration to list + * @hw: pointer to the hardware structure + * @vsi_handle: software VSI handle + * @prof: pointer to flow profile + * + * Assumption: lock has already been acquired for RSS list + */ +static enum ice_status +ice_add_rss_list(struct ice_hw *hw, u16 vsi_handle, struct ice_flow_prof *prof) +{ + struct ice_rss_cfg *r, *rss_cfg; + + list_for_each_entry(r, &hw->rss_list_head, l_entry) + if (r->hashed_flds == prof->segs[prof->segs_cnt - 1].match && + r->packet_hdr == prof->segs[prof->segs_cnt - 1].hdrs) { + set_bit(vsi_handle, r->vsis); + return 0; + } + + rss_cfg = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*rss_cfg), + GFP_KERNEL); + if (!rss_cfg) + return ICE_ERR_NO_MEMORY; + + rss_cfg->hashed_flds = prof->segs[prof->segs_cnt - 1].match; + rss_cfg->packet_hdr = prof->segs[prof->segs_cnt - 1].hdrs; + set_bit(vsi_handle, rss_cfg->vsis); + + list_add_tail(&rss_cfg->l_entry, &hw->rss_list_head); + + return 0; +} + +#define ICE_FLOW_PROF_HASH_S 0 +#define ICE_FLOW_PROF_HASH_M (0xFFFFFFFFULL << ICE_FLOW_PROF_HASH_S) +#define ICE_FLOW_PROF_HDR_S 32 +#define ICE_FLOW_PROF_HDR_M (0x3FFFFFFFULL << ICE_FLOW_PROF_HDR_S) +#define ICE_FLOW_PROF_ENCAP_S 63 +#define ICE_FLOW_PROF_ENCAP_M (BIT_ULL(ICE_FLOW_PROF_ENCAP_S)) + +#define ICE_RSS_OUTER_HEADERS 1 + +/* Flow profile ID format: + * [0:31] - Packet match fields + * [32:62] - Protocol header + * [63] - Encapsulation flag, 0 if non-tunneled, 1 if tunneled + */ +#define ICE_FLOW_GEN_PROFID(hash, hdr, segs_cnt) \ + (u64)(((u64)(hash) & ICE_FLOW_PROF_HASH_M) | \ + (((u64)(hdr) << ICE_FLOW_PROF_HDR_S) & ICE_FLOW_PROF_HDR_M) | \ + ((u8)((segs_cnt) - 1) ? ICE_FLOW_PROF_ENCAP_M : 0)) + +/** + * ice_add_rss_cfg_sync - add an RSS configuration + * @hw: pointer to the hardware structure + * @vsi_handle: software VSI handle + * @hashed_flds: hash bit fields (ICE_FLOW_HASH_*) to configure + * @addl_hdrs: protocol header fields + * @segs_cnt: packet segment count + * + * Assumption: lock has already been acquired for RSS list + */ +static enum ice_status +ice_add_rss_cfg_sync(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds, + u32 addl_hdrs, u8 segs_cnt) +{ + const enum ice_block blk = ICE_BLK_RSS; + struct ice_flow_prof *prof = NULL; + struct ice_flow_seg_info *segs; + enum ice_status status; + + if (!segs_cnt || segs_cnt > ICE_FLOW_SEG_MAX) + return ICE_ERR_PARAM; + + segs = kcalloc(segs_cnt, sizeof(*segs), GFP_KERNEL); + if (!segs) + return ICE_ERR_NO_MEMORY; + + /* Construct the packet segment info from the hashed fields */ + status = ice_flow_set_rss_seg_info(&segs[segs_cnt - 1], hashed_flds, + addl_hdrs); + if (status) + goto exit; + + /* Search for a flow profile that has matching headers, hash fields + * and has the input VSI associated to it. If found, no further + * operations required and exit. + */ + prof = ice_flow_find_prof_conds(hw, blk, ICE_FLOW_RX, segs, segs_cnt, + vsi_handle, + ICE_FLOW_FIND_PROF_CHK_FLDS | + ICE_FLOW_FIND_PROF_CHK_VSI); + if (prof) + goto exit; + + /* Check if a flow profile exists with the same protocol headers and + * associated with the input VSI. If so disassociate the VSI from + * this profile. 
The VSI will be added to a new profile created with + * the protocol header and new hash field configuration. + */ + prof = ice_flow_find_prof_conds(hw, blk, ICE_FLOW_RX, segs, segs_cnt, + vsi_handle, ICE_FLOW_FIND_PROF_CHK_VSI); + if (prof) { + status = ice_flow_disassoc_prof(hw, blk, prof, vsi_handle); + if (!status) + ice_rem_rss_list(hw, vsi_handle, prof); + else + goto exit; + + /* Remove profile if it has no VSIs associated */ + if (bitmap_empty(prof->vsis, ICE_MAX_VSI)) { + status = ice_flow_rem_prof(hw, blk, prof->id); + if (status) + goto exit; + } + } + + /* Search for a profile that has same match fields only. If this + * exists then associate the VSI to this profile. + */ + prof = ice_flow_find_prof_conds(hw, blk, ICE_FLOW_RX, segs, segs_cnt, + vsi_handle, + ICE_FLOW_FIND_PROF_CHK_FLDS); + if (prof) { + status = ice_flow_assoc_prof(hw, blk, prof, vsi_handle); + if (!status) + status = ice_add_rss_list(hw, vsi_handle, prof); + goto exit; + } + + /* Create a new flow profile with generated profile and packet + * segment information. + */ + status = ice_flow_add_prof(hw, blk, ICE_FLOW_RX, + ICE_FLOW_GEN_PROFID(hashed_flds, + segs[segs_cnt - 1].hdrs, + segs_cnt), + segs, segs_cnt, &prof); + if (status) + goto exit; + + status = ice_flow_assoc_prof(hw, blk, prof, vsi_handle); + /* If association to a new flow profile failed then this profile can + * be removed. + */ + if (status) { + ice_flow_rem_prof(hw, blk, prof->id); + goto exit; + } + + status = ice_add_rss_list(hw, vsi_handle, prof); + +exit: + kfree(segs); + return status; +} + +/** + * ice_add_rss_cfg - add an RSS configuration with specified hashed fields + * @hw: pointer to the hardware structure + * @vsi_handle: software VSI handle + * @hashed_flds: hash bit fields (ICE_FLOW_HASH_*) to configure + * @addl_hdrs: protocol header fields + * + * This function will generate a flow profile based on fields associated with + * the input fields to hash on, the flow type and use the VSI number to add + * a flow entry to the profile. + */ +enum ice_status +ice_add_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds, + u32 addl_hdrs) +{ + enum ice_status status; + + if (hashed_flds == ICE_HASH_INVALID || + !ice_is_vsi_valid(hw, vsi_handle)) + return ICE_ERR_PARAM; + + mutex_lock(&hw->rss_locks); + status = ice_add_rss_cfg_sync(hw, vsi_handle, hashed_flds, addl_hdrs, + ICE_RSS_OUTER_HEADERS); + mutex_unlock(&hw->rss_locks); + + return status; +} + +/* Mapping of AVF hash bit fields to an L3-L4 hash combination. + * As the ice_flow_avf_hdr_field represent individual bit shifts in a hash, + * convert its values to their appropriate flow L3, L4 values. 
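+ *
+ * For example, a request with only BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_TCP) set
+ * is expanded by ice_add_avf_rss_cfg() below into two RSS configurations:
+ * ICE_FLOW_HASH_IPV4 (the always-added L3 configuration) and
+ * ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_TCP_PORT, each passed to
+ * ice_add_rss_cfg().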
+ */ +#define ICE_FLOW_AVF_RSS_IPV4_MASKS \ + (BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_OTHER) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_FRAG_IPV4)) +#define ICE_FLOW_AVF_RSS_TCP_IPV4_MASKS \ + (BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_TCP_SYN_NO_ACK) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_TCP)) +#define ICE_FLOW_AVF_RSS_UDP_IPV4_MASKS \ + (BIT_ULL(ICE_AVF_FLOW_FIELD_UNICAST_IPV4_UDP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_MULTICAST_IPV4_UDP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_UDP)) +#define ICE_FLOW_AVF_RSS_ALL_IPV4_MASKS \ + (ICE_FLOW_AVF_RSS_TCP_IPV4_MASKS | ICE_FLOW_AVF_RSS_UDP_IPV4_MASKS | \ + ICE_FLOW_AVF_RSS_IPV4_MASKS | BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_SCTP)) + +#define ICE_FLOW_AVF_RSS_IPV6_MASKS \ + (BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_OTHER) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_FRAG_IPV6)) +#define ICE_FLOW_AVF_RSS_UDP_IPV6_MASKS \ + (BIT_ULL(ICE_AVF_FLOW_FIELD_UNICAST_IPV6_UDP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_MULTICAST_IPV6_UDP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_UDP)) +#define ICE_FLOW_AVF_RSS_TCP_IPV6_MASKS \ + (BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_TCP_SYN_NO_ACK) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_TCP)) +#define ICE_FLOW_AVF_RSS_ALL_IPV6_MASKS \ + (ICE_FLOW_AVF_RSS_TCP_IPV6_MASKS | ICE_FLOW_AVF_RSS_UDP_IPV6_MASKS | \ + ICE_FLOW_AVF_RSS_IPV6_MASKS | BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_SCTP)) + +/** + * ice_add_avf_rss_cfg - add an RSS configuration for AVF driver + * @hw: pointer to the hardware structure + * @vsi_handle: software VSI handle + * @avf_hash: hash bit fields (ICE_AVF_FLOW_FIELD_*) to configure + * + * This function will take the hash bitmap provided by the AVF driver via a + * message, convert it to ICE-compatible values, and configure RSS flow + * profiles. + */ +enum ice_status +ice_add_avf_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 avf_hash) +{ + enum ice_status status = 0; + u64 hash_flds; + + if (avf_hash == ICE_AVF_FLOW_FIELD_INVALID || + !ice_is_vsi_valid(hw, vsi_handle)) + return ICE_ERR_PARAM; + + /* Make sure no unsupported bits are specified */ + if (avf_hash & ~(ICE_FLOW_AVF_RSS_ALL_IPV4_MASKS | + ICE_FLOW_AVF_RSS_ALL_IPV6_MASKS)) + return ICE_ERR_CFG; + + hash_flds = avf_hash; + + /* Always create an L3 RSS configuration for any L4 RSS configuration */ + if (hash_flds & ICE_FLOW_AVF_RSS_ALL_IPV4_MASKS) + hash_flds |= ICE_FLOW_AVF_RSS_IPV4_MASKS; + + if (hash_flds & ICE_FLOW_AVF_RSS_ALL_IPV6_MASKS) + hash_flds |= ICE_FLOW_AVF_RSS_IPV6_MASKS; + + /* Create the corresponding RSS configuration for each valid hash bit */ + while (hash_flds) { + u64 rss_hash = ICE_HASH_INVALID; + + if (hash_flds & ICE_FLOW_AVF_RSS_ALL_IPV4_MASKS) { + if (hash_flds & ICE_FLOW_AVF_RSS_IPV4_MASKS) { + rss_hash = ICE_FLOW_HASH_IPV4; + hash_flds &= ~ICE_FLOW_AVF_RSS_IPV4_MASKS; + } else if (hash_flds & + ICE_FLOW_AVF_RSS_TCP_IPV4_MASKS) { + rss_hash = ICE_FLOW_HASH_IPV4 | + ICE_FLOW_HASH_TCP_PORT; + hash_flds &= ~ICE_FLOW_AVF_RSS_TCP_IPV4_MASKS; + } else if (hash_flds & + ICE_FLOW_AVF_RSS_UDP_IPV4_MASKS) { + rss_hash = ICE_FLOW_HASH_IPV4 | + ICE_FLOW_HASH_UDP_PORT; + hash_flds &= ~ICE_FLOW_AVF_RSS_UDP_IPV4_MASKS; + } else if (hash_flds & + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_SCTP)) { + rss_hash = ICE_FLOW_HASH_IPV4 | + ICE_FLOW_HASH_SCTP_PORT; + hash_flds &= + ~BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_SCTP); + } + } else if (hash_flds & ICE_FLOW_AVF_RSS_ALL_IPV6_MASKS) { + if (hash_flds & ICE_FLOW_AVF_RSS_IPV6_MASKS) { + rss_hash = ICE_FLOW_HASH_IPV6; + hash_flds &= ~ICE_FLOW_AVF_RSS_IPV6_MASKS; + } else if (hash_flds & + ICE_FLOW_AVF_RSS_TCP_IPV6_MASKS) { + rss_hash = ICE_FLOW_HASH_IPV6 | + ICE_FLOW_HASH_TCP_PORT; + hash_flds 
&= ~ICE_FLOW_AVF_RSS_TCP_IPV6_MASKS; + } else if (hash_flds & + ICE_FLOW_AVF_RSS_UDP_IPV6_MASKS) { + rss_hash = ICE_FLOW_HASH_IPV6 | + ICE_FLOW_HASH_UDP_PORT; + hash_flds &= ~ICE_FLOW_AVF_RSS_UDP_IPV6_MASKS; + } else if (hash_flds & + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_SCTP)) { + rss_hash = ICE_FLOW_HASH_IPV6 | + ICE_FLOW_HASH_SCTP_PORT; + hash_flds &= + ~BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_SCTP); + } + } + + if (rss_hash == ICE_HASH_INVALID) + return ICE_ERR_OUT_OF_RANGE; + + status = ice_add_rss_cfg(hw, vsi_handle, rss_hash, + ICE_FLOW_SEG_HDR_NONE); + if (status) + break; + } + + return status; +} + +/** + * ice_replay_rss_cfg - replay RSS configurations associated with VSI + * @hw: pointer to the hardware structure + * @vsi_handle: software VSI handle + */ +enum ice_status ice_replay_rss_cfg(struct ice_hw *hw, u16 vsi_handle) +{ + enum ice_status status = 0; + struct ice_rss_cfg *r; + + if (!ice_is_vsi_valid(hw, vsi_handle)) + return ICE_ERR_PARAM; + + mutex_lock(&hw->rss_locks); + list_for_each_entry(r, &hw->rss_list_head, l_entry) { + if (test_bit(vsi_handle, r->vsis)) { + status = ice_add_rss_cfg_sync(hw, vsi_handle, + r->hashed_flds, + r->packet_hdr, + ICE_RSS_OUTER_HEADERS); + if (status) + break; + } + } + mutex_unlock(&hw->rss_locks); + + return status; +} + +/** + * ice_get_rss_cfg - returns hashed fields for the given header types + * @hw: pointer to the hardware structure + * @vsi_handle: software VSI handle + * @hdrs: protocol header type + * + * This function will return the match fields of the first instance of flow + * profile having the given header types and containing input VSI + */ +u64 ice_get_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u32 hdrs) +{ + struct ice_rss_cfg *r, *rss_cfg = NULL; + + /* verify if the protocol header is non zero and VSI is valid */ + if (hdrs == ICE_FLOW_SEG_HDR_NONE || !ice_is_vsi_valid(hw, vsi_handle)) + return ICE_HASH_INVALID; + + mutex_lock(&hw->rss_locks); + list_for_each_entry(r, &hw->rss_list_head, l_entry) + if (test_bit(vsi_handle, r->vsis) && + r->packet_hdr == hdrs) { + rss_cfg = r; + break; + } + mutex_unlock(&hw->rss_locks); + + return rss_cfg ? rss_cfg->hashed_flds : ICE_HASH_INVALID; +} diff --git a/drivers/net/ethernet/intel/ice/ice_flow.h b/drivers/net/ethernet/intel/ice/ice_flow.h new file mode 100644 index 000000000000..5558627bd5eb --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_flow.h @@ -0,0 +1,207 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Intel Corporation. 
*/ + +#ifndef _ICE_FLOW_H_ +#define _ICE_FLOW_H_ + +#define ICE_FLOW_ENTRY_HANDLE_INVAL 0 +#define ICE_FLOW_FLD_OFF_INVAL 0xffff + +/* Generate flow hash field from flow field type(s) */ +#define ICE_FLOW_HASH_IPV4 \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) | \ + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA)) +#define ICE_FLOW_HASH_IPV6 \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA) | \ + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA)) +#define ICE_FLOW_HASH_TCP_PORT \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT) | \ + BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT)) +#define ICE_FLOW_HASH_UDP_PORT \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT) | \ + BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_DST_PORT)) +#define ICE_FLOW_HASH_SCTP_PORT \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT) | \ + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT)) + +#define ICE_HASH_INVALID 0 +#define ICE_HASH_TCP_IPV4 (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_TCP_PORT) +#define ICE_HASH_TCP_IPV6 (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_TCP_PORT) +#define ICE_HASH_UDP_IPV4 (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_UDP_PORT) +#define ICE_HASH_UDP_IPV6 (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_UDP_PORT) + +/* Protocol header fields within a packet segment. A segment consists of one or + * more protocol headers that make up a logical group of protocol headers. Each + * logical group of protocol headers encapsulates or is encapsulated using/by + * tunneling or encapsulation protocols for network virtualization such as GRE, + * VxLAN, etc. + */ +enum ice_flow_seg_hdr { + ICE_FLOW_SEG_HDR_NONE = 0x00000000, + ICE_FLOW_SEG_HDR_IPV4 = 0x00000004, + ICE_FLOW_SEG_HDR_IPV6 = 0x00000008, + ICE_FLOW_SEG_HDR_TCP = 0x00000040, + ICE_FLOW_SEG_HDR_UDP = 0x00000080, + ICE_FLOW_SEG_HDR_SCTP = 0x00000100, +}; + +enum ice_flow_field { + /* L3 */ + ICE_FLOW_FIELD_IDX_IPV4_SA, + ICE_FLOW_FIELD_IDX_IPV4_DA, + ICE_FLOW_FIELD_IDX_IPV6_SA, + ICE_FLOW_FIELD_IDX_IPV6_DA, + /* L4 */ + ICE_FLOW_FIELD_IDX_TCP_SRC_PORT, + ICE_FLOW_FIELD_IDX_TCP_DST_PORT, + ICE_FLOW_FIELD_IDX_UDP_SRC_PORT, + ICE_FLOW_FIELD_IDX_UDP_DST_PORT, + ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT, + ICE_FLOW_FIELD_IDX_SCTP_DST_PORT, + /* The total number of enums must not exceed 64 */ + ICE_FLOW_FIELD_IDX_MAX +}; + +/* Flow headers and fields for AVF support */ +enum ice_flow_avf_hdr_field { + /* Values 0 - 28 are reserved for future use */ + ICE_AVF_FLOW_FIELD_INVALID = 0, + ICE_AVF_FLOW_FIELD_UNICAST_IPV4_UDP = 29, + ICE_AVF_FLOW_FIELD_MULTICAST_IPV4_UDP, + ICE_AVF_FLOW_FIELD_IPV4_UDP, + ICE_AVF_FLOW_FIELD_IPV4_TCP_SYN_NO_ACK, + ICE_AVF_FLOW_FIELD_IPV4_TCP, + ICE_AVF_FLOW_FIELD_IPV4_SCTP, + ICE_AVF_FLOW_FIELD_IPV4_OTHER, + ICE_AVF_FLOW_FIELD_FRAG_IPV4, + /* Values 37-38 are reserved */ + ICE_AVF_FLOW_FIELD_UNICAST_IPV6_UDP = 39, + ICE_AVF_FLOW_FIELD_MULTICAST_IPV6_UDP, + ICE_AVF_FLOW_FIELD_IPV6_UDP, + ICE_AVF_FLOW_FIELD_IPV6_TCP_SYN_NO_ACK, + ICE_AVF_FLOW_FIELD_IPV6_TCP, + ICE_AVF_FLOW_FIELD_IPV6_SCTP, + ICE_AVF_FLOW_FIELD_IPV6_OTHER, + ICE_AVF_FLOW_FIELD_FRAG_IPV6, + ICE_AVF_FLOW_FIELD_RSVD47, + ICE_AVF_FLOW_FIELD_FCOE_OX, + ICE_AVF_FLOW_FIELD_FCOE_RX, + ICE_AVF_FLOW_FIELD_FCOE_OTHER, + /* Values 51-62 are reserved */ + ICE_AVF_FLOW_FIELD_L2_PAYLOAD = 63, + ICE_AVF_FLOW_FIELD_MAX +}; + +/* Supported RSS offloads This macro is defined to support + * VIRTCHNL_OP_GET_RSS_HENA_CAPS ops. PF driver sends the RSS hardware + * capabilities to the caller of this ops. 
+ */ +#define ICE_DEFAULT_RSS_HENA ( \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_UDP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_SCTP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_TCP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_OTHER) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_FRAG_IPV4) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_UDP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_TCP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_SCTP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_OTHER) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_FRAG_IPV6) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_TCP_SYN_NO_ACK) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_UNICAST_IPV4_UDP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_MULTICAST_IPV4_UDP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_TCP_SYN_NO_ACK) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_UNICAST_IPV6_UDP) | \ + BIT_ULL(ICE_AVF_FLOW_FIELD_MULTICAST_IPV6_UDP)) + +enum ice_flow_dir { + ICE_FLOW_RX = 0x02, +}; + +enum ice_flow_priority { + ICE_FLOW_PRIO_LOW, + ICE_FLOW_PRIO_NORMAL, + ICE_FLOW_PRIO_HIGH +}; + +#define ICE_FLOW_SEG_MAX 2 +#define ICE_FLOW_FV_EXTRACT_SZ 2 + +#define ICE_FLOW_SET_HDRS(seg, val) ((seg)->hdrs |= (u32)(val)) + +struct ice_flow_seg_xtrct { + u8 prot_id; /* Protocol ID of extracted header field */ + u16 off; /* Starting offset of the field in header in bytes */ + u8 idx; /* Index of FV entry used */ + u8 disp; /* Displacement of field in bits fr. FV entry's start */ +}; + +enum ice_flow_fld_match_type { + ICE_FLOW_FLD_TYPE_REG, /* Value, mask */ + ICE_FLOW_FLD_TYPE_RANGE, /* Value, mask, last (upper bound) */ + ICE_FLOW_FLD_TYPE_PREFIX, /* IP address, prefix, size of prefix */ + ICE_FLOW_FLD_TYPE_SIZE, /* Value, mask, size of match */ +}; + +struct ice_flow_fld_loc { + /* Describe offsets of field information relative to the beginning of + * input buffer provided when adding flow entries. + */ + u16 val; /* Offset where the value is located */ + u16 mask; /* Offset where the mask/prefix value is located */ + u16 last; /* Length or offset where the upper value is located */ +}; + +struct ice_flow_fld_info { + enum ice_flow_fld_match_type type; + /* Location where to retrieve data from an input buffer */ + struct ice_flow_fld_loc src; + /* Location where to put the data into the final entry buffer */ + struct ice_flow_fld_loc entry; + struct ice_flow_seg_xtrct xtrct; +}; + +struct ice_flow_seg_info { + u32 hdrs; /* Bitmask indicating protocol headers present */ + u64 match; /* Bitmask indicating header fields to be matched */ + u64 range; /* Bitmask indicating header fields matched as ranges */ + + struct ice_flow_fld_info fields[ICE_FLOW_FIELD_IDX_MAX]; +}; + +struct ice_flow_prof { + struct list_head l_entry; + + u64 id; + enum ice_flow_dir dir; + u8 segs_cnt; + + /* Keep track of flow entries associated with this flow profile */ + struct mutex entries_lock; + struct list_head entries; + + struct ice_flow_seg_info segs[ICE_FLOW_SEG_MAX]; + + /* software VSI handles referenced by this flow profile */ + DECLARE_BITMAP(vsis, ICE_MAX_VSI); +}; + +struct ice_rss_cfg { + struct list_head l_entry; + /* bitmap of VSIs added to the RSS entry */ + DECLARE_BITMAP(vsis, ICE_MAX_VSI); + u64 hashed_flds; + u32 packet_hdr; +}; + +enum ice_status ice_flow_rem_entry(struct ice_hw *hw, u64 entry_h); +void ice_rem_vsi_rss_list(struct ice_hw *hw, u16 vsi_handle); +enum ice_status ice_replay_rss_cfg(struct ice_hw *hw, u16 vsi_handle); +enum ice_status +ice_add_avf_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds); +enum ice_status ice_rem_vsi_rss_cfg(struct ice_hw *hw, u16 vsi_handle); +enum ice_status +ice_add_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 
hashed_flds, + u32 addl_hdrs); +u64 ice_get_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u32 hdrs); +#endif /* _ICE_FLOW_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index e8f32350fed2..f2cababf2561 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -60,15 +60,6 @@ #define PRTDCB_GENS_DCBX_STATUS_M ICE_M(0x7, 0) #define GL_PREEXT_L2_PMASK0(_i) (0x0020F0FC + ((_i) * 4)) #define GL_PREEXT_L2_PMASK1(_i) (0x0020F108 + ((_i) * 4)) -#define GLFLXP_RXDID_FLAGS(_i, _j) (0x0045D000 + ((_i) * 4 + (_j) * 256)) -#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S 0 -#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M ICE_M(0x3F, 0) -#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_S 8 -#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_M ICE_M(0x3F, 8) -#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_S 16 -#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_M ICE_M(0x3F, 16) -#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_S 24 -#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_M ICE_M(0x3F, 24) #define GLFLXP_RXDID_FLX_WRD_0(_i) (0x0045c800 + ((_i) * 4)) #define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_S 0 #define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_M ICE_M(0xFF, 0) diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index 0997d352709b..878e125d8b42 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -199,6 +199,14 @@ enum ice_rxdid { /* Receive Flex Descriptor Rx opcode values */ #define ICE_RX_OPC_MDID 0x01 +/* Receive Descriptor MDID values that access packet flags */ +enum ice_flex_mdid_pkt_flags { + ICE_RX_MDID_PKT_FLAGS_15_0 = 20, + ICE_RX_MDID_PKT_FLAGS_31_16, + ICE_RX_MDID_PKT_FLAGS_47_32, + ICE_RX_MDID_PKT_FLAGS_63_48, +}; + /* Receive Descriptor MDID values */ enum ice_flex_rx_mdid { ICE_RX_MDID_FLOW_ID_LOWER = 5, diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index e7449248fab4..1874c9f51a32 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3,6 +3,7 @@ #include "ice.h" #include "ice_base.h" +#include "ice_flow.h" #include "ice_lib.h" #include "ice_dcb_lib.h" @@ -493,7 +494,28 @@ bool ice_is_safe_mode(struct ice_pf *pf) } /** - * ice_rss_clean - Delete RSS related VSI structures that hold user inputs + * ice_vsi_clean_rss_flow_fld - Delete RSS configuration + * @vsi: the VSI being cleaned up + * + * This function deletes RSS input set for all flows that were configured + * for this VSI + */ +static void ice_vsi_clean_rss_flow_fld(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + enum ice_status status; + + if (ice_is_safe_mode(pf)) + return; + + status = ice_rem_vsi_rss_cfg(&pf->hw, vsi->idx); + if (status) + dev_dbg(ice_pf_to_dev(pf), "ice_rem_vsi_rss_cfg failed for vsi = %d, error = %d\n", + vsi->vsi_num, status); +} + +/** + * ice_rss_clean - Delete RSS related VSI structures and configuration * @vsi: the VSI being removed */ static void ice_rss_clean(struct ice_vsi *vsi) @@ -507,6 +529,11 @@ static void ice_rss_clean(struct ice_vsi *vsi) devm_kfree(dev, vsi->rss_hkey_user); if (vsi->rss_lut_user) devm_kfree(dev, vsi->rss_lut_user); + + ice_vsi_clean_rss_flow_fld(vsi); + /* remove RSS replay list */ + if (!ice_is_safe_mode(pf)) + ice_rem_vsi_rss_list(&pf->hw, vsi->idx); } /** @@ -817,12 +844,23 @@ static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi) ctxt->info.valid_sections |= 
cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID); - /* Enable MAC Antispoof with new VSI being initialized or updated */ - if (vsi->type == ICE_VSI_VF && pf->vf[vsi->vf_id].spoofchk) { + /* enable/disable MAC and VLAN anti-spoof when spoofchk is on/off + * respectively + */ + if (vsi->type == ICE_VSI_VF) { ctxt->info.valid_sections |= cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID); - ctxt->info.sec_flags |= - ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF; + if (pf->vf[vsi->vf_id].spoofchk) { + ctxt->info.sec_flags |= + ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF | + (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << + ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S); + } else { + ctxt->info.sec_flags &= + ~(ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF | + (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << + ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S)); + } } /* Allow control frames out of main VSI */ @@ -1076,6 +1114,115 @@ ice_vsi_cfg_rss_exit: } /** + * ice_vsi_set_vf_rss_flow_fld - Sets VF VSI RSS input set for different flows + * @vsi: VSI to be configured + * + * This function will only be called during the VF VSI setup. Upon successful + * completion of package download, this function will configure default RSS + * input sets for VF VSI. + */ +static void ice_vsi_set_vf_rss_flow_fld(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + enum ice_status status; + struct device *dev; + + dev = ice_pf_to_dev(pf); + if (ice_is_safe_mode(pf)) { + dev_dbg(dev, "Advanced RSS disabled. Package download failed, vsi num = %d\n", + vsi->vsi_num); + return; + } + + status = ice_add_avf_rss_cfg(&pf->hw, vsi->idx, ICE_DEFAULT_RSS_HENA); + if (status) + dev_dbg(dev, "ice_add_avf_rss_cfg failed for vsi = %d, error = %d\n", + vsi->vsi_num, status); +} + +/** + * ice_vsi_set_rss_flow_fld - Sets RSS input set for different flows + * @vsi: VSI to be configured + * + * This function will only be called after successful download package call + * during initialization of PF. Since the downloaded package will erase the + * RSS section, this function will configure RSS input sets for different + * flow types. The last profile added has the highest priority, therefore 2 + * tuple profiles (i.e. IPv4 src/dst) are added before 4 tuple profiles + * (i.e. IPv4 src/dst TCP src/dst port). + */ +static void ice_vsi_set_rss_flow_fld(struct ice_vsi *vsi) +{ + u16 vsi_handle = vsi->idx, vsi_num = vsi->vsi_num; + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + enum ice_status status; + struct device *dev; + + dev = ice_pf_to_dev(pf); + if (ice_is_safe_mode(pf)) { + dev_dbg(dev, "Advanced RSS disabled. 
Package download failed, vsi num = %d\n", + vsi_num); + return; + } + /* configure RSS for IPv4 with input set IP src/dst */ + status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV4, + ICE_FLOW_SEG_HDR_IPV4); + if (status) + dev_dbg(dev, "ice_add_rss_cfg failed for ipv4 flow, vsi = %d, error = %d\n", + vsi_num, status); + + /* configure RSS for IPv6 with input set IPv6 src/dst */ + status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV6, + ICE_FLOW_SEG_HDR_IPV6); + if (status) + dev_dbg(dev, "ice_add_rss_cfg failed for ipv6 flow, vsi = %d, error = %d\n", + vsi_num, status); + + /* configure RSS for tcp4 with input set IP src/dst, TCP src/dst */ + status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_TCP_IPV4, + ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV4); + if (status) + dev_dbg(dev, "ice_add_rss_cfg failed for tcp4 flow, vsi = %d, error = %d\n", + vsi_num, status); + + /* configure RSS for udp4 with input set IP src/dst, UDP src/dst */ + status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_UDP_IPV4, + ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV4); + if (status) + dev_dbg(dev, "ice_add_rss_cfg failed for udp4 flow, vsi = %d, error = %d\n", + vsi_num, status); + + /* configure RSS for sctp4 with input set IP src/dst */ + status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV4, + ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV4); + if (status) + dev_dbg(dev, "ice_add_rss_cfg failed for sctp4 flow, vsi = %d, error = %d\n", + vsi_num, status); + + /* configure RSS for tcp6 with input set IPv6 src/dst, TCP src/dst */ + status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_TCP_IPV6, + ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV6); + if (status) + dev_dbg(dev, "ice_add_rss_cfg failed for tcp6 flow, vsi = %d, error = %d\n", + vsi_num, status); + + /* configure RSS for udp6 with input set IPv6 src/dst, UDP src/dst */ + status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_UDP_IPV6, + ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV6); + if (status) + dev_dbg(dev, "ice_add_rss_cfg failed for udp6 flow, vsi = %d, error = %d\n", + vsi_num, status); + + /* configure RSS for sctp6 with input set IPv6 src/dst */ + status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV6, + ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV6); + if (status) + dev_dbg(dev, "ice_add_rss_cfg failed for sctp6 flow, vsi = %d, error = %d\n", + vsi_num, status); +} + +/** * ice_add_mac_to_list - Add a MAC address filter entry to the list * @vsi: the VSI to be forwarded to * @add_list: pointer to the list which contains MAC filter entries @@ -1636,22 +1783,14 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc) ctxt->info = vsi->info; - if (ena) { - ctxt->info.sec_flags |= - ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << - ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S; + if (ena) ctxt->info.sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; - } else { - ctxt->info.sec_flags &= - ~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << - ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S); + else ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; - } if (!vlan_promisc) ctxt->info.valid_sections = - cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID | - ICE_AQ_VSI_PROP_SW_VALID); + cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID); status = ice_update_vsi(&pf->hw, vsi->idx, ctxt, NULL); if (status) { @@ -1661,7 +1800,6 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc) goto err_out; } - vsi->info.sec_flags = ctxt->info.sec_flags; vsi->info.sw_flags2 = ctxt->info.sw_flags2; kfree(ctxt); @@ -1899,8 +2037,10 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info 
*pi, * receive traffic on first queue. Hence no need to capture * return value */ - if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) + if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { ice_vsi_cfg_rss_lut_key(vsi); + ice_vsi_set_rss_flow_fld(vsi); + } break; case ICE_VSI_VF: /* VF driver will take care of creating netdev for this type and @@ -1924,8 +2064,10 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, * receive traffic on first queue. Hence no need to capture * return value */ - if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) + if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { ice_vsi_cfg_rss_lut_key(vsi); + ice_vsi_set_vf_rss_flow_fld(vsi); + } break; case ICE_VSI_LB: ret = ice_vsi_alloc_rings(vsi); @@ -2402,6 +2544,97 @@ int ice_vsi_release(struct ice_vsi *vsi) } /** + * ice_vsi_rebuild_update_coalesce - set coalesce for a q_vector + * @q_vector: pointer to q_vector which is being updated + * @coalesce: pointer to array of struct with stored coalesce + * + * Set coalesce param in q_vector and update these parameters in HW. + */ +static void +ice_vsi_rebuild_update_coalesce(struct ice_q_vector *q_vector, + struct ice_coalesce_stored *coalesce) +{ + struct ice_ring_container *rx_rc = &q_vector->rx; + struct ice_ring_container *tx_rc = &q_vector->tx; + struct ice_hw *hw = &q_vector->vsi->back->hw; + + tx_rc->itr_setting = coalesce->itr_tx; + rx_rc->itr_setting = coalesce->itr_rx; + + /* dynamic ITR values will be updated during Tx/Rx */ + if (!ITR_IS_DYNAMIC(tx_rc->itr_setting)) + wr32(hw, GLINT_ITR(tx_rc->itr_idx, q_vector->reg_idx), + ITR_REG_ALIGN(tx_rc->itr_setting) >> + ICE_ITR_GRAN_S); + if (!ITR_IS_DYNAMIC(rx_rc->itr_setting)) + wr32(hw, GLINT_ITR(rx_rc->itr_idx, q_vector->reg_idx), + ITR_REG_ALIGN(rx_rc->itr_setting) >> + ICE_ITR_GRAN_S); + + q_vector->intrl = coalesce->intrl; + wr32(hw, GLINT_RATE(q_vector->reg_idx), + ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran)); +} + +/** + * ice_vsi_rebuild_get_coalesce - get coalesce from all q_vectors + * @vsi: VSI connected with q_vectors + * @coalesce: array of struct with stored coalesce + * + * Returns array size. + */ +static int +ice_vsi_rebuild_get_coalesce(struct ice_vsi *vsi, + struct ice_coalesce_stored *coalesce) +{ + int i; + + ice_for_each_q_vector(vsi, i) { + struct ice_q_vector *q_vector = vsi->q_vectors[i]; + + coalesce[i].itr_tx = q_vector->tx.itr_setting; + coalesce[i].itr_rx = q_vector->rx.itr_setting; + coalesce[i].intrl = q_vector->intrl; + } + + return vsi->num_q_vectors; +} + +/** + * ice_vsi_rebuild_set_coalesce - set coalesce from earlier saved arrays + * @vsi: VSI connected with q_vectors + * @coalesce: pointer to array of struct with stored coalesce + * @size: size of coalesce array + * + * Before this function, ice_vsi_rebuild_get_coalesce should be called to save + * ITR params in arrays. If size is 0 or coalesce wasn't stored set coalesce + * to default value. 
+ */ +static void +ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, + struct ice_coalesce_stored *coalesce, int size) +{ + int i; + + if ((size && !coalesce) || !vsi) + return; + + for (i = 0; i < size && i < vsi->num_q_vectors; i++) + ice_vsi_rebuild_update_coalesce(vsi->q_vectors[i], + &coalesce[i]); + + for (; i < vsi->num_q_vectors; i++) { + struct ice_coalesce_stored coalesce_dflt = { + .itr_tx = ICE_DFLT_TX_ITR, + .itr_rx = ICE_DFLT_RX_ITR, + .intrl = 0 + }; + ice_vsi_rebuild_update_coalesce(vsi->q_vectors[i], + &coalesce_dflt); + } +} + +/** * ice_vsi_rebuild - Rebuild VSI after reset * @vsi: VSI to be rebuild * @init_vsi: is this an initialization or a reconfigure of the VSI @@ -2411,6 +2644,8 @@ int ice_vsi_release(struct ice_vsi *vsi) int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; + struct ice_coalesce_stored *coalesce; + int prev_num_q_vectors = 0; struct ice_vf *vf = NULL; enum ice_status status; struct ice_pf *pf; @@ -2423,6 +2658,11 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) if (vsi->type == ICE_VSI_VF) vf = &pf->vf[vsi->vf_id]; + coalesce = kcalloc(vsi->num_q_vectors, + sizeof(struct ice_coalesce_stored), GFP_KERNEL); + if (coalesce) + prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, + coalesce); ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx); ice_vsi_free_q_vectors(vsi); @@ -2535,6 +2775,9 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) return ice_schedule_reset(pf, ICE_RESET_PFR); } } + ice_vsi_rebuild_set_coalesce(vsi, coalesce, prev_num_q_vectors); + kfree(coalesce); + return 0; err_vectors: @@ -2549,6 +2792,7 @@ err_rings: err_vsi: ice_vsi_clear(vsi); set_bit(__ICE_RESET_FAILED, pf->state); + kfree(coalesce); return ret; } @@ -2740,3 +2984,121 @@ cfg_mac_fltr_exit: ice_free_fltr_list(&vsi->back->pdev->dev, &tmp_add_list); return status; } + +/** + * ice_is_dflt_vsi_in_use - check if the default forwarding VSI is being used + * @sw: switch to check if its default forwarding VSI is free + * + * Return true if the default forwarding VSI is already being used, else returns + * false signalling that it's available to use. + */ +bool ice_is_dflt_vsi_in_use(struct ice_sw *sw) +{ + return (sw->dflt_vsi && sw->dflt_vsi_ena); +} + +/** + * ice_is_vsi_dflt_vsi - check if the VSI passed in is the default VSI + * @sw: switch for the default forwarding VSI to compare against + * @vsi: VSI to compare against default forwarding VSI + * + * If this VSI passed in is the default forwarding VSI then return true, else + * return false + */ +bool ice_is_vsi_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi) +{ + return (sw->dflt_vsi == vsi && sw->dflt_vsi_ena); +} + +/** + * ice_set_dflt_vsi - set the default forwarding VSI + * @sw: switch used to assign the default forwarding VSI + * @vsi: VSI getting set as the default forwarding VSI on the switch + * + * If the VSI passed in is already the default VSI and it's enabled just return + * success. + * + * If there is already a default VSI on the switch and it's enabled then return + * -EEXIST since there can only be one default VSI per switch. + * + * Otherwise try to set the VSI passed in as the switch's default VSI and + * return the result. 
+ */ +int ice_set_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi) +{ + enum ice_status status; + struct device *dev; + + if (!sw || !vsi) + return -EINVAL; + + dev = ice_pf_to_dev(vsi->back); + + /* the VSI passed in is already the default VSI */ + if (ice_is_vsi_dflt_vsi(sw, vsi)) { + dev_dbg(dev, "VSI %d passed in is already the default forwarding VSI, nothing to do\n", + vsi->vsi_num); + return 0; + } + + /* another VSI is already the default VSI for this switch */ + if (ice_is_dflt_vsi_in_use(sw)) { + dev_err(dev, + "Default forwarding VSI %d already in use, disable it and try again\n", + sw->dflt_vsi->vsi_num); + return -EEXIST; + } + + status = ice_cfg_dflt_vsi(&vsi->back->hw, vsi->idx, true, ICE_FLTR_RX); + if (status) { + dev_err(dev, + "Failed to set VSI %d as the default forwarding VSI, error %d\n", + vsi->vsi_num, status); + return -EIO; + } + + sw->dflt_vsi = vsi; + sw->dflt_vsi_ena = true; + + return 0; +} + +/** + * ice_clear_dflt_vsi - clear the default forwarding VSI + * @sw: switch used to clear the default VSI + * + * If the switch has no default VSI or it's not enabled then return error. + * + * Otherwise try to clear the default VSI and return the result. + */ +int ice_clear_dflt_vsi(struct ice_sw *sw) +{ + struct ice_vsi *dflt_vsi; + enum ice_status status; + struct device *dev; + + if (!sw) + return -EINVAL; + + dev = ice_pf_to_dev(sw->pf); + + dflt_vsi = sw->dflt_vsi; + + /* there is no default VSI configured */ + if (!ice_is_dflt_vsi_in_use(sw)) + return -ENODEV; + + status = ice_cfg_dflt_vsi(&dflt_vsi->back->hw, dflt_vsi->idx, false, + ICE_FLTR_RX); + if (status) { + dev_err(dev, + "Failed to clear the default forwarding VSI %d, error %d\n", + dflt_vsi->vsi_num, status); + return -EIO; + } + + sw->dflt_vsi = NULL; + sw->dflt_vsi_ena = false; + + return 0; +} diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 6e31e30aba39..68fd0d4505c2 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -103,4 +103,12 @@ enum ice_status ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set); bool ice_is_safe_mode(struct ice_pf *pf); + +bool ice_is_dflt_vsi_in_use(struct ice_sw *sw); + +bool ice_is_vsi_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi); + +int ice_set_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi); + +int ice_clear_dflt_vsi(struct ice_sw *sw); #endif /* !_ICE_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 69bff085acf7..5ae671609f98 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -13,7 +13,7 @@ #define DRV_VERSION_MAJOR 0 #define DRV_VERSION_MINOR 8 -#define DRV_VERSION_BUILD 1 +#define DRV_VERSION_BUILD 2 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." 
\ @@ -379,25 +379,29 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) clear_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags); if (vsi->current_netdev_flags & IFF_PROMISC) { /* Apply Rx filter rule to get traffic from wire */ - status = ice_cfg_dflt_vsi(hw, vsi->idx, true, - ICE_FLTR_RX); - if (status) { - netdev_err(netdev, "Error setting default VSI %i Rx rule\n", - vsi->vsi_num); - vsi->current_netdev_flags &= ~IFF_PROMISC; - err = -EIO; - goto out_promisc; + if (!ice_is_dflt_vsi_in_use(pf->first_sw)) { + err = ice_set_dflt_vsi(pf->first_sw, vsi); + if (err && err != -EEXIST) { + netdev_err(netdev, + "Error %d setting default VSI %i Rx rule\n", + err, vsi->vsi_num); + vsi->current_netdev_flags &= + ~IFF_PROMISC; + goto out_promisc; + } } } else { /* Clear Rx filter to remove traffic from wire */ - status = ice_cfg_dflt_vsi(hw, vsi->idx, false, - ICE_FLTR_RX); - if (status) { - netdev_err(netdev, "Error clearing default VSI %i Rx rule\n", - vsi->vsi_num); - vsi->current_netdev_flags |= IFF_PROMISC; - err = -EIO; - goto out_promisc; + if (ice_is_vsi_dflt_vsi(pf->first_sw, vsi)) { + err = ice_clear_dflt_vsi(pf->first_sw); + if (err) { + netdev_err(netdev, + "Error %d clearing default VSI %i Rx rule\n", + err, vsi->vsi_num); + vsi->current_netdev_flags |= + IFF_PROMISC; + goto out_promisc; + } } } } @@ -472,7 +476,7 @@ ice_prepare_for_reset(struct ice_pf *pf) ice_vc_notify_reset(pf); /* Disable VFs until reset is completed */ - for (i = 0; i < pf->num_alloc_vfs; i++) + ice_for_each_vf(pf, i) ice_set_vf_state_qs_dis(&pf->vf[i]); /* clear SW filtering DB */ @@ -840,8 +844,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, ice_vsi_link_event(vsi, link_up); ice_print_link_msg(vsi, link_up); - if (pf->num_alloc_vfs) - ice_vc_notify_link_state(pf); + ice_vc_notify_link_state(pf); return result; } @@ -1291,7 +1294,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) } /* check to see if one of the VFs caused the MDD */ - for (i = 0; i < pf->num_alloc_vfs; i++) { + ice_for_each_vf(pf, i) { struct ice_vf *vf = &pf->vf[i]; bool vf_mdd_detected = false; @@ -2330,7 +2333,8 @@ static void ice_set_netdev_features(struct net_device *netdev) NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; - tso_features = NETIF_F_TSO; + tso_features = NETIF_F_TSO | + NETIF_F_GSO_UDP_L4; /* set features that user can change */ netdev->hw_features = dflt_features | csumo_features | @@ -3568,6 +3572,15 @@ static const struct pci_device_id ice_pci_tbl[] = { { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_BACKPLANE), 0 }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_QSFP), 0 }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_SFP), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_BACKPLANE), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_QSFP), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SFP), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_10G_BASE_T), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SGMII), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822X_BACKPLANE), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SFP), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_10G_BASE_T), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SGMII), 0 }, /* required last entry */ { 0, } }; @@ -4670,6 +4683,13 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) goto err_init_ctrlq; } + if (pf->first_sw->dflt_vsi_ena) + dev_info(dev, + "Clearing default VSI, re-enable after reset completes\n"); + /* clear the default VSI configuration if it exists */ + pf->first_sw->dflt_vsi = NULL; + pf->first_sw->dflt_vsi_ena = false; + 
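/*
 * Editorial sketch, not part of the patch: the default-forwarding-VSI
 * helpers added above (ice_is_dflt_vsi_in_use, ice_is_vsi_dflt_vsi,
 * ice_set_dflt_vsi, ice_clear_dflt_vsi) can be driven from a caller such
 * as the promiscuous-mode path roughly as follows.  The function name
 * example_toggle_dflt_vsi() is hypothetical.
 */
static int example_toggle_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi,
				   bool promisc)
{
	if (promisc) {
		/* ice_set_dflt_vsi() itself returns -EEXIST when another VSI
		 * already owns the default Rx rule, so this check is only
		 * needed if that case should be skipped silently.
		 */
		if (ice_is_dflt_vsi_in_use(sw) && !ice_is_vsi_dflt_vsi(sw, vsi))
			return -EEXIST;
		return ice_set_dflt_vsi(sw, vsi);
	}

	/* only clear the rule when this VSI is the one holding it */
	if (!ice_is_vsi_dflt_vsi(sw, vsi))
		return 0;
	return ice_clear_dflt_vsi(sw);
}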
ice_clear_pxe_mode(hw); ret = ice_get_caps(hw); @@ -4825,7 +4845,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) } } - netdev_info(netdev, "changed MTU to %d\n", new_mtu); + netdev_dbg(netdev, "changed MTU to %d\n", new_mtu); return 0; } @@ -5060,42 +5080,23 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, * ice_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure */ -static void ice_tx_timeout(struct net_device *netdev) +static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_ring *tx_ring = NULL; struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; - int hung_queue = -1; u32 i; pf->tx_timeout_count++; - /* find the stopped queue the same way dev_watchdog() does */ - for (i = 0; i < netdev->num_tx_queues; i++) { - unsigned long trans_start; - struct netdev_queue *q; - - q = netdev_get_tx_queue(netdev, i); - trans_start = q->trans_start; - if (netif_xmit_stopped(q) && - time_after(jiffies, - trans_start + netdev->watchdog_timeo)) { - hung_queue = i; - break; - } - } - - if (i == netdev->num_tx_queues) - netdev_info(netdev, "tx_timeout: no netdev hung queue found\n"); - else - /* now that we have an index, find the tx_ring struct */ - for (i = 0; i < vsi->num_txq; i++) - if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) - if (hung_queue == vsi->tx_rings[i]->q_index) { - tx_ring = vsi->tx_rings[i]; - break; - } + /* now that we have an index, find the tx_ring struct */ + for (i = 0; i < vsi->num_txq; i++) + if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) + if (txqueue == vsi->tx_rings[i]->q_index) { + tx_ring = vsi->tx_rings[i]; + break; + } /* Reset recovery level if enough time has elapsed after last timeout. * Also ensure no new reset action happens before next timeout period. 
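/*
 * Editorial sketch, not part of the patch: with the two-argument
 * .ndo_tx_timeout used throughout this series, the stalled queue index is
 * handed to the driver, so the dev_watchdog()-style scan removed above is
 * no longer needed.  A minimal handler could look like this; the
 * example_tx_timeout() name is hypothetical.
 */
static void example_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
	struct netdev_queue *txq = netdev_get_tx_queue(netdev, txqueue);

	netdev_warn(netdev, "Tx queue %u timed out, trans_start %lu\n",
		    txqueue, txq->trans_start);
	/* driver-specific recovery, e.g. scheduling a reset, goes here */
}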
@@ -5110,19 +5111,19 @@ static void ice_tx_timeout(struct net_device *netdev) struct ice_hw *hw = &pf->hw; u32 head, val = 0; - head = (rd32(hw, QTX_COMM_HEAD(vsi->txq_map[hung_queue])) & + head = (rd32(hw, QTX_COMM_HEAD(vsi->txq_map[txqueue])) & QTX_COMM_HEAD_HEAD_M) >> QTX_COMM_HEAD_HEAD_S; /* Read interrupt register */ val = rd32(hw, GLINT_DYN_CTL(tx_ring->q_vector->reg_idx)); netdev_info(netdev, "tx_timeout: VSI_num: %d, Q %d, NTC: 0x%x, HW_HEAD: 0x%x, NTU: 0x%x, INT: 0x%x\n", - vsi->vsi_num, hung_queue, tx_ring->next_to_clean, + vsi->vsi_num, txqueue, tx_ring->next_to_clean, head, tx_ring->next_to_use, val); } pf->tx_timeout_last_recovery = jiffies; - netdev_info(netdev, "tx_timeout recovery level %d, hung_queue %d\n", - pf->tx_timeout_recovery_level, hung_queue); + netdev_info(netdev, "tx_timeout recovery level %d, txqueue %d\n", + pf->tx_timeout_recovery_level, txqueue); switch (pf->tx_timeout_recovery_level) { case 1: diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index 57c73f613f32..7525ac50742e 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -289,6 +289,18 @@ enum ice_status ice_init_nvm(struct ice_hw *hw) nvm->eetrack = (eetrack_hi << 16) | eetrack_lo; + /* the following devices do not have boot_cfg_tlv yet */ + if (hw->device_id == ICE_DEV_ID_E822C_BACKPLANE || + hw->device_id == ICE_DEV_ID_E822C_QSFP || + hw->device_id == ICE_DEV_ID_E822C_10G_BASE_T || + hw->device_id == ICE_DEV_ID_E822C_SGMII || + hw->device_id == ICE_DEV_ID_E822C_SFP || + hw->device_id == ICE_DEV_ID_E822X_BACKPLANE || + hw->device_id == ICE_DEV_ID_E822L_SFP || + hw->device_id == ICE_DEV_ID_E822L_10G_BASE_T || + hw->device_id == ICE_DEV_ID_E822L_SGMII) + return status; + status = ice_get_pfa_module_tlv(hw, &boot_cfg_tlv, &boot_cfg_tlv_len, ICE_SR_BOOT_CFG_PTR); if (status) { diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h new file mode 100644 index 000000000000..71647566964e --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Intel Corporation. 
*/ + +#ifndef _ICE_PROTOCOL_TYPE_H_ +#define _ICE_PROTOCOL_TYPE_H_ +/* Decoders for ice_prot_id: + * - F: First + * - I: Inner + * - L: Last + * - O: Outer + * - S: Single + */ +enum ice_prot_id { + ICE_PROT_ID_INVAL = 0, + ICE_PROT_IPV4_OF_OR_S = 32, + ICE_PROT_IPV4_IL = 33, + ICE_PROT_IPV6_OF_OR_S = 40, + ICE_PROT_IPV6_IL = 41, + ICE_PROT_TCP_IL = 49, + ICE_PROT_UDP_IL_OR_S = 53, + ICE_PROT_SCTP_IL = 96, + ICE_PROT_META_ID = 255, /* when offset == metadata */ + ICE_PROT_INVALID = 255 /* when offset == ICE_FV_OFFSET_INVAL */ +}; +#endif /* _ICE_PROTOCOL_TYPE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_status.h b/drivers/net/ethernet/intel/ice/ice_status.h index c01597885629..a9a8bc3aca42 100644 --- a/drivers/net/ethernet/intel/ice/ice_status.h +++ b/drivers/net/ethernet/intel/ice/ice_status.h @@ -26,6 +26,7 @@ enum ice_status { ICE_ERR_IN_USE = -16, ICE_ERR_MAX_LIMIT = -17, ICE_ERR_RESET_ONGOING = -18, + ICE_ERR_HW_TABLE = -19, ICE_ERR_NVM_CHECKSUM = -51, ICE_ERR_BUF_TOO_SHORT = -52, ICE_ERR_NVM_BLANK_MODE = -53, diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index b5a53f862a83..431266081a80 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -50,42 +50,6 @@ static const u8 dummy_eth_header[DUMMY_ETH_HDR_LEN] = { 0x2, 0, 0, 0, 0, 0, ((n) * sizeof(((struct ice_sw_rule_vsi_list *)0)->vsi))) /** - * ice_aq_alloc_free_res - command to allocate/free resources - * @hw: pointer to the HW struct - * @num_entries: number of resource entries in buffer - * @buf: Indirect buffer to hold data parameters and response - * @buf_size: size of buffer for indirect commands - * @opc: pass in the command opcode - * @cd: pointer to command details structure or NULL - * - * Helper function to allocate/free resources using the admin queue commands - */ -static enum ice_status -ice_aq_alloc_free_res(struct ice_hw *hw, u16 num_entries, - struct ice_aqc_alloc_free_res_elem *buf, u16 buf_size, - enum ice_adminq_opc opc, struct ice_sq_cd *cd) -{ - struct ice_aqc_alloc_free_res_cmd *cmd; - struct ice_aq_desc desc; - - cmd = &desc.params.sw_res_ctrl; - - if (!buf) - return ICE_ERR_PARAM; - - if (buf_size < (num_entries * sizeof(buf->elem[0]))) - return ICE_ERR_PARAM; - - ice_fill_dflt_direct_cmd_desc(&desc, opc); - - desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); - - cmd->num_entries = cpu_to_le16(num_entries); - - return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); -} - -/** * ice_init_def_sw_recp - initialize the recipe book keeping tables * @hw: pointer to the HW struct * diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 2c212f64d99f..fd17ace6b226 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1071,13 +1071,16 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) ice_put_rx_buf(rx_ring, rx_buf); continue; construct_skb: - if (skb) + if (skb) { ice_add_rx_frag(rx_ring, rx_buf, skb, size); - else if (ice_ring_uses_build_skb(rx_ring)) - skb = ice_build_skb(rx_ring, rx_buf, &xdp); - else + } else if (likely(xdp.data)) { + if (ice_ring_uses_build_skb(rx_ring)) + skb = ice_build_skb(rx_ring, rx_buf, &xdp); + else + skb = ice_construct_skb(rx_ring, rx_buf, &xdp); + } else { skb = ice_construct_skb(rx_ring, rx_buf, &xdp); - + } /* exit if we failed to retrieve a buffer */ if (!skb) { rx_ring->rx_stats.alloc_buf_failed++; @@ -1925,6 +1928,7 @@ int ice_tso(struct ice_tx_buf 
*first, struct ice_tx_offload_params *off) } ip; union { struct tcphdr *tcp; + struct udphdr *udp; unsigned char *hdr; } l4; u64 cd_mss, cd_tso_len; @@ -1958,10 +1962,18 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off) /* remove payload length from checksum */ paylen = skb->len - l4_start; - csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen)); - /* compute length of segmentation header */ - off->header_len = (l4.tcp->doff * 4) + l4_start; + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { + csum_replace_by_diff(&l4.udp->check, + (__force __wsum)htonl(paylen)); + /* compute length of UDP segmentation header */ + off->header_len = sizeof(l4.udp) + l4_start; + } else { + csum_replace_by_diff(&l4.tcp->check, + (__force __wsum)htonl(paylen)); + /* compute length of TCP segmentation header */ + off->header_len = (l4.tcp->doff * 4) + l4_start; + } /* update gso_segs and bytecount */ first->gso_segs = skb_shinfo(skb)->gso_segs; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index a84cc0e6dd27..a86270696df1 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -341,6 +341,12 @@ struct ice_ring_container { u16 itr_setting; }; +struct ice_coalesce_stored { + u16 itr_tx; + u16 itr_rx; + u8 intrl; +}; + /* iterator for handling rings in ring container */ #define ice_for_each_ring(pos, head) \ for (pos = (head).ring; pos; pos = pos->next) diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index c4854a987130..b361ffabb0ca 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -13,6 +13,7 @@ #include "ice_controlq.h" #include "ice_lan_tx_rx.h" #include "ice_flex_type.h" +#include "ice_protocol_type.h" static inline bool ice_is_tc_ena(unsigned long bitmap, u8 tc) { @@ -41,6 +42,7 @@ static inline u32 ice_round_to_num(u32 N, u32 R) #define ICE_DBG_QCTX BIT_ULL(6) #define ICE_DBG_NVM BIT_ULL(7) #define ICE_DBG_LAN BIT_ULL(8) +#define ICE_DBG_FLOW BIT_ULL(9) #define ICE_DBG_SW BIT_ULL(13) #define ICE_DBG_SCHED BIT_ULL(14) #define ICE_DBG_PKG BIT_ULL(16) @@ -559,6 +561,10 @@ struct ice_hw { /* HW block tables */ struct ice_blk_info blk[ICE_BLK_COUNT]; + struct mutex fl_profs_locks[ICE_BLK_COUNT]; /* lock fltr profiles */ + struct list_head fl_profs[ICE_BLK_COUNT]; + struct mutex rss_locks; /* protect RSS configuration */ + struct list_head rss_list_head; }; /* Statistics collected by each port, VSI, VEB, and S-channel */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index edb374296d1f..82b1e7a4cb92 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -35,37 +35,6 @@ static int ice_check_vf_init(struct ice_pf *pf, struct ice_vf *vf) } /** - * ice_err_to_virt err - translate errors for VF return code - * @ice_err: error return code - */ -static enum virtchnl_status_code ice_err_to_virt_err(enum ice_status ice_err) -{ - switch (ice_err) { - case ICE_SUCCESS: - return VIRTCHNL_STATUS_SUCCESS; - case ICE_ERR_BAD_PTR: - case ICE_ERR_INVAL_SIZE: - case ICE_ERR_DEVICE_NOT_SUPPORTED: - case ICE_ERR_PARAM: - case ICE_ERR_CFG: - return VIRTCHNL_STATUS_ERR_PARAM; - case ICE_ERR_NO_MEMORY: - return VIRTCHNL_STATUS_ERR_NO_MEMORY; - case ICE_ERR_NOT_READY: - case ICE_ERR_RESET_FAILED: - case ICE_ERR_FW_API_VER: - case ICE_ERR_AQ_ERROR: - case 
ICE_ERR_AQ_TIMEOUT: - case ICE_ERR_AQ_FULL: - case ICE_ERR_AQ_NO_WORK: - case ICE_ERR_AQ_EMPTY: - return VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; - default: - return VIRTCHNL_STATUS_ERR_NOT_SUPPORTED; - } -} - -/** * ice_vc_vf_broadcast - Broadcast a message to all VFs on PF * @pf: pointer to the PF structure * @v_opcode: operation code @@ -78,10 +47,11 @@ ice_vc_vf_broadcast(struct ice_pf *pf, enum virtchnl_ops v_opcode, enum virtchnl_status_code v_retval, u8 *msg, u16 msglen) { struct ice_hw *hw = &pf->hw; - struct ice_vf *vf = pf->vf; int i; - for (i = 0; i < pf->num_alloc_vfs; i++, vf++) { + ice_for_each_vf(pf, i) { + struct ice_vf *vf = &pf->vf[i]; + /* Not all vfs are enabled so skip the ones that are not */ if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states) && !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) @@ -121,26 +91,6 @@ ice_set_pfe_link(struct ice_vf *vf, struct virtchnl_pf_event *pfe, } /** - * ice_set_pfe_link_forced - Force the virtchnl_pf_event link speed/status - * @vf: pointer to the VF structure - * @pfe: pointer to the virtchnl_pf_event to set link speed/status for - * @link_up: whether or not to set the link up/down - */ -static void -ice_set_pfe_link_forced(struct ice_vf *vf, struct virtchnl_pf_event *pfe, - bool link_up) -{ - u16 link_speed; - - if (link_up) - link_speed = ICE_AQ_LINK_SPEED_100GB; - else - link_speed = ICE_AQ_LINK_SPEED_UNKNOWN; - - ice_set_pfe_link(vf, pfe, link_speed, link_up); -} - -/** * ice_vc_notify_vf_link_state - Inform a VF of link status * @vf: pointer to the VF structure * @@ -160,13 +110,17 @@ static void ice_vc_notify_vf_link_state(struct ice_vf *vf) pfe.severity = PF_EVENT_SEVERITY_INFO; /* Always report link is down if the VF queues aren't enabled */ - if (!vf->num_qs_ena) + if (!vf->num_qs_ena) { ice_set_pfe_link(vf, &pfe, ICE_AQ_LINK_SPEED_UNKNOWN, false); - else if (vf->link_forced) - ice_set_pfe_link_forced(vf, &pfe, vf->link_up); - else - ice_set_pfe_link(vf, &pfe, ls->link_speed, ls->link_info & - ICE_AQ_LINK_UP); + } else if (vf->link_forced) { + u16 link_speed = vf->link_up ? + ls->link_speed : ICE_AQ_LINK_SPEED_UNKNOWN; + + ice_set_pfe_link(vf, &pfe, link_speed, vf->link_up); + } else { + ice_set_pfe_link(vf, &pfe, ls->link_speed, + ls->link_info & ICE_AQ_LINK_UP); + } ice_aq_send_msg_to_vf(hw, vf->vf_id, VIRTCHNL_OP_EVENT, VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe, @@ -331,7 +285,7 @@ void ice_free_vfs(struct ice_pf *pf) usleep_range(1000, 2000); /* Avoid wait time by stopping all VFs at the same time */ - for (i = 0; i < pf->num_alloc_vfs; i++) + ice_for_each_vf(pf, i) if (test_bit(ICE_VF_STATE_QS_ENA, pf->vf[i].vf_states)) ice_dis_vf_qs(&pf->vf[i]); @@ -991,10 +945,17 @@ static void ice_cleanup_and_realloc_vf(struct ice_vf *vf) /* reallocate VF resources to finish resetting the VSI state */ if (!ice_alloc_vf_res(vf)) { + struct ice_vsi *vsi; + ice_ena_vf_mappings(vf); set_bit(ICE_VF_STATE_ACTIVE, vf->vf_states); clear_bit(ICE_VF_STATE_DIS, vf->vf_states); - vf->num_vlan = 0; + + vsi = pf->vsi[vf->lan_vsi_idx]; + if (ice_vsi_add_vlan(vsi, 0)) + dev_warn(ice_pf_to_dev(pf), + "Failed to add VLAN 0 filter for VF %d, MDD events will trigger. Reset the VF, disable spoofchk, or enable 8021q module on the guest", + vf->vf_id); } /* Tell the VF driver the reset is done. 
This needs to be done only @@ -1023,7 +984,7 @@ ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m, struct ice_hw *hw; hw = &pf->hw; - if (vf->num_vlan) { + if (vsi->num_vlan) { status = ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_m, rm_promisc); } else if (vf->port_vlan_id) { @@ -1070,7 +1031,7 @@ static bool ice_config_res_vfs(struct ice_pf *pf) ice_irq_dynamic_ena(hw, NULL, NULL); /* Finish resetting each VF and allocate resources */ - for (v = 0; v < pf->num_alloc_vfs; v++) { + ice_for_each_vf(pf, v) { struct ice_vf *vf = &pf->vf[v]; vf->num_vf_qs = pf->num_vf_qps; @@ -1113,10 +1074,10 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) return false; /* Begin reset on all VFs at once */ - for (v = 0; v < pf->num_alloc_vfs; v++) + ice_for_each_vf(pf, v) ice_trigger_vf_reset(&pf->vf[v], is_vflr, true); - for (v = 0; v < pf->num_alloc_vfs; v++) { + ice_for_each_vf(pf, v) { struct ice_vsi *vsi; vf = &pf->vf[v]; @@ -1161,7 +1122,7 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) dev_warn(dev, "VF reset check timeout\n"); /* free VF resources to begin resetting the VSI state */ - for (v = 0; v < pf->num_alloc_vfs; v++) { + ice_for_each_vf(pf, v) { vf = &pf->vf[v]; ice_free_vf_res(vf); @@ -1273,7 +1234,7 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) */ if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) || test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) { - if (vf->port_vlan_id || vf->num_vlan) + if (vf->port_vlan_id || vsi->num_vlan) promisc_m = ICE_UCAST_VLAN_PROMISC_BITS; else promisc_m = ICE_UCAST_PROMISC_BITS; @@ -1301,7 +1262,7 @@ void ice_vc_notify_link_state(struct ice_pf *pf) { int i; - for (i = 0; i < pf->num_alloc_vfs; i++) + ice_for_each_vf(pf, i) ice_vc_notify_vf_link_state(&pf->vf[i]); } @@ -1385,9 +1346,10 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs) goto err_pci_disable_sriov; } pf->vf = vfs; + pf->num_alloc_vfs = num_alloc_vfs; /* apply default profile */ - for (i = 0; i < num_alloc_vfs; i++) { + ice_for_each_vf(pf, i) { vfs[i].pf = pf; vfs[i].vf_sw_id = pf->first_sw; vfs[i].vf_id = i; @@ -1396,7 +1358,6 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs) set_bit(ICE_VIRTCHNL_VF_CAP_L2, &vfs[i].vf_caps); vfs[i].spoofchk = true; } - pf->num_alloc_vfs = num_alloc_vfs; /* VF resources get allocated with initialization */ if (!ice_config_res_vfs(pf)) { @@ -1535,7 +1496,7 @@ void ice_process_vflr_event(struct ice_pf *pf) !pf->num_alloc_vfs) return; - for (vf_id = 0; vf_id < pf->num_alloc_vfs; vf_id++) { + ice_for_each_vf(pf, vf_id) { struct ice_vf *vf = &pf->vf[vf_id]; u32 reg_idx, bit_idx; @@ -1918,6 +1879,89 @@ error_param: } /** + * ice_set_vf_spoofchk + * @netdev: network interface device structure + * @vf_id: VF identifier + * @ena: flag to enable or disable feature + * + * Enable or disable VF spoof checking + */ +int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; + struct ice_vsi_ctx *ctx; + struct ice_vsi *vf_vsi; + enum ice_status status; + struct device *dev; + struct ice_vf *vf; + int ret = 0; + + dev = ice_pf_to_dev(pf); + if (ice_validate_vf_id(pf, vf_id)) + return -EINVAL; + + vf = &pf->vf[vf_id]; + + if (ice_check_vf_init(pf, vf)) + return -EBUSY; + + vf_vsi = pf->vsi[vf->lan_vsi_idx]; + if (!vf_vsi) { + netdev_err(netdev, "VSI %d for VF %d is null\n", + vf->lan_vsi_idx, vf->vf_id); + return -EINVAL; + } + + if (vf_vsi->type != ICE_VSI_VF) { + 
netdev_err(netdev, + "Type %d of VSI %d for VF %d is no ICE_VSI_VF\n", + vf_vsi->type, vf_vsi->vsi_num, vf->vf_id); + return -ENODEV; + } + + if (ena == vf->spoofchk) { + dev_dbg(dev, "VF spoofchk already %s\n", ena ? "ON" : "OFF"); + return 0; + } + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->info.sec_flags = vf_vsi->info.sec_flags; + ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID); + if (ena) { + ctx->info.sec_flags |= + ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF | + (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << + ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S); + } else { + ctx->info.sec_flags &= + ~(ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF | + (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << + ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S)); + } + + status = ice_update_vsi(&pf->hw, vf_vsi->idx, ctx, NULL); + if (status) { + dev_err(dev, + "Failed to %sable spoofchk on VF %d VSI %d\n error %d", + ena ? "en" : "dis", vf->vf_id, vf_vsi->vsi_num, status); + ret = -EIO; + goto out; + } + + /* only update spoofchk state and VSI context on success */ + vf_vsi->info.sec_flags = ctx->info.sec_flags; + vf->spoofchk = ena; + +out: + kfree(ctx); + return ret; +} + +/** * ice_vc_get_stats_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -2409,6 +2453,83 @@ static bool ice_can_vf_change_mac(struct ice_vf *vf) } /** + * ice_vc_add_mac_addr - attempt to add the MAC address passed in + * @vf: pointer to the VF info + * @vsi: pointer to the VF's VSI + * @mac_addr: MAC address to add + */ +static int +ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr) +{ + struct device *dev = ice_pf_to_dev(vf->pf); + enum ice_status status; + + /* default unicast MAC already added */ + if (ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr)) + return 0; + + if (is_unicast_ether_addr(mac_addr) && !ice_can_vf_change_mac(vf)) { + dev_err(dev, "VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n"); + return -EPERM; + } + + status = ice_vsi_cfg_mac_fltr(vsi, mac_addr, true); + if (status == ICE_ERR_ALREADY_EXISTS) { + dev_err(dev, "MAC %pM already exists for VF %d\n", mac_addr, + vf->vf_id); + return -EEXIST; + } else if (status) { + dev_err(dev, "Failed to add MAC %pM for VF %d\n, error %d\n", + mac_addr, vf->vf_id, status); + return -EIO; + } + + /* only set dflt_lan_addr once */ + if (is_zero_ether_addr(vf->dflt_lan_addr.addr) && + is_unicast_ether_addr(mac_addr)) + ether_addr_copy(vf->dflt_lan_addr.addr, mac_addr); + + vf->num_mac++; + + return 0; +} + +/** + * ice_vc_del_mac_addr - attempt to delete the MAC address passed in + * @vf: pointer to the VF info + * @vsi: pointer to the VF's VSI + * @mac_addr: MAC address to delete + */ +static int +ice_vc_del_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr) +{ + struct device *dev = ice_pf_to_dev(vf->pf); + enum ice_status status; + + if (!ice_can_vf_change_mac(vf) && + ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr)) + return 0; + + status = ice_vsi_cfg_mac_fltr(vsi, mac_addr, false); + if (status == ICE_ERR_DOES_NOT_EXIST) { + dev_err(dev, "MAC %pM does not exist for VF %d\n", mac_addr, + vf->vf_id); + return -ENOENT; + } else if (status) { + dev_err(dev, "Failed to delete MAC %pM for VF %d, error %d\n", + mac_addr, vf->vf_id, status); + return -EIO; + } + + if (ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr)) + eth_zero_addr(vf->dflt_lan_addr.addr); + + vf->num_mac--; + + return 0; +} + +/** * ice_vc_handle_mac_addr_msg * @vf: pointer to 
the VF info * @msg: pointer to the msg buffer @@ -2419,23 +2540,23 @@ static bool ice_can_vf_change_mac(struct ice_vf *vf) static int ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set) { + int (*ice_vc_cfg_mac) + (struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr); enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; struct virtchnl_ether_addr_list *al = (struct virtchnl_ether_addr_list *)msg; struct ice_pf *pf = vf->pf; enum virtchnl_ops vc_op; - enum ice_status status; struct ice_vsi *vsi; - struct device *dev; - int mac_count = 0; int i; - dev = ice_pf_to_dev(pf); - - if (set) + if (set) { vc_op = VIRTCHNL_OP_ADD_ETH_ADDR; - else + ice_vc_cfg_mac = ice_vc_add_mac_addr; + } else { vc_op = VIRTCHNL_OP_DEL_ETH_ADDR; + ice_vc_cfg_mac = ice_vc_del_mac_addr; + } if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) || !ice_vc_isvalid_vsi_id(vf, al->vsi_id)) { @@ -2443,14 +2564,15 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set) goto handle_mac_exit; } + /* If this VF is not privileged, then we can't add more than a + * limited number of addresses. Check to make sure that the + * additions do not push us over the limit. + */ if (set && !ice_is_vf_trusted(vf) && (vf->num_mac + al->num_elements) > ICE_MAX_MACADDR_PER_VF) { - dev_err(dev, + dev_err(ice_pf_to_dev(pf), "Can't add more MAC addresses, because VF-%d is not trusted, switch the VF to trusted mode in order to add more functionalities\n", vf->vf_id); - /* There is no need to let VF know about not being trusted - * to add more MAC addr, so we can just return success message. - */ v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto handle_mac_exit; } @@ -2462,70 +2584,22 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set) } for (i = 0; i < al->num_elements; i++) { - u8 *maddr = al->list[i].addr; + u8 *mac_addr = al->list[i].addr; + int result; - if (ether_addr_equal(maddr, vf->dflt_lan_addr.addr) || - is_broadcast_ether_addr(maddr)) { - if (set) { - /* VF is trying to add filters that the PF - * already added. Just continue. - */ - dev_info(dev, - "MAC %pM already set for VF %d\n", - maddr, vf->vf_id); - continue; - } else { - /* VF can't remove dflt_lan_addr/bcast MAC */ - dev_err(dev, - "VF can't remove default MAC address or MAC %pM programmed by PF for VF %d\n", - maddr, vf->vf_id); - continue; - } - } - - /* check for the invalid cases and bail if necessary */ - if (is_zero_ether_addr(maddr)) { - dev_err(dev, - "invalid MAC %pM provided for VF %d\n", - maddr, vf->vf_id); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto handle_mac_exit; - } - - if (is_unicast_ether_addr(maddr) && - !ice_can_vf_change_mac(vf)) { - dev_err(dev, - "can't change unicast MAC for untrusted VF %d\n", - vf->vf_id); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto handle_mac_exit; - } + if (is_broadcast_ether_addr(mac_addr) || + is_zero_ether_addr(mac_addr)) + continue; - /* program the updated filter list */ - status = ice_vsi_cfg_mac_fltr(vsi, maddr, set); - if (status == ICE_ERR_DOES_NOT_EXIST || - status == ICE_ERR_ALREADY_EXISTS) { - dev_info(dev, - "can't %s MAC filters %pM for VF %d, error %d\n", - set ? "add" : "remove", maddr, vf->vf_id, - status); - } else if (status) { - dev_err(dev, - "can't %s MAC filters for VF %d, error %d\n", - set ? 
"add" : "remove", vf->vf_id, status); - v_ret = ice_err_to_virt_err(status); + result = ice_vc_cfg_mac(vf, vsi, mac_addr); + if (result == -EEXIST || result == -ENOENT) { + continue; + } else if (result) { + v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; goto handle_mac_exit; } - - mac_count++; } - /* Track number of MAC filters programmed for the VF VSI */ - if (set) - vf->num_mac += mac_count; - else - vf->num_mac -= mac_count; - handle_mac_exit: /* send the response to the VF */ return ice_vc_send_msg_to_vf(vf, vc_op, v_ret, NULL, 0); @@ -2744,17 +2818,6 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) goto error_param; } - if (add_v && !ice_is_vf_trusted(vf) && - vf->num_vlan >= ICE_MAX_VLAN_PER_VF) { - dev_info(dev, - "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n", - vf->vf_id); - /* There is no need to let VF know about being not trusted, - * so we can just return success message here - */ - goto error_param; - } - for (i = 0; i < vfl->num_elements; i++) { if (vfl->vlan_id[i] > ICE_MAX_VLANID) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; @@ -2771,6 +2834,17 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) goto error_param; } + if (add_v && !ice_is_vf_trusted(vf) && + vsi->num_vlan >= ICE_MAX_VLAN_PER_VF) { + dev_info(dev, + "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n", + vf->vf_id); + /* There is no need to let VF know about being not trusted, + * so we can just return success message here + */ + goto error_param; + } + if (vsi->info.pvid) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; @@ -2785,7 +2859,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) u16 vid = vfl->vlan_id[i]; if (!ice_is_vf_trusted(vf) && - vf->num_vlan >= ICE_MAX_VLAN_PER_VF) { + vsi->num_vlan >= ICE_MAX_VLAN_PER_VF) { dev_info(dev, "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n", vf->vf_id); @@ -2796,12 +2870,20 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) goto error_param; } - if (ice_vsi_add_vlan(vsi, vid)) { + /* we add VLAN 0 by default for each VF so we can enable + * Tx VLAN anti-spoof without triggering MDD events so + * we don't need to add it again here + */ + if (!vid) + continue; + + status = ice_vsi_add_vlan(vsi, vid); + if (status) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - vf->num_vlan++; + vsi->num_vlan++; /* Enable VLAN pruning when VLAN is added */ if (!vlan_promisc) { status = ice_cfg_vlan_pruning(vsi, true, false); @@ -2837,21 +2919,29 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) */ int num_vf_vlan; - num_vf_vlan = vf->num_vlan; + num_vf_vlan = vsi->num_vlan; for (i = 0; i < vfl->num_elements && i < num_vf_vlan; i++) { u16 vid = vfl->vlan_id[i]; + /* we add VLAN 0 by default for each VF so we can enable + * Tx VLAN anti-spoof without triggering MDD events so + * we don't want a VIRTCHNL request to remove it + */ + if (!vid) + continue; + /* Make sure ice_vsi_kill_vlan is successful before * updating VLAN information */ - if (ice_vsi_kill_vlan(vsi, vid)) { + status = ice_vsi_kill_vlan(vsi, vid); + if (status) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - vf->num_vlan--; + vsi->num_vlan--; /* Disable VLAN pruning when the last VLAN is removed */ - if (!vf->num_vlan) + if (!vsi->num_vlan) ice_cfg_vlan_pruning(vsi, false, false); /* Disable Unicast/Multicast VLAN promiscuous mode */ 
@@ -3165,65 +3255,6 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi) } /** - * ice_set_vf_spoofchk - * @netdev: network interface device structure - * @vf_id: VF identifier - * @ena: flag to enable or disable feature - * - * Enable or disable VF spoof checking - */ -int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) -{ - struct ice_pf *pf = ice_netdev_to_pf(netdev); - struct ice_vsi *vsi = pf->vsi[0]; - struct ice_vsi_ctx *ctx; - enum ice_status status; - struct device *dev; - struct ice_vf *vf; - int ret = 0; - - dev = ice_pf_to_dev(pf); - if (ice_validate_vf_id(pf, vf_id)) - return -EINVAL; - - vf = &pf->vf[vf_id]; - if (ice_check_vf_init(pf, vf)) - return -EBUSY; - - if (ena == vf->spoofchk) { - dev_dbg(dev, "VF spoofchk already %s\n", - ena ? "ON" : "OFF"); - return 0; - } - - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; - - ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID); - - if (ena) { - ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF; - ctx->info.sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_M; - } - - status = ice_update_vsi(&pf->hw, vsi->idx, ctx, NULL); - if (status) { - dev_dbg(dev, - "Error %d, failed to update VSI* parameters\n", status); - ret = -EIO; - goto out; - } - - vf->spoofchk = ena; - vsi->info.sec_flags = ctx->info.sec_flags; - vsi->info.sw_flags2 = ctx->info.sw_flags2; -out: - kfree(ctx); - return ret; -} - -/** * ice_wait_on_vf_reset * @vf: The VF being resseting * @@ -3344,28 +3375,18 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state) { struct ice_pf *pf = ice_netdev_to_pf(netdev); - struct virtchnl_pf_event pfe = { 0 }; - struct ice_link_status *ls; struct ice_vf *vf; - struct ice_hw *hw; if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; vf = &pf->vf[vf_id]; - hw = &pf->hw; - ls = &pf->hw.port_info->phy.link_info; - if (ice_check_vf_init(pf, vf)) return -EBUSY; - pfe.event = VIRTCHNL_EVENT_LINK_CHANGE; - pfe.severity = PF_EVENT_SEVERITY_INFO; - switch (link_state) { case IFLA_VF_LINK_STATE_AUTO: vf->link_forced = false; - vf->link_up = ls->link_info & ICE_AQ_LINK_UP; break; case IFLA_VF_LINK_STATE_ENABLE: vf->link_forced = true; @@ -3379,15 +3400,7 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state) return -EINVAL; } - if (vf->link_forced) - ice_set_pfe_link_forced(vf, &pfe, vf->link_up); - else - ice_set_pfe_link(vf, &pfe, ls->link_speed, vf->link_up); - - /* Notify the VF of its new link state */ - ice_aq_send_msg_to_vf(hw, vf->vf_id, VIRTCHNL_OP_EVENT, - VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe, - sizeof(pfe), NULL); + ice_vc_notify_vf_link_state(vf); return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h index 88aa65d5cb31..4647d636ed36 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h @@ -40,6 +40,9 @@ #define ICE_DFLT_INTR_PER_VF (ICE_DFLT_QS_PER_VF + 1) #define ICE_MAX_VF_RESET_WAIT 15 +#define ice_for_each_vf(pf, i) \ + for ((i) = 0; (i) < (pf)->num_alloc_vfs; (i)++) + /* Specific VF states */ enum ice_vf_states { ICE_VF_STATE_INIT = 0, /* PF is initializing VF */ @@ -91,7 +94,6 @@ struct ice_vf { unsigned long vf_caps; /* VF's adv. 
capabilities */ u8 num_req_qs; /* num of queue pairs requested by VF */ u16 num_mac; - u16 num_vlan; u16 num_vf_qs; /* num of queue configured per VF */ u16 num_qs_ena; /* total num of Tx/Rx queue enabled */ }; diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index cf9b8b22d24f..149dca0012ba 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -414,7 +414,8 @@ ice_xsk_umem_enable(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid) if (vsi->type != ICE_VSI_PF) return -EINVAL; - vsi->num_xsk_umems = min_t(u16, vsi->num_rxq, vsi->num_txq); + if (!vsi->num_xsk_umems) + vsi->num_xsk_umems = min_t(u16, vsi->num_rxq, vsi->num_txq); if (qid >= vsi->num_xsk_umems) return -EINVAL; @@ -555,7 +556,7 @@ ice_alloc_buf_fast_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf) rx_buf->handle = handle + umem->headroom; - xsk_umem_discard_addr(umem); + xsk_umem_release_addr(umem); return true; } @@ -591,7 +592,7 @@ ice_alloc_buf_slow_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf) rx_buf->handle = handle + umem->headroom; - xsk_umem_discard_addr_rq(umem); + xsk_umem_release_addr_rq(umem); return true; } @@ -1019,8 +1020,8 @@ bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget) s16 ntc = xdp_ring->next_to_clean; struct ice_tx_desc *tx_desc; struct ice_tx_buf *tx_buf; - bool xmit_done = true; u32 xsk_frames = 0; + bool xmit_done; tx_desc = ICE_TX_DESC(xdp_ring, ntc); tx_buf = &xdp_ring->tx_buf[ntc]; diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index ca54e268d157..49b5fa9d4783 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -661,6 +661,7 @@ void igb_configure_tx_ring(struct igb_adapter *, struct igb_ring *); void igb_configure_rx_ring(struct igb_adapter *, struct igb_ring *); void igb_setup_tctl(struct igb_adapter *); void igb_setup_rctl(struct igb_adapter *); +void igb_setup_srrctl(struct igb_adapter *, struct igb_ring *); netdev_tx_t igb_xmit_frame_ring(struct sk_buff *, struct igb_ring *); void igb_alloc_rx_buffers(struct igb_ring *, u16); void igb_update_stats(struct igb_adapter *); diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 445fbdce3e25..f96ffa83efbe 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -396,6 +396,7 @@ static int igb_set_pauseparam(struct net_device *netdev, struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; int retval = 0; + int i; /* 100basefx does not support setting link flow control */ if (hw->dev_spec._82575.eth_flags.e100_base_fx) @@ -428,6 +429,13 @@ static int igb_set_pauseparam(struct net_device *netdev, retval = ((hw->phy.media_type == e1000_media_type_copper) ? 
igb_force_mac_fc(hw) : igb_setup_link(hw)); + + /* Make sure SRRCTL considers new fc settings for each ring */ + for (i = 0; i < adapter->num_rx_queues; i++) { + struct igb_ring *ring = adapter->rx_ring[i]; + + igb_setup_srrctl(adapter, ring); + } } clear_bit(__IGB_RESETTING, &adapter->state); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 98346eb064d5..b46bff8fe056 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -146,7 +146,7 @@ static int igb_poll(struct napi_struct *, int); static bool igb_clean_tx_irq(struct igb_q_vector *, int); static int igb_clean_rx_irq(struct igb_q_vector *, int); static int igb_ioctl(struct net_device *, struct ifreq *, int cmd); -static void igb_tx_timeout(struct net_device *); +static void igb_tx_timeout(struct net_device *, unsigned int txqueue); static void igb_reset_task(struct work_struct *); static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features); @@ -4468,6 +4468,37 @@ static inline void igb_set_vmolr(struct igb_adapter *adapter, } /** + * igb_setup_srrctl - configure the split and replication receive control + * registers + * @adapter: Board private structure + * @ring: receive ring to be configured + **/ +void igb_setup_srrctl(struct igb_adapter *adapter, struct igb_ring *ring) +{ + struct e1000_hw *hw = &adapter->hw; + int reg_idx = ring->reg_idx; + u32 srrctl = 0; + + srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; + if (ring_uses_large_buffer(ring)) + srrctl |= IGB_RXBUFFER_3072 >> E1000_SRRCTL_BSIZEPKT_SHIFT; + else + srrctl |= IGB_RXBUFFER_2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; + srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; + if (hw->mac.type >= e1000_82580) + srrctl |= E1000_SRRCTL_TIMESTAMP; + /* Only set Drop Enable if VFs allocated, or we are supporting multiple + * queues and rx flow control is disabled + */ + if (adapter->vfs_allocated_count || + (!(hw->fc.current_mode & e1000_fc_rx_pause) && + adapter->num_rx_queues > 1)) + srrctl |= E1000_SRRCTL_DROP_EN; + + wr32(E1000_SRRCTL(reg_idx), srrctl); +} + +/** * igb_configure_rx_ring - Configure a receive ring after Reset * @adapter: board private structure * @ring: receive ring to be configured @@ -4481,7 +4512,7 @@ void igb_configure_rx_ring(struct igb_adapter *adapter, union e1000_adv_rx_desc *rx_desc; u64 rdba = ring->dma; int reg_idx = ring->reg_idx; - u32 srrctl = 0, rxdctl = 0; + u32 rxdctl = 0; /* disable the queue */ wr32(E1000_RXDCTL(reg_idx), 0); @@ -4499,19 +4530,7 @@ void igb_configure_rx_ring(struct igb_adapter *adapter, writel(0, ring->tail); /* set descriptor configuration */ - srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; - if (ring_uses_large_buffer(ring)) - srrctl |= IGB_RXBUFFER_3072 >> E1000_SRRCTL_BSIZEPKT_SHIFT; - else - srrctl |= IGB_RXBUFFER_2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; - srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; - if (hw->mac.type >= e1000_82580) - srrctl |= E1000_SRRCTL_TIMESTAMP; - /* Only set Drop Enable if we are supporting multiple queues */ - if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1) - srrctl |= E1000_SRRCTL_DROP_EN; - - wr32(E1000_SRRCTL(reg_idx), srrctl); + igb_setup_srrctl(adapter, ring); /* set filtering for VMDQ pools */ igb_set_vmolr(adapter, reg_idx & 0x7, true); @@ -6184,7 +6203,7 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, * igb_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void igb_tx_timeout(struct 
net_device *netdev) +static void igb_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 6003dc3ff5fd..5b1800c3ba82 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2375,7 +2375,7 @@ static netdev_tx_t igbvf_xmit_frame(struct sk_buff *skb, * igbvf_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void igbvf_tx_timeout(struct net_device *netdev) +static void igbvf_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct igbvf_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/igc/Makefile b/drivers/net/ethernet/intel/igc/Makefile index 88c6f88baac5..49fb1e1965cd 100644 --- a/drivers/net/ethernet/intel/igc/Makefile +++ b/drivers/net/ethernet/intel/igc/Makefile @@ -8,4 +8,4 @@ obj-$(CONFIG_IGC) += igc.o igc-objs := igc_main.o igc_mac.o igc_i225.o igc_base.o igc_nvm.o igc_phy.o \ -igc_ethtool.o +igc_ethtool.o igc_ptp.o diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 0868677d43ed..52066bdbbad0 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -10,6 +10,9 @@ #include <linux/vmalloc.h> #include <linux/ethtool.h> #include <linux/sctp.h> +#include <linux/ptp_clock_kernel.h> +#include <linux/timecounter.h> +#include <linux/net_tstamp.h> #include "igc_hw.h" @@ -45,11 +48,15 @@ extern char igc_driver_version[]; #define IGC_REGS_LEN 740 #define IGC_RETA_SIZE 128 +/* flags controlling PTP/1588 function */ +#define IGC_PTP_ENABLED BIT(0) + /* Interrupt defines */ #define IGC_START_ITR 648 /* ~6000 ints/sec */ #define IGC_FLAG_HAS_MSI BIT(0) #define IGC_FLAG_QUEUE_PAIRS BIT(3) #define IGC_FLAG_DMAC BIT(4) +#define IGC_FLAG_PTP BIT(8) #define IGC_FLAG_NEED_LINK_UPDATE BIT(9) #define IGC_FLAG_MEDIA_RESET BIT(10) #define IGC_FLAG_MAS_ENABLE BIT(12) @@ -100,6 +107,20 @@ extern char igc_driver_version[]; #define AUTO_ALL_MODES 0 #define IGC_RX_HDR_LEN IGC_RXBUFFER_256 +/* Transmit and receive latency (for PTP timestamps) */ +/* FIXME: These values were estimated using the ones that i210 has as + * basis, they seem to provide good numbers with ptp4l/phc2sys, but we + * need to confirm them. + */ +#define IGC_I225_TX_LATENCY_10 9542 +#define IGC_I225_TX_LATENCY_100 1024 +#define IGC_I225_TX_LATENCY_1000 178 +#define IGC_I225_TX_LATENCY_2500 64 +#define IGC_I225_RX_LATENCY_10 20662 +#define IGC_I225_RX_LATENCY_100 2213 +#define IGC_I225_RX_LATENCY_1000 448 +#define IGC_I225_RX_LATENCY_2500 160 + /* RX and TX descriptor control thresholds. * PTHRESH - MAC will consider prefetch if it has fewer than this number of * descriptors available in its onboard memory. 
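The igb hunks above factor the SRRCTL programming out of igb_configure_rx_ring() into igb_setup_srrctl() so the ethtool pause-parameter path can recompute it; drop enable is now set when VFs are allocated, or when multiple Rx queues run with Rx flow control disabled. A minimal userspace sketch of that register-value computation follows; the constant names and bit positions below are illustrative stand-ins, not the driver's e1000 defines.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for the SRRCTL fields; values are not authoritative. */
#define SRRCTL_BSIZEHDRSIZE_SHIFT  2
#define SRRCTL_BSIZEPKT_SHIFT      10
#define SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000u
#define SRRCTL_TIMESTAMP           0x40000000u
#define SRRCTL_DROP_EN             0x80000000u

/* Mirrors the decision logic shown in igb_setup_srrctl(): header size, packet
 * buffer size, advanced one-buffer descriptors, optional timestamping, and
 * drop enable when VFs are allocated or when several Rx queues run without
 * Rx flow control.
 */
static uint32_t build_srrctl(bool large_buffer, bool has_timestamp,
                             unsigned int num_vfs, unsigned int num_rx_queues,
                             bool rx_pause_enabled)
{
        uint32_t srrctl = 256u << SRRCTL_BSIZEHDRSIZE_SHIFT;  /* header size */

        srrctl |= (large_buffer ? 3072u : 2048u) >> SRRCTL_BSIZEPKT_SHIFT;
        srrctl |= SRRCTL_DESCTYPE_ADV_ONEBUF;
        if (has_timestamp)
                srrctl |= SRRCTL_TIMESTAMP;
        if (num_vfs || (!rx_pause_enabled && num_rx_queues > 1))
                srrctl |= SRRCTL_DROP_EN;

        return srrctl;
}

int main(void)
{
        /* No VFs, four queues, pause frames off: drop enable gets set. */
        printf("SRRCTL=0x%08x\n", build_srrctl(false, true, 0, 4, false));
        return 0;
}

The igb_set_pauseparam() hunk then simply re-runs this computation for every Rx ring, so the drop-enable decision tracks the new flow-control mode.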
@@ -370,6 +391,8 @@ struct igc_adapter { struct timer_list dma_err_timer; struct timer_list phy_info_timer; + u32 wol; + u32 en_mng_pt; u16 link_speed; u16 link_duplex; @@ -430,6 +453,20 @@ struct igc_adapter { unsigned long link_check_timeout; struct igc_info ei; + + struct ptp_clock *ptp_clock; + struct ptp_clock_info ptp_caps; + struct work_struct ptp_tx_work; + struct sk_buff *ptp_tx_skb; + struct hwtstamp_config tstamp_config; + unsigned long ptp_tx_start; + unsigned long last_rx_ptp_check; + unsigned long last_rx_timestamp; + unsigned int ptp_flags; + /* System time value lock */ + spinlock_t tmreg_lock; + struct cyclecounter cc; + struct timecounter tc; }; /* igc_desc_unused - calculate if we have unused descriptors */ @@ -513,6 +550,16 @@ int igc_add_filter(struct igc_adapter *adapter, int igc_erase_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input); +void igc_ptp_init(struct igc_adapter *adapter); +void igc_ptp_reset(struct igc_adapter *adapter); +void igc_ptp_stop(struct igc_adapter *adapter); +void igc_ptp_rx_rgtstamp(struct igc_q_vector *q_vector, struct sk_buff *skb); +void igc_ptp_rx_pktstamp(struct igc_q_vector *q_vector, void *va, + struct sk_buff *skb); +int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr); +int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr); +void igc_ptp_tx_hang(struct igc_adapter *adapter); + #define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring)) #define IGC_TXD_DCMD (IGC_ADVTXD_DCMD_EOP | IGC_ADVTXD_DCMD_RS) diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c index db289bcce21d..5a506440560a 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.c +++ b/drivers/net/ethernet/intel/igc/igc_base.c @@ -212,6 +212,7 @@ static s32 igc_get_invariants_base(struct igc_hw *hw) case IGC_DEV_ID_I225_I: case IGC_DEV_ID_I220_V: case IGC_DEV_ID_I225_K: + case IGC_DEV_ID_I225_BLANK_NVM: mac->type = igc_i225; break; default: diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index f3788f0b95b4..58efa7a02c68 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -10,6 +10,37 @@ #define IGC_CTRL_EXT_DRV_LOAD 0x10000000 /* Drv loaded bit for FW */ +/* Definitions for power management and wakeup registers */ +/* Wake Up Control */ +#define IGC_WUC_PME_EN 0x00000002 /* PME Enable */ + +/* Wake Up Filter Control */ +#define IGC_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */ +#define IGC_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */ + +#define IGC_CTRL_ADVD3WUC 0x00100000 /* D3 WUC */ + +/* Wake Up Status */ +#define IGC_WUS_EX 0x00000004 /* Directed Exact */ +#define IGC_WUS_ARPD 0x00000020 /* Directed ARP Request */ +#define IGC_WUS_IPV4 0x00000040 /* Directed IPv4 */ +#define IGC_WUS_IPV6 0x00000080 /* Directed IPv6 */ +#define IGC_WUS_NSD 0x00000400 /* Directed IPv6 Neighbor Solicitation */ + +/* Packet types that are enabled for wake packet delivery */ +#define WAKE_PKT_WUS ( \ + IGC_WUS_EX | \ + IGC_WUS_ARPD | \ + IGC_WUS_IPV4 | \ + IGC_WUS_IPV6 | \ + IGC_WUS_NSD) + +/* Wake Up Packet Length */ +#define IGC_WUPL_MASK 0x00000FFF + +/* Wake Up Packet Memory stores the first 128 bytes of the wake up packet */ +#define IGC_WUPM_BYTES 128 + /* Physical Func Reset Done Indication */ #define IGC_CTRL_EXT_LINK_MODE_MASK 0x00C00000 @@ -187,6 +218,7 @@ #define IGC_ICR_RXDMT0 BIT(4) /* Rx desc min. 
threshold (0) */ #define IGC_ICR_RXO BIT(6) /* Rx overrun */ #define IGC_ICR_RXT0 BIT(7) /* Rx timer intr (ring 0) */ +#define IGC_ICR_TS BIT(19) /* Time Sync Interrupt */ #define IGC_ICR_DRSTA BIT(30) /* Device Reset Asserted */ /* If this bit asserted, the driver should claim the interrupt */ @@ -209,6 +241,7 @@ #define IGC_IMS_DRSTA IGC_ICR_DRSTA /* Device Reset Asserted */ #define IGC_IMS_RXT0 IGC_ICR_RXT0 /* Rx timer intr */ #define IGC_IMS_RXDMT0 IGC_ICR_RXDMT0 /* Rx desc min. threshold */ +#define IGC_IMS_TS IGC_ICR_TS /* Time Sync Interrupt */ #define IGC_QVECTOR_MASK 0x7FFC /* Q-vector mask */ #define IGC_ITR_VAL_MASK 0x04 /* ITR value mask */ @@ -249,6 +282,10 @@ #define IGC_TXD_STAT_TC 0x00000004 /* Tx Underrun */ #define IGC_TXD_EXTCMD_TSTAMP 0x00000010 /* IEEE1588 Timestamp packet */ +/* IPSec Encrypt Enable */ +#define IGC_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ +#define IGC_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ + /* Transmit Control */ #define IGC_TCTL_EN 0x00000002 /* enable Tx */ #define IGC_TCTL_PSP 0x00000008 /* pad short packets */ @@ -281,12 +318,21 @@ #define IGC_RCTL_RDMTS_HALF 0x00000000 /* Rx desc min thresh size */ #define IGC_RCTL_BAM 0x00008000 /* broadcast enable */ +/* Split Replication Receive Control */ +#define IGC_SRRCTL_TIMESTAMP 0x40000000 +#define IGC_SRRCTL_TIMER1SEL(timer) (((timer) & 0x3) << 14) +#define IGC_SRRCTL_TIMER0SEL(timer) (((timer) & 0x3) << 17) + /* Receive Descriptor bit definitions */ #define IGC_RXD_STAT_EOP 0x02 /* End of Packet */ #define IGC_RXD_STAT_IXSM 0x04 /* Ignore checksum */ #define IGC_RXD_STAT_UDPCS 0x10 /* UDP xsum calculated */ #define IGC_RXD_STAT_TCPCS 0x20 /* TCP xsum calculated */ +/* Advanced Receive Descriptor bit definitions */ +#define IGC_RXDADV_STAT_TSIP 0x08000 /* timestamp in packet */ +#define IGC_RXDADV_STAT_TS 0x10000 /* Pkt was time stamped */ + #define IGC_RXDEXT_STATERR_CE 0x01000000 #define IGC_RXDEXT_STATERR_SE 0x02000000 #define IGC_RXDEXT_STATERR_SEQ 0x04000000 @@ -323,6 +369,61 @@ #define I225_RXPBSIZE_DEFAULT 0x000000A2 /* RXPBSIZE default */ #define I225_TXPBSIZE_DEFAULT 0x04000014 /* TXPBSIZE default */ +#define IGC_RXPBS_CFG_TS_EN 0x80000000 /* Timestamp in Rx buffer */ + +/* Time Sync Interrupt Causes */ +#define IGC_TSICR_SYS_WRAP BIT(0) /* SYSTIM Wrap around. */ +#define IGC_TSICR_TXTS BIT(1) /* Transmit Timestamp. */ +#define IGC_TSICR_TT0 BIT(3) /* Target Time 0 Trigger. */ +#define IGC_TSICR_TT1 BIT(4) /* Target Time 1 Trigger. */ +#define IGC_TSICR_AUTT0 BIT(5) /* Auxiliary Timestamp 0 Taken. */ +#define IGC_TSICR_AUTT1 BIT(6) /* Auxiliary Timestamp 1 Taken. 
*/ + +#define IGC_TSICR_INTERRUPTS IGC_TSICR_TXTS + +/* PTP Queue Filter */ +#define IGC_ETQF_1588 BIT(30) + +#define IGC_FTQF_VF_BP 0x00008000 +#define IGC_FTQF_1588_TIME_STAMP 0x08000000 +#define IGC_FTQF_MASK 0xF0000000 +#define IGC_FTQF_MASK_PROTO_BP 0x10000000 + +/* Time Sync Receive Control bit definitions */ +#define IGC_TSYNCRXCTL_VALID 0x00000001 /* Rx timestamp valid */ +#define IGC_TSYNCRXCTL_TYPE_MASK 0x0000000E /* Rx type mask */ +#define IGC_TSYNCRXCTL_TYPE_L2_V2 0x00 +#define IGC_TSYNCRXCTL_TYPE_L4_V1 0x02 +#define IGC_TSYNCRXCTL_TYPE_L2_L4_V2 0x04 +#define IGC_TSYNCRXCTL_TYPE_ALL 0x08 +#define IGC_TSYNCRXCTL_TYPE_EVENT_V2 0x0A +#define IGC_TSYNCRXCTL_ENABLED 0x00000010 /* enable Rx timestamping */ +#define IGC_TSYNCRXCTL_SYSCFI 0x00000020 /* Sys clock frequency */ +#define IGC_TSYNCRXCTL_RXSYNSIG 0x00000400 /* Sample RX tstamp in PHY sop */ + +/* Time Sync Receive Configuration */ +#define IGC_TSYNCRXCFG_PTP_V1_CTRLT_MASK 0x000000FF +#define IGC_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE 0x00 +#define IGC_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE 0x01 + +/* Immediate Interrupt Receive */ +#define IGC_IMIR_CLEAR_MASK 0xF001FFFF /* IMIR Reg Clear Mask */ +#define IGC_IMIR_PORT_BYPASS 0x20000 /* IMIR Port Bypass Bit */ +#define IGC_IMIR_PRIORITY_SHIFT 29 /* IMIR Priority Shift */ +#define IGC_IMIREXT_CLEAR_MASK 0x7FFFF /* IMIREXT Reg Clear Mask */ + +/* Immediate Interrupt Receive Extended */ +#define IGC_IMIREXT_CTRL_BP 0x00080000 /* Bypass check of ctrl bits */ +#define IGC_IMIREXT_SIZE_BP 0x00001000 /* Packet size bypass */ + +/* Time Sync Transmit Control bit definitions */ +#define IGC_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */ +#define IGC_TSYNCTXCTL_ENABLED 0x00000010 /* enable Tx timestamping */ +#define IGC_TSYNCTXCTL_MAX_ALLOWED_DLY_MASK 0x0000F000 /* max delay */ +#define IGC_TSYNCTXCTL_SYNC_COMP_ERR 0x20000000 /* sync err */ +#define IGC_TSYNCTXCTL_SYNC_COMP 0x40000000 /* sync complete */ +#define IGC_TSYNCTXCTL_START_SYNC 0x80000000 /* initiate sync */ +#define IGC_TSYNCTXCTL_TXSYNSIG 0x00000020 /* Sample TX tstamp in PHY sop */ /* Receive Checksum Control */ #define IGC_RXCSUM_CRCOFL 0x00000800 /* CRC32 offload enable */ @@ -363,6 +464,7 @@ /* PHY Status Register */ #define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */ #define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */ +#define IGC_PHY_RST_COMP 0x0100 /* Internal PHY reset completion */ /* PHY 1000 MII Register/Bit Definitions */ /* PHY Registers defined by IEEE */ diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 455c1cdceb6e..ee07011e13e9 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1600,6 +1600,39 @@ static int igc_set_channels(struct net_device *netdev, return 0; } +static int igc_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct igc_adapter *adapter = netdev_priv(dev); + + if (adapter->ptp_clock) + info->phc_index = ptp_clock_index(adapter->ptp_clock); + else + info->phc_index = -1; + + switch (adapter->hw.mac.type) { + case igc_i225: + info->so_timestamping = + SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + info->tx_types = + BIT(HWTSTAMP_TX_OFF) | + BIT(HWTSTAMP_TX_ON); + + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE); + info->rx_filters |= BIT(HWTSTAMP_FILTER_ALL); + + return 0; + default: + 
return -EOPNOTSUPP; + } +} + static u32 igc_get_priv_flags(struct net_device *netdev) { struct igc_adapter *adapter = netdev_priv(netdev); @@ -1847,6 +1880,7 @@ static const struct ethtool_ops igc_ethtool_ops = { .get_rxfh_indir_size = igc_get_rxfh_indir_size, .get_rxfh = igc_get_rxfh, .set_rxfh = igc_set_rxfh, + .get_ts_info = igc_get_ts_info, .get_channels = igc_get_channels, .set_channels = igc_set_channels, .get_priv_flags = igc_get_priv_flags, diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h index 20f710645746..90ac0e0144d8 100644 --- a/drivers/net/ethernet/intel/igc/igc_hw.h +++ b/drivers/net/ethernet/intel/igc/igc_hw.h @@ -21,8 +21,7 @@ #define IGC_DEV_ID_I225_I 0x15F8 #define IGC_DEV_ID_I220_V 0x15F7 #define IGC_DEV_ID_I225_K 0x3100 - -#define IGC_FUNC_0 0 +#define IGC_DEV_ID_I225_BLANK_NVM 0x15FD /* Function pointers for the MAC. */ struct igc_mac_operations { diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 9700527dd797..d9d5425fe8d9 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -8,6 +8,7 @@ #include <linux/tcp.h> #include <linux/udp.h> #include <linux/ip.h> +#include <linux/pm_runtime.h> #include <net/ipv6.h> @@ -44,31 +45,13 @@ static const struct pci_device_id igc_pci_tbl[] = { { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_I), board_base }, { PCI_VDEVICE(INTEL, IGC_DEV_ID_I220_V), board_base }, { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K), board_base }, + { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_BLANK_NVM), board_base }, /* required last entry */ {0, } }; MODULE_DEVICE_TABLE(pci, igc_pci_tbl); -/* forward declaration */ -static void igc_clean_tx_ring(struct igc_ring *tx_ring); -static int igc_sw_init(struct igc_adapter *); -static void igc_configure(struct igc_adapter *adapter); -static void igc_power_down_link(struct igc_adapter *adapter); -static void igc_set_default_mac_filter(struct igc_adapter *adapter); -static void igc_set_rx_mode(struct net_device *netdev); -static void igc_write_itr(struct igc_q_vector *q_vector); -static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector); -static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx); -static void igc_set_interrupt_capability(struct igc_adapter *adapter, - bool msix); -static void igc_free_q_vectors(struct igc_adapter *adapter); -static void igc_irq_disable(struct igc_adapter *adapter); -static void igc_irq_enable(struct igc_adapter *adapter); -static void igc_configure_msix(struct igc_adapter *adapter); -static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, - struct igc_rx_buffer *bi); - enum latency_range { lowest_latency = 0, low_latency = 1, @@ -76,6 +59,16 @@ enum latency_range { latency_invalid = 255 }; +/** + * igc_power_down_link - Power down the phy/serdes link + * @adapter: address of board private structure + */ +static void igc_power_down_link(struct igc_adapter *adapter) +{ + if (adapter->hw.phy.media_type == igc_media_type_copper) + igc_power_down_phy_copper_base(&adapter->hw); +} + void igc_reset(struct igc_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; @@ -110,11 +103,14 @@ void igc_reset(struct igc_adapter *adapter) if (!netif_running(adapter->netdev)) igc_power_down_link(adapter); + /* Re-enable PTP, where applicable. 
*/ + igc_ptp_reset(adapter); + igc_get_phy_info(hw); } /** - * igc_power_up_link - Power up the phy/serdes link + * igc_power_up_link - Power up the phy link * @adapter: address of board private structure */ static void igc_power_up_link(struct igc_adapter *adapter) @@ -128,16 +124,6 @@ static void igc_power_up_link(struct igc_adapter *adapter) } /** - * igc_power_down_link - Power down the phy/serdes link - * @adapter: address of board private structure - */ -static void igc_power_down_link(struct igc_adapter *adapter) -{ - if (adapter->hw.phy.media_type == igc_media_type_copper) - igc_power_down_phy_copper_base(&adapter->hw); -} - -/** * igc_release_hw_control - release control of the h/w to f/w * @adapter: address of board private structure * @@ -176,43 +162,6 @@ static void igc_get_hw_control(struct igc_adapter *adapter) } /** - * igc_free_tx_resources - Free Tx Resources per Queue - * @tx_ring: Tx descriptor ring for a specific queue - * - * Free all transmit software resources - */ -void igc_free_tx_resources(struct igc_ring *tx_ring) -{ - igc_clean_tx_ring(tx_ring); - - vfree(tx_ring->tx_buffer_info); - tx_ring->tx_buffer_info = NULL; - - /* if not set, then don't free */ - if (!tx_ring->desc) - return; - - dma_free_coherent(tx_ring->dev, tx_ring->size, - tx_ring->desc, tx_ring->dma); - - tx_ring->desc = NULL; -} - -/** - * igc_free_all_tx_resources - Free Tx Resources for All Queues - * @adapter: board private structure - * - * Free all transmit software resources - */ -static void igc_free_all_tx_resources(struct igc_adapter *adapter) -{ - int i; - - for (i = 0; i < adapter->num_tx_queues; i++) - igc_free_tx_resources(adapter->tx_ring[i]); -} - -/** * igc_clean_tx_ring - Free Tx Buffers * @tx_ring: ring to be cleaned */ @@ -274,6 +223,43 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring) } /** + * igc_free_tx_resources - Free Tx Resources per Queue + * @tx_ring: Tx descriptor ring for a specific queue + * + * Free all transmit software resources + */ +void igc_free_tx_resources(struct igc_ring *tx_ring) +{ + igc_clean_tx_ring(tx_ring); + + vfree(tx_ring->tx_buffer_info); + tx_ring->tx_buffer_info = NULL; + + /* if not set, then don't free */ + if (!tx_ring->desc) + return; + + dma_free_coherent(tx_ring->dev, tx_ring->size, + tx_ring->desc, tx_ring->dma); + + tx_ring->desc = NULL; +} + +/** + * igc_free_all_tx_resources - Free Tx Resources for All Queues + * @adapter: board private structure + * + * Free all transmit software resources + */ +static void igc_free_all_tx_resources(struct igc_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_tx_queues; i++) + igc_free_tx_resources(adapter->tx_ring[i]); +} + +/** * igc_clean_all_tx_rings - Free Tx Buffers for all queues * @adapter: board private structure */ @@ -771,6 +757,51 @@ static void igc_setup_tctl(struct igc_adapter *adapter) } /** + * igc_rar_set_index - Sync RAL[index] and RAH[index] registers with MAC table + * @adapter: address of board private structure + * @index: Index of the RAR entry which need to be synced with MAC table + */ +static void igc_rar_set_index(struct igc_adapter *adapter, u32 index) +{ + u8 *addr = adapter->mac_table[index].addr; + struct igc_hw *hw = &adapter->hw; + u32 rar_low, rar_high; + + /* HW expects these to be in network order when they are plugged + * into the registers which are little endian. 
In order to guarantee + * that ordering we need to do an leXX_to_cpup here in order to be + * ready for the byteswap that occurs with writel + */ + rar_low = le32_to_cpup((__le32 *)(addr)); + rar_high = le16_to_cpup((__le16 *)(addr + 4)); + + /* Indicate to hardware the Address is Valid. */ + if (adapter->mac_table[index].state & IGC_MAC_STATE_IN_USE) { + if (is_valid_ether_addr(addr)) + rar_high |= IGC_RAH_AV; + + rar_high |= IGC_RAH_POOL_1 << + adapter->mac_table[index].queue; + } + + wr32(IGC_RAL(index), rar_low); + wrfl(); + wr32(IGC_RAH(index), rar_high); + wrfl(); +} + +/* Set default MAC address for the PF in the first RAR entry */ +static void igc_set_default_mac_filter(struct igc_adapter *adapter) +{ + struct igc_mac_addr *mac_table = &adapter->mac_table[0]; + + ether_addr_copy(mac_table->addr, adapter->hw.mac.addr); + mac_table->state = IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE; + + igc_rar_set_index(adapter, 0); +} + +/** * igc_set_mac - Change the Ethernet Address of the NIC * @netdev: network interface device structure * @p: pointer to an address structure @@ -850,7 +881,7 @@ static void igc_tx_ctxtdesc(struct igc_ring *tx_ring, /* set bits to identify this as an advanced context descriptor */ type_tucmd |= IGC_TXD_CMD_DEXT | IGC_ADVTXD_DTYP_CTXT; - /* For 82575, context index must be unique per ring. */ + /* For i225, context index must be unique per ring. */ if (test_bit(IGC_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) mss_l4len_idx |= tx_ring->reg_idx << 4; @@ -957,6 +988,11 @@ static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size) return __igc_maybe_stop_tx(tx_ring, size); } +#define IGC_SET_FLAG(_input, _flag, _result) \ + (((_flag) <= (_result)) ? \ + ((u32)((_input) & (_flag)) * ((_result) / (_flag))) : \ + ((u32)((_input) & (_flag)) / ((_flag) / (_result)))) + static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) { /* set type for advanced descriptor with frame checksum insertion */ @@ -964,6 +1000,14 @@ static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) IGC_ADVTXD_DCMD_DEXT | IGC_ADVTXD_DCMD_IFCS; + /* set segmentation bits for TSO */ + cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSO, + (IGC_ADVTXD_DCMD_TSE)); + + /* set timestamp bit if present */ + cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP, + (IGC_ADVTXD_MAC_TSTAMP)); + return cmd_type; } @@ -1131,6 +1175,100 @@ dma_error: return -1; } +static int igc_tso(struct igc_ring *tx_ring, + struct igc_tx_buffer *first, + u8 *hdr_len) +{ + u32 vlan_macip_lens, type_tucmd, mss_l4len_idx; + struct sk_buff *skb = first->skb; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + struct udphdr *udp; + unsigned char *hdr; + } l4; + u32 paylen, l4_offset; + int err; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + if (!skb_is_gso(skb)) + return 0; + + err = skb_cow_head(skb, 0); + if (err < 0) + return err; + + ip.hdr = skb_network_header(skb); + l4.hdr = skb_checksum_start(skb); + + /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ + type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP; + + /* initialize outer IP header fields */ + if (ip.v4->version == 4) { + unsigned char *csum_start = skb_checksum_start(skb); + unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4); + + /* IP header will have to cancel out any data that + * is not a part of the outer IP header + */ + ip.v4->check = csum_fold(csum_partial(trans_start, + csum_start - trans_start, + 0)); + type_tucmd |= IGC_ADVTXD_TUCMD_IPV4; + + ip.v4->tot_len = 0; + 
first->tx_flags |= IGC_TX_FLAGS_TSO | + IGC_TX_FLAGS_CSUM | + IGC_TX_FLAGS_IPV4; + } else { + ip.v6->payload_len = 0; + first->tx_flags |= IGC_TX_FLAGS_TSO | + IGC_TX_FLAGS_CSUM; + } + + /* determine offset of inner transport header */ + l4_offset = l4.hdr - skb->data; + + /* remove payload length from inner checksum */ + paylen = skb->len - l4_offset; + if (type_tucmd & IGC_ADVTXD_TUCMD_L4T_TCP) { + /* compute length of segmentation header */ + *hdr_len = (l4.tcp->doff * 4) + l4_offset; + csum_replace_by_diff(&l4.tcp->check, + (__force __wsum)htonl(paylen)); + } else { + /* compute length of segmentation header */ + *hdr_len = sizeof(*l4.udp) + l4_offset; + csum_replace_by_diff(&l4.udp->check, + (__force __wsum)htonl(paylen)); + } + + /* update gso size and bytecount with header size */ + first->gso_segs = skb_shinfo(skb)->gso_segs; + first->bytecount += (first->gso_segs - 1) * *hdr_len; + + /* MSS L4LEN IDX */ + mss_l4len_idx = (*hdr_len - l4_offset) << IGC_ADVTXD_L4LEN_SHIFT; + mss_l4len_idx |= skb_shinfo(skb)->gso_size << IGC_ADVTXD_MSS_SHIFT; + + /* VLAN MACLEN IPLEN */ + vlan_macip_lens = l4.hdr - ip.hdr; + vlan_macip_lens |= (ip.hdr - skb->data) << IGC_ADVTXD_MACLEN_SHIFT; + vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK; + + igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, + type_tucmd, mss_l4len_idx); + + return 1; +} + static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, struct igc_ring *tx_ring) { @@ -1140,6 +1278,7 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, u32 tx_flags = 0; unsigned short f; u8 hdr_len = 0; + int tso = 0; /* need: 1 descriptor per page * PAGE_SIZE/IGC_MAX_DATA_PER_TXD, * + 1 desc for skb_headlen/IGC_MAX_DATA_PER_TXD, @@ -1162,15 +1301,45 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, first->bytecount = skb->len; first->gso_segs = 1; + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); + + /* FIXME: add support for retrieving timestamps from + * the other timer registers before skipping the + * timestamping request. 
+ */ + if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && + !test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS, + &adapter->state)) { + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + tx_flags |= IGC_TX_FLAGS_TSTAMP; + + adapter->ptp_tx_skb = skb_get(skb); + adapter->ptp_tx_start = jiffies; + } else { + adapter->tx_hwtstamp_skipped++; + } + } + /* record initial flags and protocol */ first->tx_flags = tx_flags; first->protocol = protocol; - igc_tx_csum(tx_ring, first); + tso = igc_tso(tx_ring, first, &hdr_len); + if (tso < 0) + goto out_drop; + else if (!tso) + igc_tx_csum(tx_ring, first); igc_tx_map(tx_ring, first, hdr_len); return NETDEV_TX_OK; + +out_drop: + dev_kfree_skb_any(first->skb); + first->skb = NULL; + + return NETDEV_TX_OK; } static inline struct igc_ring *igc_tx_queue_mapping(struct igc_adapter *adapter, @@ -1269,6 +1438,10 @@ static void igc_process_skb_fields(struct igc_ring *rx_ring, igc_rx_checksum(rx_ring, rx_desc, skb); + if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TS) && + !igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) + igc_ptp_rx_rgtstamp(rx_ring->q_vector, skb); + skb_record_rx_queue(skb, rx_ring->queue_index); skb->protocol = eth_type_trans(skb, rx_ring->netdev); @@ -1388,6 +1561,12 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, if (unlikely(!skb)) return NULL; + if (unlikely(igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP))) { + igc_ptp_rx_pktstamp(rx_ring->q_vector, va, skb); + va += IGC_TS_HDR_LEN; + size -= IGC_TS_HDR_LEN; + } + /* Determine available headroom for copy */ headlen = size; if (headlen > IGC_RX_HDR_LEN) @@ -1485,7 +1664,6 @@ static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer) * igc_is_non_eop - process handling of non-EOP buffers * @rx_ring: Rx ring being processed * @rx_desc: Rx descriptor for current buffer - * @skb: current socket buffer containing buffer in progress * * This function updates next to clean. If the buffer is an EOP buffer * this function exits returning false, otherwise it will place the @@ -1565,9 +1743,56 @@ static void igc_put_rx_buffer(struct igc_ring *rx_ring, rx_buffer->page = NULL; } +static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring) +{ + return ring_uses_build_skb(rx_ring) ? 
IGC_SKB_PAD : 0; +} + +static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, + struct igc_rx_buffer *bi) +{ + struct page *page = bi->page; + dma_addr_t dma; + + /* since we are recycling buffers we should seldom need to alloc */ + if (likely(page)) + return true; + + /* alloc new page for storage */ + page = dev_alloc_pages(igc_rx_pg_order(rx_ring)); + if (unlikely(!page)) { + rx_ring->rx_stats.alloc_failed++; + return false; + } + + /* map page for use */ + dma = dma_map_page_attrs(rx_ring->dev, page, 0, + igc_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IGC_RX_DMA_ATTR); + + /* if mapping failed free memory back to system since + * there isn't much point in holding memory we can't use + */ + if (dma_mapping_error(rx_ring->dev, dma)) { + __free_page(page); + + rx_ring->rx_stats.alloc_failed++; + return false; + } + + bi->dma = dma; + bi->page = page; + bi->page_offset = igc_rx_offset(rx_ring); + bi->pagecnt_bias = 1; + + return true; +} + /** * igc_alloc_rx_buffers - Replace used receive buffers; packet split - * @adapter: address of board private structure + * @rx_ring: rx descriptor ring + * @cleaned_count: number of buffers to clean */ static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count) { @@ -1725,52 +1950,6 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) return total_packets; } -static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring) -{ - return ring_uses_build_skb(rx_ring) ? IGC_SKB_PAD : 0; -} - -static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, - struct igc_rx_buffer *bi) -{ - struct page *page = bi->page; - dma_addr_t dma; - - /* since we are recycling buffers we should seldom need to alloc */ - if (likely(page)) - return true; - - /* alloc new page for storage */ - page = dev_alloc_pages(igc_rx_pg_order(rx_ring)); - if (unlikely(!page)) { - rx_ring->rx_stats.alloc_failed++; - return false; - } - - /* map page for use */ - dma = dma_map_page_attrs(rx_ring->dev, page, 0, - igc_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IGC_RX_DMA_ATTR); - - /* if mapping failed free memory back to system since - * there isn't much point in holding memory we can't use - */ - if (dma_mapping_error(rx_ring->dev, dma)) { - __free_page(page); - - rx_ring->rx_stats.alloc_failed++; - return false; - } - - bi->dma = dma; - bi->page = page; - bi->page_offset = igc_rx_offset(rx_ring); - bi->pagecnt_bias = 1; - - return true; -} - /** * igc_clean_tx_irq - Reclaim resources after transmit completes * @q_vector: pointer to q_vector containing needed info @@ -1942,6 +2121,1128 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) return !!budget; } +static void igc_nfc_filter_restore(struct igc_adapter *adapter) +{ + struct igc_nfc_filter *rule; + + spin_lock(&adapter->nfc_lock); + + hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) + igc_add_filter(adapter, rule); + + spin_unlock(&adapter->nfc_lock); +} + +/* If the filter to be added and an already existing filter express + * the same address and address type, it should be possible to only + * override the other configurations, for example the queue to steer + * traffic. 
+ */ +static bool igc_mac_entry_can_be_used(const struct igc_mac_addr *entry, + const u8 *addr, const u8 flags) +{ + if (!(entry->state & IGC_MAC_STATE_IN_USE)) + return true; + + if ((entry->state & IGC_MAC_STATE_SRC_ADDR) != + (flags & IGC_MAC_STATE_SRC_ADDR)) + return false; + + if (!ether_addr_equal(addr, entry->addr)) + return false; + + return true; +} + +/* Add a MAC filter for 'addr' directing matching traffic to 'queue', + * 'flags' is used to indicate what kind of match is made, match is by + * default for the destination address, if matching by source address + * is desired the flag IGC_MAC_STATE_SRC_ADDR can be used. + */ +static int igc_add_mac_filter(struct igc_adapter *adapter, + const u8 *addr, const u8 queue) +{ + struct igc_hw *hw = &adapter->hw; + int rar_entries = hw->mac.rar_entry_count; + int i; + + if (is_zero_ether_addr(addr)) + return -EINVAL; + + /* Search for the first empty entry in the MAC table. + * Do not touch entries at the end of the table reserved for the VF MAC + * addresses. + */ + for (i = 0; i < rar_entries; i++) { + if (!igc_mac_entry_can_be_used(&adapter->mac_table[i], + addr, 0)) + continue; + + ether_addr_copy(adapter->mac_table[i].addr, addr); + adapter->mac_table[i].queue = queue; + adapter->mac_table[i].state |= IGC_MAC_STATE_IN_USE; + + igc_rar_set_index(adapter, i); + return i; + } + + return -ENOSPC; +} + +/* Remove a MAC filter for 'addr' directing matching traffic to + * 'queue', 'flags' is used to indicate what kind of match need to be + * removed, match is by default for the destination address, if + * matching by source address is to be removed the flag + * IGC_MAC_STATE_SRC_ADDR can be used. + */ +static int igc_del_mac_filter(struct igc_adapter *adapter, + const u8 *addr, const u8 queue) +{ + struct igc_hw *hw = &adapter->hw; + int rar_entries = hw->mac.rar_entry_count; + int i; + + if (is_zero_ether_addr(addr)) + return -EINVAL; + + /* Search for matching entry in the MAC table based on given address + * and queue. Do not touch entries at the end of the table reserved + * for the VF MAC addresses. 
+ */ + for (i = 0; i < rar_entries; i++) { + if (!(adapter->mac_table[i].state & IGC_MAC_STATE_IN_USE)) + continue; + if (adapter->mac_table[i].state != 0) + continue; + if (adapter->mac_table[i].queue != queue) + continue; + if (!ether_addr_equal(adapter->mac_table[i].addr, addr)) + continue; + + /* When a filter for the default address is "deleted", + * we return it to its initial configuration + */ + if (adapter->mac_table[i].state & IGC_MAC_STATE_DEFAULT) { + adapter->mac_table[i].state = + IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE; + adapter->mac_table[i].queue = 0; + } else { + adapter->mac_table[i].state = 0; + adapter->mac_table[i].queue = 0; + memset(adapter->mac_table[i].addr, 0, ETH_ALEN); + } + + igc_rar_set_index(adapter, i); + return 0; + } + + return -ENOENT; +} + +static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + int ret; + + ret = igc_add_mac_filter(adapter, addr, adapter->num_rx_queues); + + return min_t(int, ret, 0); +} + +static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + + igc_del_mac_filter(adapter, addr, adapter->num_rx_queues); + + return 0; +} + +/** + * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set + * @netdev: network interface device structure + * + * The set_rx_mode entry point is called whenever the unicast or multicast + * address lists or the network interface flags are updated. This routine is + * responsible for configuring the hardware for proper unicast, multicast, + * promiscuous mode, and all-multi behavior. + */ +static void igc_set_rx_mode(struct net_device *netdev) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + struct igc_hw *hw = &adapter->hw; + u32 rctl = 0, rlpml = MAX_JUMBO_FRAME_SIZE; + int count; + + /* Check for Promiscuous and All Multicast modes */ + if (netdev->flags & IFF_PROMISC) { + rctl |= IGC_RCTL_UPE | IGC_RCTL_MPE; + } else { + if (netdev->flags & IFF_ALLMULTI) { + rctl |= IGC_RCTL_MPE; + } else { + /* Write addresses to the MTA, if the attempt fails + * then we should just turn on promiscuous mode so + * that we can at least receive multicast traffic + */ + count = igc_write_mc_addr_list(netdev); + if (count < 0) + rctl |= IGC_RCTL_MPE; + } + } + + /* Write addresses to available RAR registers, if there is not + * sufficient space to store all the addresses then enable + * unicast promiscuous mode + */ + if (__dev_uc_sync(netdev, igc_uc_sync, igc_uc_unsync)) + rctl |= IGC_RCTL_UPE; + + /* update state of unicast and multicast */ + rctl |= rd32(IGC_RCTL) & ~(IGC_RCTL_UPE | IGC_RCTL_MPE); + wr32(IGC_RCTL, rctl); + +#if (PAGE_SIZE < 8192) + if (adapter->max_frame_size <= IGC_MAX_FRAME_BUILD_SKB) + rlpml = IGC_MAX_FRAME_BUILD_SKB; +#endif + wr32(IGC_RLPML, rlpml); +} + +/** + * igc_configure - configure the hardware for RX and TX + * @adapter: private board structure + */ +static void igc_configure(struct igc_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + int i = 0; + + igc_get_hw_control(adapter); + igc_set_rx_mode(netdev); + + igc_setup_tctl(adapter); + igc_setup_mrqc(adapter); + igc_setup_rctl(adapter); + + igc_nfc_filter_restore(adapter); + igc_configure_tx(adapter); + igc_configure_rx(adapter); + + igc_rx_fifo_flush_base(&adapter->hw); + + /* call igc_desc_unused which always leaves + * at least 1 descriptor unused to make sure + * next_to_use != next_to_clean + */ + for (i = 0; i < 
adapter->num_rx_queues; i++) { + struct igc_ring *ring = adapter->rx_ring[i]; + + igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); + } +} + +/** + * igc_write_ivar - configure ivar for given MSI-X vector + * @hw: pointer to the HW structure + * @msix_vector: vector number we are allocating to a given ring + * @index: row index of IVAR register to write within IVAR table + * @offset: column offset of in IVAR, should be multiple of 8 + * + * The IVAR table consists of 2 columns, + * each containing an cause allocation for an Rx and Tx ring, and a + * variable number of rows depending on the number of queues supported. + */ +static void igc_write_ivar(struct igc_hw *hw, int msix_vector, + int index, int offset) +{ + u32 ivar = array_rd32(IGC_IVAR0, index); + + /* clear any bits that are currently set */ + ivar &= ~((u32)0xFF << offset); + + /* write vector and valid bit */ + ivar |= (msix_vector | IGC_IVAR_VALID) << offset; + + array_wr32(IGC_IVAR0, index, ivar); +} + +static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector) +{ + struct igc_adapter *adapter = q_vector->adapter; + struct igc_hw *hw = &adapter->hw; + int rx_queue = IGC_N0_QUEUE; + int tx_queue = IGC_N0_QUEUE; + + if (q_vector->rx.ring) + rx_queue = q_vector->rx.ring->reg_idx; + if (q_vector->tx.ring) + tx_queue = q_vector->tx.ring->reg_idx; + + switch (hw->mac.type) { + case igc_i225: + if (rx_queue > IGC_N0_QUEUE) + igc_write_ivar(hw, msix_vector, + rx_queue >> 1, + (rx_queue & 0x1) << 4); + if (tx_queue > IGC_N0_QUEUE) + igc_write_ivar(hw, msix_vector, + tx_queue >> 1, + ((tx_queue & 0x1) << 4) + 8); + q_vector->eims_value = BIT(msix_vector); + break; + default: + WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n"); + break; + } + + /* add q_vector eims value to global eims_enable_mask */ + adapter->eims_enable_mask |= q_vector->eims_value; + + /* configure q_vector to set itr on first interrupt */ + q_vector->set_itr = 1; +} + +/** + * igc_configure_msix - Configure MSI-X hardware + * @adapter: Pointer to adapter structure + * + * igc_configure_msix sets up the hardware to properly + * generate MSI-X interrupts. + */ +static void igc_configure_msix(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + int i, vector = 0; + u32 tmp; + + adapter->eims_enable_mask = 0; + + /* set vector for other causes, i.e. link changes */ + switch (hw->mac.type) { + case igc_i225: + /* Turn on MSI-X capability first, or our settings + * won't stick. And it will take days to debug. 
+ */ + wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE | + IGC_GPIE_PBA | IGC_GPIE_EIAME | + IGC_GPIE_NSICR); + + /* enable msix_other interrupt */ + adapter->eims_other = BIT(vector); + tmp = (vector++ | IGC_IVAR_VALID) << 8; + + wr32(IGC_IVAR_MISC, tmp); + break; + default: + /* do nothing, since nothing else supports MSI-X */ + break; + } /* switch (hw->mac.type) */ + + adapter->eims_enable_mask |= adapter->eims_other; + + for (i = 0; i < adapter->num_q_vectors; i++) + igc_assign_vector(adapter->q_vector[i], vector++); + + wrfl(); +} + +/** + * igc_irq_enable - Enable default interrupt generation settings + * @adapter: board private structure + */ +static void igc_irq_enable(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + + if (adapter->msix_entries) { + u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA; + u32 regval = rd32(IGC_EIAC); + + wr32(IGC_EIAC, regval | adapter->eims_enable_mask); + regval = rd32(IGC_EIAM); + wr32(IGC_EIAM, regval | adapter->eims_enable_mask); + wr32(IGC_EIMS, adapter->eims_enable_mask); + wr32(IGC_IMS, ims); + } else { + wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA); + wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA); + } +} + +/** + * igc_irq_disable - Mask off interrupt generation on the NIC + * @adapter: board private structure + */ +static void igc_irq_disable(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + + if (adapter->msix_entries) { + u32 regval = rd32(IGC_EIAM); + + wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask); + wr32(IGC_EIMC, adapter->eims_enable_mask); + regval = rd32(IGC_EIAC); + wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask); + } + + wr32(IGC_IAM, 0); + wr32(IGC_IMC, ~0); + wrfl(); + + if (adapter->msix_entries) { + int vector = 0, i; + + synchronize_irq(adapter->msix_entries[vector++].vector); + + for (i = 0; i < adapter->num_q_vectors; i++) + synchronize_irq(adapter->msix_entries[vector++].vector); + } else { + synchronize_irq(adapter->pdev->irq); + } +} + +void igc_set_flag_queue_pairs(struct igc_adapter *adapter, + const u32 max_rss_queues) +{ + /* Determine if we need to pair queues. */ + /* If rss_queues > half of max_rss_queues, pair the queues in + * order to conserve interrupts due to limited supply. + */ + if (adapter->rss_queues > (max_rss_queues / 2)) + adapter->flags |= IGC_FLAG_QUEUE_PAIRS; + else + adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS; +} + +unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter) +{ + unsigned int max_rss_queues; + + /* Determine the maximum number of RSS queues supported. */ + max_rss_queues = IGC_MAX_RX_QUEUES; + + return max_rss_queues; +} + +static void igc_init_queue_configuration(struct igc_adapter *adapter) +{ + u32 max_rss_queues; + + max_rss_queues = igc_get_max_rss_queues(adapter); + adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus()); + + igc_set_flag_queue_pairs(adapter, max_rss_queues); +} + +/** + * igc_reset_q_vector - Reset config for interrupt vector + * @adapter: board private structure to initialize + * @v_idx: Index of vector to be reset + * + * If NAPI is enabled it will delete any references to the + * NAPI struct. This is preparation for igc_free_q_vector. 
+ */ +static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx) +{ + struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; + + /* if we're coming from igc_set_interrupt_capability, the vectors are + * not yet allocated + */ + if (!q_vector) + return; + + if (q_vector->tx.ring) + adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL; + + if (q_vector->rx.ring) + adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL; + + netif_napi_del(&q_vector->napi); +} + +/** + * igc_free_q_vector - Free memory allocated for specific interrupt vector + * @adapter: board private structure to initialize + * @v_idx: Index of vector to be freed + * + * This function frees the memory allocated to the q_vector. + */ +static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx) +{ + struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; + + adapter->q_vector[v_idx] = NULL; + + /* igc_get_stats64() might access the rings on this vector, + * we must wait a grace period before freeing it. + */ + if (q_vector) + kfree_rcu(q_vector, rcu); +} + +/** + * igc_free_q_vectors - Free memory allocated for interrupt vectors + * @adapter: board private structure to initialize + * + * This function frees the memory allocated to the q_vectors. In addition if + * NAPI is enabled it will delete any references to the NAPI struct prior + * to freeing the q_vector. + */ +static void igc_free_q_vectors(struct igc_adapter *adapter) +{ + int v_idx = adapter->num_q_vectors; + + adapter->num_tx_queues = 0; + adapter->num_rx_queues = 0; + adapter->num_q_vectors = 0; + + while (v_idx--) { + igc_reset_q_vector(adapter, v_idx); + igc_free_q_vector(adapter, v_idx); + } +} + +/** + * igc_update_itr - update the dynamic ITR value based on statistics + * @q_vector: pointer to q_vector + * @ring_container: ring info to update the itr for + * + * Stores a new ITR value based on packets and byte + * counts during the last interrupt. The advantage of per interrupt + * computation is faster updates and more accurate ITR for the current + * traffic pattern. Constants in this function were computed + * based on theoretical maximum wire speed and thresholds were set based + * on testing data as well as attempting to minimize response time + * while increasing bulk throughput. + * NOTE: These calculations are only valid when operating in a single- + * queue environment. 
+ */ +static void igc_update_itr(struct igc_q_vector *q_vector, + struct igc_ring_container *ring_container) +{ + unsigned int packets = ring_container->total_packets; + unsigned int bytes = ring_container->total_bytes; + u8 itrval = ring_container->itr; + + /* no packets, exit with status unchanged */ + if (packets == 0) + return; + + switch (itrval) { + case lowest_latency: + /* handle TSO and jumbo frames */ + if (bytes / packets > 8000) + itrval = bulk_latency; + else if ((packets < 5) && (bytes > 512)) + itrval = low_latency; + break; + case low_latency: /* 50 usec aka 20000 ints/s */ + if (bytes > 10000) { + /* this if handles the TSO accounting */ + if (bytes / packets > 8000) + itrval = bulk_latency; + else if ((packets < 10) || ((bytes / packets) > 1200)) + itrval = bulk_latency; + else if ((packets > 35)) + itrval = lowest_latency; + } else if (bytes / packets > 2000) { + itrval = bulk_latency; + } else if (packets <= 2 && bytes < 512) { + itrval = lowest_latency; + } + break; + case bulk_latency: /* 250 usec aka 4000 ints/s */ + if (bytes > 25000) { + if (packets > 35) + itrval = low_latency; + } else if (bytes < 1500) { + itrval = low_latency; + } + break; + } + + /* clear work counters since we have the values we need */ + ring_container->total_bytes = 0; + ring_container->total_packets = 0; + + /* write updated itr to ring container */ + ring_container->itr = itrval; +} + +static void igc_set_itr(struct igc_q_vector *q_vector) +{ + struct igc_adapter *adapter = q_vector->adapter; + u32 new_itr = q_vector->itr_val; + u8 current_itr = 0; + + /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ + switch (adapter->link_speed) { + case SPEED_10: + case SPEED_100: + current_itr = 0; + new_itr = IGC_4K_ITR; + goto set_itr_now; + default: + break; + } + + igc_update_itr(q_vector, &q_vector->tx); + igc_update_itr(q_vector, &q_vector->rx); + + current_itr = max(q_vector->rx.itr, q_vector->tx.itr); + + /* conservative mode (itr 3) eliminates the lowest_latency setting */ + if (current_itr == lowest_latency && + ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || + (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) + current_itr = low_latency; + + switch (current_itr) { + /* counts and packets in update_itr are dependent on these numbers */ + case lowest_latency: + new_itr = IGC_70K_ITR; /* 70,000 ints/sec */ + break; + case low_latency: + new_itr = IGC_20K_ITR; /* 20,000 ints/sec */ + break; + case bulk_latency: + new_itr = IGC_4K_ITR; /* 4,000 ints/sec */ + break; + default: + break; + } + +set_itr_now: + if (new_itr != q_vector->itr_val) { + /* this attempts to bias the interrupt rate towards Bulk + * by adding intermediate steps when interrupt rate is + * increasing + */ + new_itr = new_itr > q_vector->itr_val ? + max((new_itr * q_vector->itr_val) / + (new_itr + (q_vector->itr_val >> 2)), + new_itr) : new_itr; + /* Don't write the value here; it resets the adapter's + * internal timer, and causes us to delay far longer than + * we should between interrupts. Instead, we write the ITR + * value at the beginning of the next interrupt so the timing + * ends up being correct. 
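The igc_update_itr() heuristic above is a pure function of the byte and packet counts seen since the last interrupt, so its classification can be exercised on its own. A standalone sketch (userspace C; the enum names follow the driver, everything else is a stand-in):

#include <stdio.h>

enum latency_range { lowest_latency = 0, low_latency = 1, bulk_latency = 2 };

/* Restates the classification from igc_update_itr(): given the previous
 * latency class and the byte/packet counts of the last interrupt, pick the
 * next class.
 */
static enum latency_range next_itr_class(enum latency_range cur,
                                         unsigned int packets,
                                         unsigned int bytes)
{
        if (packets == 0)
                return cur;

        switch (cur) {
        case lowest_latency:
                if (bytes / packets > 8000)
                        return bulk_latency;    /* TSO / jumbo traffic */
                if (packets < 5 && bytes > 512)
                        return low_latency;
                return cur;
        case low_latency:
                if (bytes > 10000) {
                        if (bytes / packets > 8000)
                                return bulk_latency;
                        if (packets < 10 || bytes / packets > 1200)
                                return bulk_latency;
                        if (packets > 35)
                                return lowest_latency;
                        return cur;
                }
                if (bytes / packets > 2000)
                        return bulk_latency;
                if (packets <= 2 && bytes < 512)
                        return lowest_latency;
                return cur;
        case bulk_latency:
                if (bytes > 25000)
                        return packets > 35 ? low_latency : cur;
                return bytes < 1500 ? low_latency : cur;
        }
        return cur;
}

int main(void)
{
        /* A burst of large frames moves a low-latency vector to bulk_latency. */
        printf("next class: %d\n", next_itr_class(low_latency, 20, 180000));
        return 0;
}

igc_set_itr() then maps the resulting class to roughly 70K, 20K or 4K interrupts per second and defers the register write to the next interrupt, as the surrounding hunk shows.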
+ */ + q_vector->itr_val = new_itr; + q_vector->set_itr = 1; + } +} + +static void igc_reset_interrupt_capability(struct igc_adapter *adapter) +{ + int v_idx = adapter->num_q_vectors; + + if (adapter->msix_entries) { + pci_disable_msix(adapter->pdev); + kfree(adapter->msix_entries); + adapter->msix_entries = NULL; + } else if (adapter->flags & IGC_FLAG_HAS_MSI) { + pci_disable_msi(adapter->pdev); + } + + while (v_idx--) + igc_reset_q_vector(adapter, v_idx); +} + +/** + * igc_set_interrupt_capability - set MSI or MSI-X if supported + * @adapter: Pointer to adapter structure + * @msix: boolean value for MSI-X capability + * + * Attempt to configure interrupts using the best available + * capabilities of the hardware and kernel. + */ +static void igc_set_interrupt_capability(struct igc_adapter *adapter, + bool msix) +{ + int numvecs, i; + int err; + + if (!msix) + goto msi_only; + adapter->flags |= IGC_FLAG_HAS_MSIX; + + /* Number of supported queues. */ + adapter->num_rx_queues = adapter->rss_queues; + + adapter->num_tx_queues = adapter->rss_queues; + + /* start with one vector for every Rx queue */ + numvecs = adapter->num_rx_queues; + + /* if Tx handler is separate add 1 for every Tx queue */ + if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS)) + numvecs += adapter->num_tx_queues; + + /* store the number of vectors reserved for queues */ + adapter->num_q_vectors = numvecs; + + /* add 1 vector for link status interrupts */ + numvecs++; + + adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry), + GFP_KERNEL); + + if (!adapter->msix_entries) + return; + + /* populate entry values */ + for (i = 0; i < numvecs; i++) + adapter->msix_entries[i].entry = i; + + err = pci_enable_msix_range(adapter->pdev, + adapter->msix_entries, + numvecs, + numvecs); + if (err > 0) + return; + + kfree(adapter->msix_entries); + adapter->msix_entries = NULL; + + igc_reset_interrupt_capability(adapter); + +msi_only: + adapter->flags &= ~IGC_FLAG_HAS_MSIX; + + adapter->rss_queues = 1; + adapter->flags |= IGC_FLAG_QUEUE_PAIRS; + adapter->num_rx_queues = 1; + adapter->num_tx_queues = 1; + adapter->num_q_vectors = 1; + if (!pci_enable_msi(adapter->pdev)) + adapter->flags |= IGC_FLAG_HAS_MSI; +} + +/** + * igc_update_ring_itr - update the dynamic ITR value based on packet size + * @q_vector: pointer to q_vector + * + * Stores a new ITR value based strictly on packet size. This + * algorithm is less sophisticated than that used in igc_update_itr, + * due to the difficulty of synchronizing statistics across multiple + * receive rings. The divisors and thresholds used by this function + * were determined based on theoretical maximum wire speed and testing + * data, in order to minimize response time while increasing bulk + * throughput. + * NOTE: This function is called only when operating in a multiqueue + * receive environment. + */ +static void igc_update_ring_itr(struct igc_q_vector *q_vector) +{ + struct igc_adapter *adapter = q_vector->adapter; + int new_val = q_vector->itr_val; + int avg_wire_size = 0; + unsigned int packets; + + /* For non-gigabit speeds, just fix the interrupt rate at 4000 + * ints/sec - ITR timer value of 120 ticks.
+ */ + switch (adapter->link_speed) { + case SPEED_10: + case SPEED_100: + new_val = IGC_4K_ITR; + goto set_itr_val; + default: + break; + } + + packets = q_vector->rx.total_packets; + if (packets) + avg_wire_size = q_vector->rx.total_bytes / packets; + + packets = q_vector->tx.total_packets; + if (packets) + avg_wire_size = max_t(u32, avg_wire_size, + q_vector->tx.total_bytes / packets); + + /* if avg_wire_size isn't set no work was done */ + if (!avg_wire_size) + goto clear_counts; + + /* Add 24 bytes to size to account for CRC, preamble, and gap */ + avg_wire_size += 24; + + /* Don't starve jumbo frames */ + avg_wire_size = min(avg_wire_size, 3000); + + /* Give a little boost to mid-size frames */ + if (avg_wire_size > 300 && avg_wire_size < 1200) + new_val = avg_wire_size / 3; + else + new_val = avg_wire_size / 2; + + /* conservative mode (itr 3) eliminates the lowest_latency setting */ + if (new_val < IGC_20K_ITR && + ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || + (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) + new_val = IGC_20K_ITR; + +set_itr_val: + if (new_val != q_vector->itr_val) { + q_vector->itr_val = new_val; + q_vector->set_itr = 1; + } +clear_counts: + q_vector->rx.total_bytes = 0; + q_vector->rx.total_packets = 0; + q_vector->tx.total_bytes = 0; + q_vector->tx.total_packets = 0; +} + +static void igc_ring_irq_enable(struct igc_q_vector *q_vector) +{ + struct igc_adapter *adapter = q_vector->adapter; + struct igc_hw *hw = &adapter->hw; + + if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) || + (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) { + if (adapter->num_q_vectors == 1) + igc_set_itr(q_vector); + else + igc_update_ring_itr(q_vector); + } + + if (!test_bit(__IGC_DOWN, &adapter->state)) { + if (adapter->msix_entries) + wr32(IGC_EIMS, q_vector->eims_value); + else + igc_irq_enable(adapter); + } +} + +static void igc_add_ring(struct igc_ring *ring, + struct igc_ring_container *head) +{ + head->ring = ring; + head->count++; +} + +/** + * igc_cache_ring_register - Descriptor ring to register mapping + * @adapter: board private structure to initialize + * + * Once we know the feature-set enabled for the device, we'll cache + * the register offset the descriptor ring is assigned to. 
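
The igc_update_ring_itr() heuristic above reduces to: derive the average wire size from the byte and packet counters, pad it for CRC, preamble and inter-frame gap, cap it so jumbo frames are not starved, then map it to an ITR value with a gentler divisor for mid-size frames. A standalone sketch of that mapping; the 20K-ITR constant below is a placeholder I picked for the demo, the real value lives in the driver headers.

    #include <stdio.h>

    /* Placeholder for IGC_20K_ITR; the real value lives in igc.h. */
    #define MODEL_20K_ITR 196

    /* Model of the per-ring ITR heuristic: ITR value from average wire size. */
    static int itr_from_counters(unsigned int bytes, unsigned int packets,
                                 int conservative)
    {
            unsigned int avg_wire_size;
            int new_val;

            if (!packets)
                    return -1;                      /* no work done, keep old ITR */

            avg_wire_size = bytes / packets;
            avg_wire_size += 24;                    /* CRC, preamble, IPG */
            if (avg_wire_size > 3000)
                    avg_wire_size = 3000;           /* don't starve jumbo frames */

            if (avg_wire_size > 300 && avg_wire_size < 1200)
                    new_val = avg_wire_size / 3;    /* boost mid-size frames */
            else
                    new_val = avg_wire_size / 2;

            /* conservative mode never drops below the 20k ints/s setting */
            if (conservative && new_val < MODEL_20K_ITR)
                    new_val = MODEL_20K_ITR;

            return new_val;
    }

    int main(void)
    {
            printf("64B frames:   ITR %d\n", itr_from_counters(6400, 100, 0));
            printf("800B frames:  ITR %d\n", itr_from_counters(80000, 100, 0));
            printf("1500B frames: ITR %d\n", itr_from_counters(150000, 100, 0));
            return 0;
    }
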
+ */ +static void igc_cache_ring_register(struct igc_adapter *adapter) +{ + int i = 0, j = 0; + + switch (adapter->hw.mac.type) { + case igc_i225: + /* Fall through */ + default: + for (; i < adapter->num_rx_queues; i++) + adapter->rx_ring[i]->reg_idx = i; + for (; j < adapter->num_tx_queues; j++) + adapter->tx_ring[j]->reg_idx = j; + break; + } +} + +/** + * igc_poll - NAPI Rx polling callback + * @napi: napi polling structure + * @budget: count of how many packets we should handle + */ +static int igc_poll(struct napi_struct *napi, int budget) +{ + struct igc_q_vector *q_vector = container_of(napi, + struct igc_q_vector, + napi); + bool clean_complete = true; + int work_done = 0; + + if (q_vector->tx.ring) + clean_complete = igc_clean_tx_irq(q_vector, budget); + + if (q_vector->rx.ring) { + int cleaned = igc_clean_rx_irq(q_vector, budget); + + work_done += cleaned; + if (cleaned >= budget) + clean_complete = false; + } + + /* If all work not completed, return budget and keep polling */ + if (!clean_complete) + return budget; + + /* Exit the polling mode, but don't re-enable interrupts if stack might + * poll us due to busy-polling + */ + if (likely(napi_complete_done(napi, work_done))) + igc_ring_irq_enable(q_vector); + + return min(work_done, budget - 1); +} + +/** + * igc_alloc_q_vector - Allocate memory for a single interrupt vector + * @adapter: board private structure to initialize + * @v_count: q_vectors allocated on adapter, used for ring interleaving + * @v_idx: index of vector in adapter struct + * @txr_count: total number of Tx rings to allocate + * @txr_idx: index of first Tx ring to allocate + * @rxr_count: total number of Rx rings to allocate + * @rxr_idx: index of first Rx ring to allocate + * + * We allocate one q_vector. If allocation fails we return -ENOMEM. 
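
igc_poll() above follows the usual NAPI contract: if either the Tx or Rx cleanup could not finish within the budget, the full budget is returned so the core keeps polling; otherwise completion is reported and the return value stays strictly below the budget. The sketch below only models that return-value rule outside the kernel; the function and parameter names are mine.

    #include <stdbool.h>
    #include <stdio.h>

    /* Model of the NAPI poll return-value contract used by igc_poll(). */
    static int poll_result(bool tx_clean_complete, int rx_cleaned, int budget)
    {
            bool clean_complete = tx_clean_complete;
            int work_done = rx_cleaned;

            if (rx_cleaned >= budget)
                    clean_complete = false;

            if (!clean_complete)
                    return budget;          /* keep polling */

            /* napi_complete_done() would run here; report strictly < budget */
            return work_done < budget - 1 ? work_done : budget - 1;
    }

    int main(void)
    {
            printf("busy ring:  %d\n", poll_result(true, 64, 64));  /* = budget */
            printf("quiet ring: %d\n", poll_result(true, 3, 64));   /* < budget */
            return 0;
    }
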
+ */ +static int igc_alloc_q_vector(struct igc_adapter *adapter, + unsigned int v_count, unsigned int v_idx, + unsigned int txr_count, unsigned int txr_idx, + unsigned int rxr_count, unsigned int rxr_idx) +{ + struct igc_q_vector *q_vector; + struct igc_ring *ring; + int ring_count; + + /* igc only supports 1 Tx and/or 1 Rx queue per vector */ + if (txr_count > 1 || rxr_count > 1) + return -ENOMEM; + + ring_count = txr_count + rxr_count; + + /* allocate q_vector and rings */ + q_vector = adapter->q_vector[v_idx]; + if (!q_vector) + q_vector = kzalloc(struct_size(q_vector, ring, ring_count), + GFP_KERNEL); + else + memset(q_vector, 0, struct_size(q_vector, ring, ring_count)); + if (!q_vector) + return -ENOMEM; + + /* initialize NAPI */ + netif_napi_add(adapter->netdev, &q_vector->napi, + igc_poll, 64); + + /* tie q_vector and adapter together */ + adapter->q_vector[v_idx] = q_vector; + q_vector->adapter = adapter; + + /* initialize work limits */ + q_vector->tx.work_limit = adapter->tx_work_limit; + + /* initialize ITR configuration */ + q_vector->itr_register = adapter->io_addr + IGC_EITR(0); + q_vector->itr_val = IGC_START_ITR; + + /* initialize pointer to rings */ + ring = q_vector->ring; + + /* initialize ITR */ + if (rxr_count) { + /* rx or rx/tx vector */ + if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3) + q_vector->itr_val = adapter->rx_itr_setting; + } else { + /* tx only vector */ + if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3) + q_vector->itr_val = adapter->tx_itr_setting; + } + + if (txr_count) { + /* assign generic ring traits */ + ring->dev = &adapter->pdev->dev; + ring->netdev = adapter->netdev; + + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Tx values */ + igc_add_ring(ring, &q_vector->tx); + + /* apply Tx specific ring traits */ + ring->count = adapter->tx_ring_count; + ring->queue_index = txr_idx; + + /* assign ring to adapter */ + adapter->tx_ring[txr_idx] = ring; + + /* push pointer to next ring */ + ring++; + } + + if (rxr_count) { + /* assign generic ring traits */ + ring->dev = &adapter->pdev->dev; + ring->netdev = adapter->netdev; + + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Rx values */ + igc_add_ring(ring, &q_vector->rx); + + /* apply Rx specific ring traits */ + ring->count = adapter->rx_ring_count; + ring->queue_index = rxr_idx; + + /* assign ring to adapter */ + adapter->rx_ring[rxr_idx] = ring; + } + + return 0; +} + +/** + * igc_alloc_q_vectors - Allocate memory for interrupt vectors + * @adapter: board private structure to initialize + * + * We allocate one q_vector per queue interrupt. If allocation fails we + * return -ENOMEM. 
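
One detail worth noting in igc_alloc_q_vector() above is the single allocation for the vector plus its ring(s) via struct_size(): the rings live in a flexible array member at the end of the q_vector. A plain C illustration of that allocation pattern, with made-up structure contents.

    #include <stdio.h>
    #include <stdlib.h>

    struct demo_ring {
            unsigned int count;
            unsigned int queue_index;
    };

    struct demo_q_vector {
            unsigned int itr_val;
            struct demo_ring ring[];        /* flexible array member */
    };

    int main(void)
    {
            unsigned int ring_count = 2;    /* e.g. one Tx plus one Rx ring */
            struct demo_q_vector *q;

            /* Equivalent of struct_size(q, ring, ring_count) in the kernel. */
            q = calloc(1, sizeof(*q) + ring_count * sizeof(q->ring[0]));
            if (!q)
                    return 1;

            q->ring[0].queue_index = 0;     /* Tx ring slot */
            q->ring[1].queue_index = 0;     /* Rx ring slot */

            printf("allocated %zu bytes for 1 vector + %u rings\n",
                   sizeof(*q) + ring_count * sizeof(q->ring[0]), ring_count);
            free(q);
            return 0;
    }
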
+ */ +static int igc_alloc_q_vectors(struct igc_adapter *adapter) +{ + int rxr_remaining = adapter->num_rx_queues; + int txr_remaining = adapter->num_tx_queues; + int rxr_idx = 0, txr_idx = 0, v_idx = 0; + int q_vectors = adapter->num_q_vectors; + int err; + + if (q_vectors >= (rxr_remaining + txr_remaining)) { + for (; rxr_remaining; v_idx++) { + err = igc_alloc_q_vector(adapter, q_vectors, v_idx, + 0, 0, 1, rxr_idx); + + if (err) + goto err_out; + + /* update counts and index */ + rxr_remaining--; + rxr_idx++; + } + } + + for (; v_idx < q_vectors; v_idx++) { + int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); + int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); + + err = igc_alloc_q_vector(adapter, q_vectors, v_idx, + tqpv, txr_idx, rqpv, rxr_idx); + + if (err) + goto err_out; + + /* update counts and index */ + rxr_remaining -= rqpv; + txr_remaining -= tqpv; + rxr_idx++; + txr_idx++; + } + + return 0; + +err_out: + adapter->num_tx_queues = 0; + adapter->num_rx_queues = 0; + adapter->num_q_vectors = 0; + + while (v_idx--) + igc_free_q_vector(adapter, v_idx); + + return -ENOMEM; +} + +/** + * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors + * @adapter: Pointer to adapter structure + * @msix: boolean for MSI-X capability + * + * This function initializes the interrupts and allocates all of the queues. + */ +static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix) +{ + struct pci_dev *pdev = adapter->pdev; + int err = 0; + + igc_set_interrupt_capability(adapter, msix); + + err = igc_alloc_q_vectors(adapter); + if (err) { + dev_err(&pdev->dev, "Unable to allocate memory for vectors\n"); + goto err_alloc_q_vectors; + } + + igc_cache_ring_register(adapter); + + return 0; + +err_alloc_q_vectors: + igc_reset_interrupt_capability(adapter); + return err; +} + +/** + * igc_sw_init - Initialize general software structures (struct igc_adapter) + * @adapter: board private structure to initialize + * + * igc_sw_init initializes the Adapter private data structure. + * Fields are initialized based on PCI device information and + * OS network device settings (MTU size). 
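
The loop in igc_alloc_q_vectors() above spreads the remaining Rx and Tx rings evenly over the remaining vectors, recomputing DIV_ROUND_UP at each step. A small standalone model of that distribution; the queue and vector counts are arbitrary.

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
            int rxr_remaining = 4, txr_remaining = 4;
            int q_vectors = 4, v_idx;

            /* With queue pairs, each vector ends up with one Rx and one Tx ring. */
            for (v_idx = 0; v_idx < q_vectors; v_idx++) {
                    int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
                    int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);

                    printf("vector %d: %d rx ring(s), %d tx ring(s)\n",
                           v_idx, rqpv, tqpv);
                    rxr_remaining -= rqpv;
                    txr_remaining -= tqpv;
            }
            return 0;
    }
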
+ */ +static int igc_sw_init(struct igc_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct pci_dev *pdev = adapter->pdev; + struct igc_hw *hw = &adapter->hw; + + int size = sizeof(struct igc_mac_addr) * hw->mac.rar_entry_count; + + pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); + + /* set default ring sizes */ + adapter->tx_ring_count = IGC_DEFAULT_TXD; + adapter->rx_ring_count = IGC_DEFAULT_RXD; + + /* set default ITR values */ + adapter->rx_itr_setting = IGC_DEFAULT_ITR; + adapter->tx_itr_setting = IGC_DEFAULT_ITR; + + /* set default work limits */ + adapter->tx_work_limit = IGC_DEFAULT_TX_WORK; + + /* adjust max frame to be at least the size of a standard frame */ + adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + + VLAN_HLEN; + adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; + + spin_lock_init(&adapter->nfc_lock); + spin_lock_init(&adapter->stats64_lock); + /* Assume MSI-X interrupts, will be checked during IRQ allocation */ + adapter->flags |= IGC_FLAG_HAS_MSIX; + + adapter->mac_table = kzalloc(size, GFP_ATOMIC); + if (!adapter->mac_table) + return -ENOMEM; + + igc_init_queue_configuration(adapter); + + /* This call may decrease the number of queues */ + if (igc_init_interrupt_scheme(adapter, true)) { + dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); + return -ENOMEM; + } + + /* Explicitly disable IRQ since the NIC can be in any state. */ + igc_irq_disable(adapter); + + set_bit(__IGC_DOWN, &adapter->state); + + return 0; +} + /** * igc_up - Open the interface and prepare it to handle traffic * @adapter: board private structure @@ -2163,18 +3464,6 @@ static void igc_nfc_filter_exit(struct igc_adapter *adapter) spin_unlock(&adapter->nfc_lock); } -static void igc_nfc_filter_restore(struct igc_adapter *adapter) -{ - struct igc_nfc_filter *rule; - - spin_lock(&adapter->nfc_lock); - - hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) - igc_add_filter(adapter, rule); - - spin_unlock(&adapter->nfc_lock); -} - /** * igc_down - Close the interface * @adapter: board private structure @@ -2398,105 +3687,6 @@ igc_features_check(struct sk_buff *skb, struct net_device *dev, return features; } -/** - * igc_configure - configure the hardware for RX and TX - * @adapter: private board structure - */ -static void igc_configure(struct igc_adapter *adapter) -{ - struct net_device *netdev = adapter->netdev; - int i = 0; - - igc_get_hw_control(adapter); - igc_set_rx_mode(netdev); - - igc_setup_tctl(adapter); - igc_setup_mrqc(adapter); - igc_setup_rctl(adapter); - - igc_nfc_filter_restore(adapter); - igc_configure_tx(adapter); - igc_configure_rx(adapter); - - igc_rx_fifo_flush_base(&adapter->hw); - - /* call igc_desc_unused which always leaves - * at least 1 descriptor unused to make sure - * next_to_use != next_to_clean - */ - for (i = 0; i < adapter->num_rx_queues; i++) { - struct igc_ring *ring = adapter->rx_ring[i]; - - igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); - } -} - -/** - * igc_rar_set_index - Sync RAL[index] and RAH[index] registers with MAC table - * @adapter: address of board private structure - * @index: Index of the RAR entry which need to be synced with MAC table - */ -static void igc_rar_set_index(struct igc_adapter *adapter, u32 index) -{ - u8 *addr = adapter->mac_table[index].addr; - struct igc_hw *hw = &adapter->hw; - u32 rar_low, rar_high; - - /* HW expects these to be in network order when they are plugged - * into the registers which are little endian. 
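
igc_sw_init() above sizes the maximum frame as MTU plus Ethernet header, FCS and one VLAN tag. For reference, a quick computation with the usual Linux header lengths, restated here so the example compiles on its own.

    #include <stdio.h>

    #define ETH_HLEN     14     /* dst MAC + src MAC + ethertype */
    #define ETH_FCS_LEN   4     /* frame check sequence */
    #define VLAN_HLEN     4     /* one 802.1Q tag */
    #define ETH_ZLEN     60     /* minimum frame length without FCS */

    int main(void)
    {
            unsigned int mtu;

            for (mtu = 1500; mtu <= 9000; mtu += 7500)
                    printf("MTU %4u -> max_frame_size %u, min_frame_size %u\n",
                           mtu, mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN,
                           ETH_ZLEN + ETH_FCS_LEN);
            return 0;
    }
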
In order to guarantee - * that ordering we need to do an leXX_to_cpup here in order to be - * ready for the byteswap that occurs with writel - */ - rar_low = le32_to_cpup((__le32 *)(addr)); - rar_high = le16_to_cpup((__le16 *)(addr + 4)); - - /* Indicate to hardware the Address is Valid. */ - if (adapter->mac_table[index].state & IGC_MAC_STATE_IN_USE) { - if (is_valid_ether_addr(addr)) - rar_high |= IGC_RAH_AV; - - rar_high |= IGC_RAH_POOL_1 << - adapter->mac_table[index].queue; - } - - wr32(IGC_RAL(index), rar_low); - wrfl(); - wr32(IGC_RAH(index), rar_high); - wrfl(); -} - -/* Set default MAC address for the PF in the first RAR entry */ -static void igc_set_default_mac_filter(struct igc_adapter *adapter) -{ - struct igc_mac_addr *mac_table = &adapter->mac_table[0]; - - ether_addr_copy(mac_table->addr, adapter->hw.mac.addr); - mac_table->state = IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE; - - igc_rar_set_index(adapter, 0); -} - -/* If the filter to be added and an already existing filter express - * the same address and address type, it should be possible to only - * override the other configurations, for example the queue to steer - * traffic. - */ -static bool igc_mac_entry_can_be_used(const struct igc_mac_addr *entry, - const u8 *addr, const u8 flags) -{ - if (!(entry->state & IGC_MAC_STATE_IN_USE)) - return true; - - if ((entry->state & IGC_MAC_STATE_SRC_ADDR) != - (flags & IGC_MAC_STATE_SRC_ADDR)) - return false; - - if (!ether_addr_equal(addr, entry->addr)) - return false; - - return true; -} - /* Add a MAC filter for 'addr' directing matching traffic to 'queue', * 'flags' is used to indicate what kind of match is made, match is by * default for the destination address, if matching by source address @@ -2597,159 +3787,20 @@ int igc_del_mac_steering_filter(struct igc_adapter *adapter, IGC_MAC_STATE_QUEUE_STEERING | flags); } -/* Add a MAC filter for 'addr' directing matching traffic to 'queue', - * 'flags' is used to indicate what kind of match is made, match is by - * default for the destination address, if matching by source address - * is desired the flag IGC_MAC_STATE_SRC_ADDR can be used. - */ -static int igc_add_mac_filter(struct igc_adapter *adapter, - const u8 *addr, const u8 queue) +static void igc_tsync_interrupt(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; - int rar_entries = hw->mac.rar_entry_count; - int i; - - if (is_zero_ether_addr(addr)) - return -EINVAL; - - /* Search for the first empty entry in the MAC table. - * Do not touch entries at the end of the table reserved for the VF MAC - * addresses. - */ - for (i = 0; i < rar_entries; i++) { - if (!igc_mac_entry_can_be_used(&adapter->mac_table[i], - addr, 0)) - continue; - - ether_addr_copy(adapter->mac_table[i].addr, addr); - adapter->mac_table[i].queue = queue; - adapter->mac_table[i].state |= IGC_MAC_STATE_IN_USE; - - igc_rar_set_index(adapter, i); - return i; - } - - return -ENOSPC; -} - -/* Remove a MAC filter for 'addr' directing matching traffic to - * 'queue', 'flags' is used to indicate what kind of match need to be - * removed, match is by default for the destination address, if - * matching by source address is to be removed the flag - * IGC_MAC_STATE_SRC_ADDR can be used. 
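
The RAR programming at the top of this hunk packs the six MAC address bytes into two little-endian registers: RAL takes bytes 0-3, the low half of RAH takes bytes 4-5, and the address-valid bit is OR'd in. A self-contained sketch of that packing; the valid bit is assumed to be bit 31, as in the sibling igb driver.

    #include <stdint.h>
    #include <stdio.h>

    #define DEMO_RAH_AV 0x80000000u  /* assumed address-valid bit, bit 31 of RAH */

    /* Pack a MAC address the way igc_rar_set_index() feeds RAL/RAH. */
    static void pack_rar(const uint8_t addr[6], uint32_t *ral, uint32_t *rah)
    {
            *ral = (uint32_t)addr[0] | ((uint32_t)addr[1] << 8) |
                   ((uint32_t)addr[2] << 16) | ((uint32_t)addr[3] << 24);
            *rah = (uint32_t)addr[4] | ((uint32_t)addr[5] << 8);
            *rah |= DEMO_RAH_AV;
    }

    int main(void)
    {
            const uint8_t mac[6] = { 0x00, 0x1b, 0x21, 0xaa, 0xbb, 0xcc };
            uint32_t ral, rah;

            pack_rar(mac, &ral, &rah);
            printf("RAL = 0x%08x, RAH = 0x%08x\n", ral, rah);
            return 0;
    }
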
- */ -static int igc_del_mac_filter(struct igc_adapter *adapter, - const u8 *addr, const u8 queue) -{ - struct igc_hw *hw = &adapter->hw; - int rar_entries = hw->mac.rar_entry_count; - int i; - - if (is_zero_ether_addr(addr)) - return -EINVAL; - - /* Search for matching entry in the MAC table based on given address - * and queue. Do not touch entries at the end of the table reserved - * for the VF MAC addresses. - */ - for (i = 0; i < rar_entries; i++) { - if (!(adapter->mac_table[i].state & IGC_MAC_STATE_IN_USE)) - continue; - if (adapter->mac_table[i].state != 0) - continue; - if (adapter->mac_table[i].queue != queue) - continue; - if (!ether_addr_equal(adapter->mac_table[i].addr, addr)) - continue; - - /* When a filter for the default address is "deleted", - * we return it to its initial configuration - */ - if (adapter->mac_table[i].state & IGC_MAC_STATE_DEFAULT) { - adapter->mac_table[i].state = - IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE; - adapter->mac_table[i].queue = 0; - } else { - adapter->mac_table[i].state = 0; - adapter->mac_table[i].queue = 0; - memset(adapter->mac_table[i].addr, 0, ETH_ALEN); - } + u32 tsicr = rd32(IGC_TSICR); + u32 ack = 0; - igc_rar_set_index(adapter, i); - return 0; + if (tsicr & IGC_TSICR_TXTS) { + /* retrieve hardware timestamp */ + schedule_work(&adapter->ptp_tx_work); + ack |= IGC_TSICR_TXTS; } - return -ENOENT; -} - -static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr) -{ - struct igc_adapter *adapter = netdev_priv(netdev); - int ret; - - ret = igc_add_mac_filter(adapter, addr, adapter->num_rx_queues); - - return min_t(int, ret, 0); -} - -static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr) -{ - struct igc_adapter *adapter = netdev_priv(netdev); - - igc_del_mac_filter(adapter, addr, adapter->num_rx_queues); - - return 0; -} - -/** - * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set - * @netdev: network interface device structure - * - * The set_rx_mode entry point is called whenever the unicast or multicast - * address lists or the network interface flags are updated. This routine is - * responsible for configuring the hardware for proper unicast, multicast, - * promiscuous mode, and all-multi behavior. 
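
The MAC filter helpers removed in this hunk treat the RAR table as a small array scanned linearly: adding reuses the first free slot or one that already holds the address, and deleting the default entry restores it rather than clearing it. A toy userspace model of that table logic; the structure layout and flag values are simplified for the example.

    #include <stdio.h>
    #include <string.h>

    #define TBL_SIZE     8
    #define ST_IN_USE  0x1
    #define ST_DEFAULT 0x2

    struct entry {
            unsigned char addr[6];
            unsigned char state;
    };

    static struct entry table[TBL_SIZE];

    static int add_filter(const unsigned char *addr)
    {
            int i;

            for (i = 0; i < TBL_SIZE; i++) {
                    /* reuse a free slot or one that already holds this address */
                    if ((table[i].state & ST_IN_USE) &&
                        memcmp(table[i].addr, addr, 6))
                            continue;
                    memcpy(table[i].addr, addr, 6);
                    table[i].state |= ST_IN_USE;
                    return i;
            }
            return -1;                      /* table full (-ENOSPC in the driver) */
    }

    static void del_filter(int i)
    {
            if (table[i].state & ST_DEFAULT)
                    table[i].state = ST_DEFAULT | ST_IN_USE; /* keep default MAC */
            else
                    memset(&table[i], 0, sizeof(table[i]));
    }

    int main(void)
    {
            const unsigned char mac[6] = { 2, 0, 0, 0, 0, 1 };

            table[0].state = ST_DEFAULT | ST_IN_USE;    /* slot 0: default MAC */
            printf("added at index %d\n", add_filter(mac));
            del_filter(1);
            return 0;
    }
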
- */ -static void igc_set_rx_mode(struct net_device *netdev) -{ - struct igc_adapter *adapter = netdev_priv(netdev); - struct igc_hw *hw = &adapter->hw; - u32 rctl = 0, rlpml = MAX_JUMBO_FRAME_SIZE; - int count; - - /* Check for Promiscuous and All Multicast modes */ - if (netdev->flags & IFF_PROMISC) { - rctl |= IGC_RCTL_UPE | IGC_RCTL_MPE; - } else { - if (netdev->flags & IFF_ALLMULTI) { - rctl |= IGC_RCTL_MPE; - } else { - /* Write addresses to the MTA, if the attempt fails - * then we should just turn on promiscuous mode so - * that we can at least receive multicast traffic - */ - count = igc_write_mc_addr_list(netdev); - if (count < 0) - rctl |= IGC_RCTL_MPE; - } - } - - /* Write addresses to available RAR registers, if there is not - * sufficient space to store all the addresses then enable - * unicast promiscuous mode - */ - if (__dev_uc_sync(netdev, igc_uc_sync, igc_uc_unsync)) - rctl |= IGC_RCTL_UPE; - - /* update state of unicast and multicast */ - rctl |= rd32(IGC_RCTL) & ~(IGC_RCTL_UPE | IGC_RCTL_MPE); - wr32(IGC_RCTL, rctl); - -#if (PAGE_SIZE < 8192) - if (adapter->max_frame_size <= IGC_MAX_FRAME_BUILD_SKB) - rlpml = IGC_MAX_FRAME_BUILD_SKB; -#endif - wr32(IGC_RLPML, rlpml); + /* acknowledge the interrupts */ + wr32(IGC_TSICR, ack); } /** @@ -2779,114 +3830,28 @@ static irqreturn_t igc_msix_other(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } + if (icr & IGC_ICR_TS) + igc_tsync_interrupt(adapter); + wr32(IGC_EIMS, adapter->eims_other); return IRQ_HANDLED; } -/** - * igc_write_ivar - configure ivar for given MSI-X vector - * @hw: pointer to the HW structure - * @msix_vector: vector number we are allocating to a given ring - * @index: row index of IVAR register to write within IVAR table - * @offset: column offset of in IVAR, should be multiple of 8 - * - * The IVAR table consists of 2 columns, - * each containing an cause allocation for an Rx and Tx ring, and a - * variable number of rows depending on the number of queues supported. - */ -static void igc_write_ivar(struct igc_hw *hw, int msix_vector, - int index, int offset) -{ - u32 ivar = array_rd32(IGC_IVAR0, index); - - /* clear any bits that are currently set */ - ivar &= ~((u32)0xFF << offset); - - /* write vector and valid bit */ - ivar |= (msix_vector | IGC_IVAR_VALID) << offset; - - array_wr32(IGC_IVAR0, index, ivar); -} - -static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector) -{ - struct igc_adapter *adapter = q_vector->adapter; - struct igc_hw *hw = &adapter->hw; - int rx_queue = IGC_N0_QUEUE; - int tx_queue = IGC_N0_QUEUE; - - if (q_vector->rx.ring) - rx_queue = q_vector->rx.ring->reg_idx; - if (q_vector->tx.ring) - tx_queue = q_vector->tx.ring->reg_idx; - - switch (hw->mac.type) { - case igc_i225: - if (rx_queue > IGC_N0_QUEUE) - igc_write_ivar(hw, msix_vector, - rx_queue >> 1, - (rx_queue & 0x1) << 4); - if (tx_queue > IGC_N0_QUEUE) - igc_write_ivar(hw, msix_vector, - tx_queue >> 1, - ((tx_queue & 0x1) << 4) + 8); - q_vector->eims_value = BIT(msix_vector); - break; - default: - WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n"); - break; - } - - /* add q_vector eims value to global eims_enable_mask */ - adapter->eims_enable_mask |= q_vector->eims_value; - - /* configure q_vector to set itr on first interrupt */ - q_vector->set_itr = 1; -} - -/** - * igc_configure_msix - Configure MSI-X hardware - * @adapter: Pointer to adapter structure - * - * igc_configure_msix sets up the hardware to properly - * generate MSI-X interrupts. 
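
The igc_write_ivar()/igc_assign_vector() logic shown (removed) above places a queue's interrupt cause into a byte of the IVAR register array: row index = queue >> 1, bit offset = (queue & 1) * 16, plus 8 for the Tx cause. The arithmetic is easy to check in isolation.

    #include <stdio.h>

    /* Compute where a queue's interrupt cause lives in the IVAR array:
     * returns the register index and writes the bit offset of its byte field.
     */
    static int ivar_slot(int queue, int is_tx, int *bit_offset)
    {
            *bit_offset = ((queue & 0x1) << 4) + (is_tx ? 8 : 0);
            return queue >> 1;              /* IVAR register index */
    }

    int main(void)
    {
            int q, tx;

            for (q = 0; q < 4; q++)
                    for (tx = 0; tx <= 1; tx++) {
                            int off, idx = ivar_slot(q, tx, &off);

                            printf("%s queue %d -> IVAR[%d] bits %d..%d\n",
                                   tx ? "Tx" : "Rx", q, idx, off, off + 7);
                    }
            return 0;
    }
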
- */ -static void igc_configure_msix(struct igc_adapter *adapter) +static void igc_write_itr(struct igc_q_vector *q_vector) { - struct igc_hw *hw = &adapter->hw; - int i, vector = 0; - u32 tmp; - - adapter->eims_enable_mask = 0; - - /* set vector for other causes, i.e. link changes */ - switch (hw->mac.type) { - case igc_i225: - /* Turn on MSI-X capability first, or our settings - * won't stick. And it will take days to debug. - */ - wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE | - IGC_GPIE_PBA | IGC_GPIE_EIAME | - IGC_GPIE_NSICR); - - /* enable msix_other interrupt */ - adapter->eims_other = BIT(vector); - tmp = (vector++ | IGC_IVAR_VALID) << 8; + u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK; - wr32(IGC_IVAR_MISC, tmp); - break; - default: - /* do nothing, since nothing else supports MSI-X */ - break; - } /* switch (hw->mac.type) */ + if (!q_vector->set_itr) + return; - adapter->eims_enable_mask |= adapter->eims_other; + if (!itr_val) + itr_val = IGC_ITR_VAL_MASK; - for (i = 0; i < adapter->num_q_vectors; i++) - igc_assign_vector(adapter->q_vector[i], vector++); + itr_val |= IGC_EITR_CNT_IGNR; - wrfl(); + writel(itr_val, q_vector->itr_register); + q_vector->set_itr = 0; } static irqreturn_t igc_msix_ring(int irq, void *data) @@ -2961,49 +3926,6 @@ err_out: } /** - * igc_reset_q_vector - Reset config for interrupt vector - * @adapter: board private structure to initialize - * @v_idx: Index of vector to be reset - * - * If NAPI is enabled it will delete any references to the - * NAPI struct. This is preparation for igc_free_q_vector. - */ -static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx) -{ - struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; - - /* if we're coming from igc_set_interrupt_capability, the vectors are - * not yet allocated - */ - if (!q_vector) - return; - - if (q_vector->tx.ring) - adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL; - - if (q_vector->rx.ring) - adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL; - - netif_napi_del(&q_vector->napi); -} - -static void igc_reset_interrupt_capability(struct igc_adapter *adapter) -{ - int v_idx = adapter->num_q_vectors; - - if (adapter->msix_entries) { - pci_disable_msix(adapter->pdev); - kfree(adapter->msix_entries); - adapter->msix_entries = NULL; - } else if (adapter->flags & IGC_FLAG_HAS_MSI) { - pci_disable_msi(adapter->pdev); - } - - while (v_idx--) - igc_reset_q_vector(adapter, v_idx); -} - -/** * igc_clear_interrupt_scheme - reset the device to a state of no interrupts * @adapter: Pointer to adapter structure * @@ -3016,48 +3938,6 @@ static void igc_clear_interrupt_scheme(struct igc_adapter *adapter) igc_reset_interrupt_capability(adapter); } -/** - * igc_free_q_vectors - Free memory allocated for interrupt vectors - * @adapter: board private structure to initialize - * - * This function frees the memory allocated to the q_vectors. In addition if - * NAPI is enabled it will delete any references to the NAPI struct prior - * to freeing the q_vector. - */ -static void igc_free_q_vectors(struct igc_adapter *adapter) -{ - int v_idx = adapter->num_q_vectors; - - adapter->num_tx_queues = 0; - adapter->num_rx_queues = 0; - adapter->num_q_vectors = 0; - - while (v_idx--) { - igc_reset_q_vector(adapter, v_idx); - igc_free_q_vector(adapter, v_idx); - } -} - -/** - * igc_free_q_vector - Free memory allocated for specific interrupt vector - * @adapter: board private structure to initialize - * @v_idx: Index of vector to be freed - * - * This function frees the memory allocated to the q_vector. 
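
igc_write_itr(), added earlier in this hunk, is the second half of the latch-and-apply pattern used by the ITR code: the poll path only records a new value and raises a flag, and the next interrupt writes the register, so the hardware timer is not restarted mid-interval. A compact model of that pattern with a stand-in register write; the sample values are arbitrary.

    #include <stdio.h>

    struct vec {
            unsigned int itr_val;   /* latched value */
            int set_itr;            /* pending-write flag */
    };

    static void fake_reg_write(unsigned int val)
    {
            printf("EITR <- 0x%x\n", val);
    }

    /* Poll path: latch only. */
    static void latch_itr(struct vec *v, unsigned int new_itr)
    {
            if (new_itr != v->itr_val) {
                    v->itr_val = new_itr;
                    v->set_itr = 1;
            }
    }

    /* Interrupt path: apply the latched value once. */
    static void apply_itr(struct vec *v)
    {
            if (!v->set_itr)
                    return;
            fake_reg_write(v->itr_val);
            v->set_itr = 0;
    }

    int main(void)
    {
            struct vec v = { .itr_val = 648, .set_itr = 0 };

            latch_itr(&v, 196);     /* decided during NAPI poll */
            apply_itr(&v);          /* performed at the next interrupt */
            apply_itr(&v);          /* no second write: flag already cleared */
            return 0;
    }
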
- */ -static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx) -{ - struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; - - adapter->q_vector[v_idx] = NULL; - - /* igc_get_stats64() might access the rings on this vector, - * we must wait a grace period before freeing it. - */ - if (q_vector) - kfree_rcu(q_vector, rcu); -} - /* Need to wait a few seconds after link up to get diagnostic information from * the phy */ @@ -3109,7 +3989,7 @@ bool igc_has_link(struct igc_adapter *adapter) /** * igc_watchdog - Timer Call-back - * @data: pointer to adapter cast into an unsigned long + * @t: timer for the watchdog */ static void igc_watchdog(struct timer_list *t) { @@ -3282,6 +4162,8 @@ no_wait: wr32(IGC_ICS, IGC_ICS_RXDMT0); } + igc_ptp_tx_hang(adapter); + /* Reset the timer */ if (!test_bit(__IGC_DOWN, &adapter->state)) { if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) @@ -3294,149 +4176,6 @@ no_wait: } /** - * igc_update_ring_itr - update the dynamic ITR value based on packet size - * @q_vector: pointer to q_vector - * - * Stores a new ITR value based on strictly on packet size. This - * algorithm is less sophisticated than that used in igc_update_itr, - * due to the difficulty of synchronizing statistics across multiple - * receive rings. The divisors and thresholds used by this function - * were determined based on theoretical maximum wire speed and testing - * data, in order to minimize response time while increasing bulk - * throughput. - * NOTE: This function is called only when operating in a multiqueue - * receive environment. - */ -static void igc_update_ring_itr(struct igc_q_vector *q_vector) -{ - struct igc_adapter *adapter = q_vector->adapter; - int new_val = q_vector->itr_val; - int avg_wire_size = 0; - unsigned int packets; - - /* For non-gigabit speeds, just fix the interrupt rate at 4000 - * ints/sec - ITR timer value of 120 ticks. - */ - switch (adapter->link_speed) { - case SPEED_10: - case SPEED_100: - new_val = IGC_4K_ITR; - goto set_itr_val; - default: - break; - } - - packets = q_vector->rx.total_packets; - if (packets) - avg_wire_size = q_vector->rx.total_bytes / packets; - - packets = q_vector->tx.total_packets; - if (packets) - avg_wire_size = max_t(u32, avg_wire_size, - q_vector->tx.total_bytes / packets); - - /* if avg_wire_size isn't set no work was done */ - if (!avg_wire_size) - goto clear_counts; - - /* Add 24 bytes to size to account for CRC, preamble, and gap */ - avg_wire_size += 24; - - /* Don't starve jumbo frames */ - avg_wire_size = min(avg_wire_size, 3000); - - /* Give a little boost to mid-size frames */ - if (avg_wire_size > 300 && avg_wire_size < 1200) - new_val = avg_wire_size / 3; - else - new_val = avg_wire_size / 2; - - /* conservative mode (itr 3) eliminates the lowest_latency setting */ - if (new_val < IGC_20K_ITR && - ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || - (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) - new_val = IGC_20K_ITR; - -set_itr_val: - if (new_val != q_vector->itr_val) { - q_vector->itr_val = new_val; - q_vector->set_itr = 1; - } -clear_counts: - q_vector->rx.total_bytes = 0; - q_vector->rx.total_packets = 0; - q_vector->tx.total_bytes = 0; - q_vector->tx.total_packets = 0; -} - -/** - * igc_update_itr - update the dynamic ITR value based on statistics - * @q_vector: pointer to q_vector - * @ring_container: ring info to update the itr for - * - * Stores a new ITR value based on packets and byte - * counts during the last interrupt. 
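
The watchdog above now also calls igc_ptp_tx_hang(). Its body is not part of this hunk, but conceptually it is a staleness check on an outstanding Tx timestamp request against IGC_PTP_TX_TIMEOUT (defined as HZ * 15 later in this patch). Under that assumption, the check reduces to a timeout comparison, sketched here with a coarse wall clock instead of jiffies.

    #include <stdio.h>
    #include <time.h>

    #define PTP_TX_TIMEOUT_SEC 15   /* mirrors IGC_PTP_TX_TIMEOUT = HZ * 15 */

    /* Return 1 if a timestamp request started at 'start' should be declared
     * hung; the driver makes an equivalent jiffies check from its watchdog.
     */
    static int tx_tstamp_hung(time_t start, time_t now)
    {
            return difftime(now, start) > PTP_TX_TIMEOUT_SEC;
    }

    int main(void)
    {
            time_t now = time(NULL);

            printf("fresh request hung?  %d\n", tx_tstamp_hung(now - 2, now));
            printf("stale request hung?  %d\n", tx_tstamp_hung(now - 30, now));
            return 0;
    }
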
The advantage of per interrupt - * computation is faster updates and more accurate ITR for the current - * traffic pattern. Constants in this function were computed - * based on theoretical maximum wire speed and thresholds were set based - * on testing data as well as attempting to minimize response time - * while increasing bulk throughput. - * NOTE: These calculations are only valid when operating in a single- - * queue environment. - */ -static void igc_update_itr(struct igc_q_vector *q_vector, - struct igc_ring_container *ring_container) -{ - unsigned int packets = ring_container->total_packets; - unsigned int bytes = ring_container->total_bytes; - u8 itrval = ring_container->itr; - - /* no packets, exit with status unchanged */ - if (packets == 0) - return; - - switch (itrval) { - case lowest_latency: - /* handle TSO and jumbo frames */ - if (bytes / packets > 8000) - itrval = bulk_latency; - else if ((packets < 5) && (bytes > 512)) - itrval = low_latency; - break; - case low_latency: /* 50 usec aka 20000 ints/s */ - if (bytes > 10000) { - /* this if handles the TSO accounting */ - if (bytes / packets > 8000) - itrval = bulk_latency; - else if ((packets < 10) || ((bytes / packets) > 1200)) - itrval = bulk_latency; - else if ((packets > 35)) - itrval = lowest_latency; - } else if (bytes / packets > 2000) { - itrval = bulk_latency; - } else if (packets <= 2 && bytes < 512) { - itrval = lowest_latency; - } - break; - case bulk_latency: /* 250 usec aka 4000 ints/s */ - if (bytes > 25000) { - if (packets > 35) - itrval = low_latency; - } else if (bytes < 1500) { - itrval = low_latency; - } - break; - } - - /* clear work counters since we have the values we need */ - ring_container->total_bytes = 0; - ring_container->total_packets = 0; - - /* write updated itr to ring container */ - ring_container->itr = itrval; -} - -/** * igc_intr_msi - Interrupt Handler * @irq: interrupt number * @data: pointer to a network interface device structure @@ -3513,424 +4252,6 @@ static irqreturn_t igc_intr(int irq, void *data) return IRQ_HANDLED; } -static void igc_set_itr(struct igc_q_vector *q_vector) -{ - struct igc_adapter *adapter = q_vector->adapter; - u32 new_itr = q_vector->itr_val; - u8 current_itr = 0; - - /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ - switch (adapter->link_speed) { - case SPEED_10: - case SPEED_100: - current_itr = 0; - new_itr = IGC_4K_ITR; - goto set_itr_now; - default: - break; - } - - igc_update_itr(q_vector, &q_vector->tx); - igc_update_itr(q_vector, &q_vector->rx); - - current_itr = max(q_vector->rx.itr, q_vector->tx.itr); - - /* conservative mode (itr 3) eliminates the lowest_latency setting */ - if (current_itr == lowest_latency && - ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || - (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) - current_itr = low_latency; - - switch (current_itr) { - /* counts and packets in update_itr are dependent on these numbers */ - case lowest_latency: - new_itr = IGC_70K_ITR; /* 70,000 ints/sec */ - break; - case low_latency: - new_itr = IGC_20K_ITR; /* 20,000 ints/sec */ - break; - case bulk_latency: - new_itr = IGC_4K_ITR; /* 4,000 ints/sec */ - break; - default: - break; - } - -set_itr_now: - if (new_itr != q_vector->itr_val) { - /* this attempts to bias the interrupt rate towards Bulk - * by adding intermediate steps when interrupt rate is - * increasing - */ - new_itr = new_itr > q_vector->itr_val ? 
- max((new_itr * q_vector->itr_val) / - (new_itr + (q_vector->itr_val >> 2)), - new_itr) : new_itr; - /* Don't write the value here; it resets the adapter's - * internal timer, and causes us to delay far longer than - * we should between interrupts. Instead, we write the ITR - * value at the beginning of the next interrupt so the timing - * ends up being correct. - */ - q_vector->itr_val = new_itr; - q_vector->set_itr = 1; - } -} - -static void igc_ring_irq_enable(struct igc_q_vector *q_vector) -{ - struct igc_adapter *adapter = q_vector->adapter; - struct igc_hw *hw = &adapter->hw; - - if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) || - (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) { - if (adapter->num_q_vectors == 1) - igc_set_itr(q_vector); - else - igc_update_ring_itr(q_vector); - } - - if (!test_bit(__IGC_DOWN, &adapter->state)) { - if (adapter->msix_entries) - wr32(IGC_EIMS, q_vector->eims_value); - else - igc_irq_enable(adapter); - } -} - -/** - * igc_poll - NAPI Rx polling callback - * @napi: napi polling structure - * @budget: count of how many packets we should handle - */ -static int igc_poll(struct napi_struct *napi, int budget) -{ - struct igc_q_vector *q_vector = container_of(napi, - struct igc_q_vector, - napi); - bool clean_complete = true; - int work_done = 0; - - if (q_vector->tx.ring) - clean_complete = igc_clean_tx_irq(q_vector, budget); - - if (q_vector->rx.ring) { - int cleaned = igc_clean_rx_irq(q_vector, budget); - - work_done += cleaned; - if (cleaned >= budget) - clean_complete = false; - } - - /* If all work not completed, return budget and keep polling */ - if (!clean_complete) - return budget; - - /* Exit the polling mode, but don't re-enable interrupts if stack might - * poll us due to busy-polling - */ - if (likely(napi_complete_done(napi, work_done))) - igc_ring_irq_enable(q_vector); - - return min(work_done, budget - 1); -} - -/** - * igc_set_interrupt_capability - set MSI or MSI-X if supported - * @adapter: Pointer to adapter structure - * - * Attempt to configure interrupts using the best available - * capabilities of the hardware and kernel. - */ -static void igc_set_interrupt_capability(struct igc_adapter *adapter, - bool msix) -{ - int numvecs, i; - int err; - - if (!msix) - goto msi_only; - adapter->flags |= IGC_FLAG_HAS_MSIX; - - /* Number of supported queues. 
*/ - adapter->num_rx_queues = adapter->rss_queues; - - adapter->num_tx_queues = adapter->rss_queues; - - /* start with one vector for every Rx queue */ - numvecs = adapter->num_rx_queues; - - /* if Tx handler is separate add 1 for every Tx queue */ - if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS)) - numvecs += adapter->num_tx_queues; - - /* store the number of vectors reserved for queues */ - adapter->num_q_vectors = numvecs; - - /* add 1 vector for link status interrupts */ - numvecs++; - - adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry), - GFP_KERNEL); - - if (!adapter->msix_entries) - return; - - /* populate entry values */ - for (i = 0; i < numvecs; i++) - adapter->msix_entries[i].entry = i; - - err = pci_enable_msix_range(adapter->pdev, - adapter->msix_entries, - numvecs, - numvecs); - if (err > 0) - return; - - kfree(adapter->msix_entries); - adapter->msix_entries = NULL; - - igc_reset_interrupt_capability(adapter); - -msi_only: - adapter->flags &= ~IGC_FLAG_HAS_MSIX; - - adapter->rss_queues = 1; - adapter->flags |= IGC_FLAG_QUEUE_PAIRS; - adapter->num_rx_queues = 1; - adapter->num_tx_queues = 1; - adapter->num_q_vectors = 1; - if (!pci_enable_msi(adapter->pdev)) - adapter->flags |= IGC_FLAG_HAS_MSI; -} - -static void igc_add_ring(struct igc_ring *ring, - struct igc_ring_container *head) -{ - head->ring = ring; - head->count++; -} - -/** - * igc_alloc_q_vector - Allocate memory for a single interrupt vector - * @adapter: board private structure to initialize - * @v_count: q_vectors allocated on adapter, used for ring interleaving - * @v_idx: index of vector in adapter struct - * @txr_count: total number of Tx rings to allocate - * @txr_idx: index of first Tx ring to allocate - * @rxr_count: total number of Rx rings to allocate - * @rxr_idx: index of first Rx ring to allocate - * - * We allocate one q_vector. If allocation fails we return -ENOMEM. 
- */ -static int igc_alloc_q_vector(struct igc_adapter *adapter, - unsigned int v_count, unsigned int v_idx, - unsigned int txr_count, unsigned int txr_idx, - unsigned int rxr_count, unsigned int rxr_idx) -{ - struct igc_q_vector *q_vector; - struct igc_ring *ring; - int ring_count; - - /* igc only supports 1 Tx and/or 1 Rx queue per vector */ - if (txr_count > 1 || rxr_count > 1) - return -ENOMEM; - - ring_count = txr_count + rxr_count; - - /* allocate q_vector and rings */ - q_vector = adapter->q_vector[v_idx]; - if (!q_vector) - q_vector = kzalloc(struct_size(q_vector, ring, ring_count), - GFP_KERNEL); - else - memset(q_vector, 0, struct_size(q_vector, ring, ring_count)); - if (!q_vector) - return -ENOMEM; - - /* initialize NAPI */ - netif_napi_add(adapter->netdev, &q_vector->napi, - igc_poll, 64); - - /* tie q_vector and adapter together */ - adapter->q_vector[v_idx] = q_vector; - q_vector->adapter = adapter; - - /* initialize work limits */ - q_vector->tx.work_limit = adapter->tx_work_limit; - - /* initialize ITR configuration */ - q_vector->itr_register = adapter->io_addr + IGC_EITR(0); - q_vector->itr_val = IGC_START_ITR; - - /* initialize pointer to rings */ - ring = q_vector->ring; - - /* initialize ITR */ - if (rxr_count) { - /* rx or rx/tx vector */ - if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3) - q_vector->itr_val = adapter->rx_itr_setting; - } else { - /* tx only vector */ - if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3) - q_vector->itr_val = adapter->tx_itr_setting; - } - - if (txr_count) { - /* assign generic ring traits */ - ring->dev = &adapter->pdev->dev; - ring->netdev = adapter->netdev; - - /* configure backlink on ring */ - ring->q_vector = q_vector; - - /* update q_vector Tx values */ - igc_add_ring(ring, &q_vector->tx); - - /* apply Tx specific ring traits */ - ring->count = adapter->tx_ring_count; - ring->queue_index = txr_idx; - - /* assign ring to adapter */ - adapter->tx_ring[txr_idx] = ring; - - /* push pointer to next ring */ - ring++; - } - - if (rxr_count) { - /* assign generic ring traits */ - ring->dev = &adapter->pdev->dev; - ring->netdev = adapter->netdev; - - /* configure backlink on ring */ - ring->q_vector = q_vector; - - /* update q_vector Rx values */ - igc_add_ring(ring, &q_vector->rx); - - /* apply Rx specific ring traits */ - ring->count = adapter->rx_ring_count; - ring->queue_index = rxr_idx; - - /* assign ring to adapter */ - adapter->rx_ring[rxr_idx] = ring; - } - - return 0; -} - -/** - * igc_alloc_q_vectors - Allocate memory for interrupt vectors - * @adapter: board private structure to initialize - * - * We allocate one q_vector per queue interrupt. If allocation fails we - * return -ENOMEM. 
- */ -static int igc_alloc_q_vectors(struct igc_adapter *adapter) -{ - int rxr_remaining = adapter->num_rx_queues; - int txr_remaining = adapter->num_tx_queues; - int rxr_idx = 0, txr_idx = 0, v_idx = 0; - int q_vectors = adapter->num_q_vectors; - int err; - - if (q_vectors >= (rxr_remaining + txr_remaining)) { - for (; rxr_remaining; v_idx++) { - err = igc_alloc_q_vector(adapter, q_vectors, v_idx, - 0, 0, 1, rxr_idx); - - if (err) - goto err_out; - - /* update counts and index */ - rxr_remaining--; - rxr_idx++; - } - } - - for (; v_idx < q_vectors; v_idx++) { - int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); - int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); - - err = igc_alloc_q_vector(adapter, q_vectors, v_idx, - tqpv, txr_idx, rqpv, rxr_idx); - - if (err) - goto err_out; - - /* update counts and index */ - rxr_remaining -= rqpv; - txr_remaining -= tqpv; - rxr_idx++; - txr_idx++; - } - - return 0; - -err_out: - adapter->num_tx_queues = 0; - adapter->num_rx_queues = 0; - adapter->num_q_vectors = 0; - - while (v_idx--) - igc_free_q_vector(adapter, v_idx); - - return -ENOMEM; -} - -/** - * igc_cache_ring_register - Descriptor ring to register mapping - * @adapter: board private structure to initialize - * - * Once we know the feature-set enabled for the device, we'll cache - * the register offset the descriptor ring is assigned to. - */ -static void igc_cache_ring_register(struct igc_adapter *adapter) -{ - int i = 0, j = 0; - - switch (adapter->hw.mac.type) { - case igc_i225: - /* Fall through */ - default: - for (; i < adapter->num_rx_queues; i++) - adapter->rx_ring[i]->reg_idx = i; - for (; j < adapter->num_tx_queues; j++) - adapter->tx_ring[j]->reg_idx = j; - break; - } -} - -/** - * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors - * @adapter: Pointer to adapter structure - * - * This function initializes the interrupts and allocates all of the queues. 
- */ -static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix) -{ - struct pci_dev *pdev = adapter->pdev; - int err = 0; - - igc_set_interrupt_capability(adapter, msix); - - err = igc_alloc_q_vectors(adapter); - if (err) { - dev_err(&pdev->dev, "Unable to allocate memory for vectors\n"); - goto err_alloc_q_vectors; - } - - igc_cache_ring_register(adapter); - - return 0; - -err_alloc_q_vectors: - igc_reset_interrupt_capability(adapter); - return err; -} - static void igc_free_irq(struct igc_adapter *adapter) { if (adapter->msix_entries) { @@ -3947,62 +4268,6 @@ static void igc_free_irq(struct igc_adapter *adapter) } /** - * igc_irq_disable - Mask off interrupt generation on the NIC - * @adapter: board private structure - */ -static void igc_irq_disable(struct igc_adapter *adapter) -{ - struct igc_hw *hw = &adapter->hw; - - if (adapter->msix_entries) { - u32 regval = rd32(IGC_EIAM); - - wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask); - wr32(IGC_EIMC, adapter->eims_enable_mask); - regval = rd32(IGC_EIAC); - wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask); - } - - wr32(IGC_IAM, 0); - wr32(IGC_IMC, ~0); - wrfl(); - - if (adapter->msix_entries) { - int vector = 0, i; - - synchronize_irq(adapter->msix_entries[vector++].vector); - - for (i = 0; i < adapter->num_q_vectors; i++) - synchronize_irq(adapter->msix_entries[vector++].vector); - } else { - synchronize_irq(adapter->pdev->irq); - } -} - -/** - * igc_irq_enable - Enable default interrupt generation settings - * @adapter: board private structure - */ -static void igc_irq_enable(struct igc_adapter *adapter) -{ - struct igc_hw *hw = &adapter->hw; - - if (adapter->msix_entries) { - u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA; - u32 regval = rd32(IGC_EIAC); - - wr32(IGC_EIAC, regval | adapter->eims_enable_mask); - regval = rd32(IGC_EIAM); - wr32(IGC_EIAM, regval | adapter->eims_enable_mask); - wr32(IGC_EIMS, adapter->eims_enable_mask); - wr32(IGC_IMS, ims); - } else { - wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA); - wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA); - } -} - -/** * igc_request_irq - initialize interrupts * @adapter: Pointer to adapter structure * @@ -4056,25 +4321,10 @@ request_done: return err; } -static void igc_write_itr(struct igc_q_vector *q_vector) -{ - u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK; - - if (!q_vector->set_itr) - return; - - if (!itr_val) - itr_val = IGC_ITR_VAL_MASK; - - itr_val |= IGC_EITR_CNT_IGNR; - - writel(itr_val, q_vector->itr_register); - q_vector->set_itr = 0; -} - /** - * igc_open - Called when a network interface is made active + * __igc_open - Called when a network interface is made active * @netdev: network interface device structure + * @resuming: boolean indicating if the device is resuming * * Returns 0 on success, negative value on failure * @@ -4164,8 +4414,9 @@ static int igc_open(struct net_device *netdev) } /** - * igc_close - Disables a network interface + * __igc_close - Disables a network interface * @netdev: network interface device structure + * @suspending: boolean indicating the device is suspending * * Returns 0, this is not allowed to fail * @@ -4199,6 +4450,24 @@ static int igc_close(struct net_device *netdev) return 0; } +/** + * igc_ioctl - Access the hwtstamp interface + * @netdev: network interface device structure + * @ifreq: interface request data + * @cmd: ioctl command + **/ +static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +{ + switch (cmd) { + case SIOCGHWTSTAMP: + return 
igc_ptp_get_ts_config(netdev, ifr); + case SIOCSHWTSTAMP: + return igc_ptp_set_ts_config(netdev, ifr); + default: + return -EOPNOTSUPP; + } +} + static const struct net_device_ops igc_netdev_ops = { .ndo_open = igc_open, .ndo_stop = igc_close, @@ -4210,6 +4479,7 @@ static const struct net_device_ops igc_netdev_ops = { .ndo_fix_features = igc_fix_features, .ndo_set_features = igc_set_features, .ndo_features_check = igc_features_check, + .ndo_do_ioctl = igc_ioctl, }; /* PCIe configuration access */ @@ -4345,32 +4615,26 @@ static int igc_probe(struct pci_dev *pdev, struct net_device *netdev; struct igc_hw *hw; const struct igc_info *ei = igc_info_tbl[ent->driver_data]; - int err; + int err, pci_using_dac; err = pci_enable_device_mem(pdev); if (err) return err; - err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); + pci_using_dac = 0; + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (!err) { - err = dma_set_coherent_mask(&pdev->dev, - DMA_BIT_MASK(64)); + pci_using_dac = 1; } else { - err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (err) { - err = dma_set_coherent_mask(&pdev->dev, - DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, "igc: Wrong DMA config\n"); - goto err_dma; - } + dev_err(&pdev->dev, + "No usable DMA configuration, aborting\n"); + goto err_dma; } } - err = pci_request_selected_regions(pdev, - pci_select_bars(pdev, - IORESOURCE_MEM), - igc_driver_name); + err = pci_request_mem_regions(pdev, igc_driver_name); if (err) goto err_pci_reg; @@ -4433,6 +4697,9 @@ static int igc_probe(struct pci_dev *pdev, goto err_sw_init; /* Add supported features to the features list*/ + netdev->features |= NETIF_F_SG; + netdev->features |= NETIF_F_TSO; + netdev->features |= NETIF_F_TSO6; netdev->features |= NETIF_F_RXCSUM; netdev->features |= NETIF_F_HW_CSUM; netdev->features |= NETIF_F_SCTP_CRC; @@ -4446,6 +4713,9 @@ static int igc_probe(struct pci_dev *pdev, netdev->hw_features |= NETIF_F_NTUPLE; netdev->hw_features |= netdev->features; + if (pci_using_dac) + netdev->features |= NETIF_F_HIGHDMA; + /* MTU range: 68 - 9216 */ netdev->min_mtu = ETH_MIN_MTU; netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE; @@ -4512,6 +4782,9 @@ static int igc_probe(struct pci_dev *pdev, /* carrier off reporting is important to ethtool even BEFORE open */ netif_carrier_off(netdev); + /* do hw tstamp init after resetting */ + igc_ptp_init(adapter); + /* Check if Media Autosense is enabled */ adapter->ei = *ei; @@ -4532,8 +4805,7 @@ err_sw_init: err_ioremap: free_netdev(netdev); err_alloc_etherdev: - pci_release_selected_regions(pdev, - pci_select_bars(pdev, IORESOURCE_MEM)); + pci_release_mem_regions(pdev); err_pci_reg: err_dma: pci_disable_device(pdev); @@ -4554,6 +4826,8 @@ static void igc_remove(struct pci_dev *pdev) struct net_device *netdev = pci_get_drvdata(pdev); struct igc_adapter *adapter = netdev_priv(netdev); + igc_ptp_stop(adapter); + set_bit(__IGC_DOWN, &adapter->state); del_timer_sync(&adapter->watchdog_timer); @@ -4580,105 +4854,216 @@ static void igc_remove(struct pci_dev *pdev) pci_disable_device(pdev); } -static struct pci_driver igc_driver = { - .name = igc_driver_name, - .id_table = igc_pci_tbl, - .probe = igc_probe, - .remove = igc_remove, -}; - -void igc_set_flag_queue_pairs(struct igc_adapter *adapter, - const u32 max_rss_queues) +static int __igc_shutdown(struct pci_dev *pdev, bool *enable_wake, + bool runtime) { - /* Determine if we need to pair queues. 
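
With the new igc_ioctl() hook at the top of this hunk wired into ndo_do_ioctl, user space can enable hardware timestamping through the standard SIOCSHWTSTAMP interface. A minimal example of such a request; the interface name is a placeholder and error handling is trimmed.

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <net/if.h>
    #include <linux/net_tstamp.h>
    #include <linux/sockios.h>

    int main(void)
    {
            struct hwtstamp_config cfg;
            struct ifreq ifr;
            int fd = socket(AF_INET, SOCK_DGRAM, 0);

            if (fd < 0)
                    return 1;

            memset(&cfg, 0, sizeof(cfg));
            cfg.tx_type = HWTSTAMP_TX_ON;           /* timestamp outgoing packets */
            cfg.rx_filter = HWTSTAMP_FILTER_ALL;    /* timestamp all Rx packets */

            memset(&ifr, 0, sizeof(ifr));
            strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);  /* placeholder ifname */
            ifr.ifr_data = (char *)&cfg;

            if (ioctl(fd, SIOCSHWTSTAMP, &ifr))
                    perror("SIOCSHWTSTAMP");        /* ends up in igc_ioctl() */
            else
                    printf("rx_filter granted: %d\n", cfg.rx_filter);

            close(fd);
            return 0;
    }
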
*/ - /* If rss_queues > half of max_rss_queues, pair the queues in - * order to conserve interrupts due to limited supply. - */ - if (adapter->rss_queues > (max_rss_queues / 2)) - adapter->flags |= IGC_FLAG_QUEUE_PAIRS; + struct net_device *netdev = pci_get_drvdata(pdev); + struct igc_adapter *adapter = netdev_priv(netdev); + u32 wufc = runtime ? IGC_WUFC_LNKC : adapter->wol; + struct igc_hw *hw = &adapter->hw; + u32 ctrl, rctl, status; + bool wake; + + rtnl_lock(); + netif_device_detach(netdev); + + if (netif_running(netdev)) + __igc_close(netdev, true); + + igc_clear_interrupt_scheme(adapter); + rtnl_unlock(); + + status = rd32(IGC_STATUS); + if (status & IGC_STATUS_LU) + wufc &= ~IGC_WUFC_LNKC; + + if (wufc) { + igc_setup_rctl(adapter); + igc_set_rx_mode(netdev); + + /* turn on all-multi mode if wake on multicast is enabled */ + if (wufc & IGC_WUFC_MC) { + rctl = rd32(IGC_RCTL); + rctl |= IGC_RCTL_MPE; + wr32(IGC_RCTL, rctl); + } + + ctrl = rd32(IGC_CTRL); + ctrl |= IGC_CTRL_ADVD3WUC; + wr32(IGC_CTRL, ctrl); + + /* Allow time for pending master requests to run */ + igc_disable_pcie_master(hw); + + wr32(IGC_WUC, IGC_WUC_PME_EN); + wr32(IGC_WUFC, wufc); + } else { + wr32(IGC_WUC, 0); + wr32(IGC_WUFC, 0); + } + + wake = wufc || adapter->en_mng_pt; + if (!wake) + igc_power_down_link(adapter); else - adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS; -} + igc_power_up_link(adapter); -unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter) -{ - unsigned int max_rss_queues; + if (enable_wake) + *enable_wake = wake; - /* Determine the maximum number of RSS queues supported. */ - max_rss_queues = IGC_MAX_RX_QUEUES; + /* Release control of h/w to f/w. If f/w is AMT enabled, this + * would have already happened in close and is redundant. + */ + igc_release_hw_control(adapter); - return max_rss_queues; + pci_disable_device(pdev); + + return 0; } -static void igc_init_queue_configuration(struct igc_adapter *adapter) +#ifdef CONFIG_PM +static int __maybe_unused igc_runtime_suspend(struct device *dev) { - u32 max_rss_queues; - - max_rss_queues = igc_get_max_rss_queues(adapter); - adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus()); - - igc_set_flag_queue_pairs(adapter, max_rss_queues); + return __igc_shutdown(to_pci_dev(dev), NULL, 1); } -/** - * igc_sw_init - Initialize general software structures (struct igc_adapter) - * @adapter: board private structure to initialize - * - * igc_sw_init initializes the Adapter private data structure. - * Fields are initialized based on PCI device information and - * OS network device settings (MTU size). - */ -static int igc_sw_init(struct igc_adapter *adapter) +static void igc_deliver_wake_packet(struct net_device *netdev) { - struct net_device *netdev = adapter->netdev; - struct pci_dev *pdev = adapter->pdev; + struct igc_adapter *adapter = netdev_priv(netdev); struct igc_hw *hw = &adapter->hw; + struct sk_buff *skb; + u32 wupl; - int size = sizeof(struct igc_mac_addr) * hw->mac.rar_entry_count; + wupl = rd32(IGC_WUPL) & IGC_WUPL_MASK; - pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); + /* WUPM stores only the first 128 bytes of the wake packet. + * Read the packet only if we have the whole thing. 
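
igc_set_flag_queue_pairs() and igc_init_queue_configuration(), shown at the top of this hunk, bound the RSS queue count by the hardware maximum and the online CPU count, then force Rx/Tx queue pairing once more than half of the maximum is in use so the interrupt budget is not exceeded. The decision reduces to a few lines; a runnable sketch with made-up CPU counts (the maximum of 4 is my reading of IGC_MAX_RX_QUEUES for i225).

    #include <stdio.h>

    #define MAX_RSS_QUEUES 4        /* assumed value of IGC_MAX_RX_QUEUES */

    static void queue_config(unsigned int online_cpus)
    {
            unsigned int rss = online_cpus < MAX_RSS_QUEUES ?
                               online_cpus : MAX_RSS_QUEUES;
            /* pair queues when more than half the maximum is in use */
            int pairs = rss > MAX_RSS_QUEUES / 2;

            printf("%u CPUs -> %u RSS queues, queue pairs %s\n",
                   online_cpus, rss, pairs ? "on" : "off");
    }

    int main(void)
    {
            queue_config(2);
            queue_config(8);
            return 0;
    }
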
+ */ + if (wupl == 0 || wupl > IGC_WUPM_BYTES) + return; - /* set default ring sizes */ - adapter->tx_ring_count = IGC_DEFAULT_TXD; - adapter->rx_ring_count = IGC_DEFAULT_RXD; + skb = netdev_alloc_skb_ip_align(netdev, IGC_WUPM_BYTES); + if (!skb) + return; - /* set default ITR values */ - adapter->rx_itr_setting = IGC_DEFAULT_ITR; - adapter->tx_itr_setting = IGC_DEFAULT_ITR; + skb_put(skb, wupl); - /* set default work limits */ - adapter->tx_work_limit = IGC_DEFAULT_TX_WORK; + /* Ensure reads are 32-bit aligned */ + wupl = roundup(wupl, 4); - /* adjust max frame to be at least the size of a standard frame */ - adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + - VLAN_HLEN; - adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; + memcpy_fromio(skb->data, hw->hw_addr + IGC_WUPM_REG(0), wupl); - spin_lock_init(&adapter->nfc_lock); - spin_lock_init(&adapter->stats64_lock); - /* Assume MSI-X interrupts, will be checked during IRQ allocation */ - adapter->flags |= IGC_FLAG_HAS_MSIX; + skb->protocol = eth_type_trans(skb, netdev); + netif_rx(skb); +} - adapter->mac_table = kzalloc(size, GFP_ATOMIC); - if (!adapter->mac_table) - return -ENOMEM; +static int __maybe_unused igc_resume(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct net_device *netdev = pci_get_drvdata(pdev); + struct igc_adapter *adapter = netdev_priv(netdev); + struct igc_hw *hw = &adapter->hw; + u32 err, val; - igc_init_queue_configuration(adapter); + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + pci_save_state(pdev); + + if (!pci_device_is_present(pdev)) + return -ENODEV; + err = pci_enable_device_mem(pdev); + if (err) { + dev_err(&pdev->dev, + "igc: Cannot enable PCI device from suspend\n"); + return err; + } + pci_set_master(pdev); + + pci_enable_wake(pdev, PCI_D3hot, 0); + pci_enable_wake(pdev, PCI_D3cold, 0); - /* This call may decrease the number of queues */ if (igc_init_interrupt_scheme(adapter, true)) { dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); return -ENOMEM; } - /* Explicitly disable IRQ since the NIC can be in any state. */ - igc_irq_disable(adapter); + igc_reset(adapter); - set_bit(__IGC_DOWN, &adapter->state); + /* let the f/w know that the h/w is now under the control of the + * driver. 
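
igc_deliver_wake_packet() above trusts WUPL only within bounds: the WUPM area stores at most the first 128 bytes of the wake frame, and reads from it must be 32-bit aligned, hence the roundup before memcpy_fromio(). The bounds and rounding logic in isolation; the 128-byte limit is taken from the comment in the hunk rather than from the header.

    #include <stdio.h>

    #define WUPM_BYTES 128          /* WUPM stores only the first 128 bytes */

    /* Return the number of bytes to read from WUPM, or 0 to skip the packet. */
    static unsigned int wake_packet_read_len(unsigned int wupl)
    {
            if (wupl == 0 || wupl > WUPM_BYTES)
                    return 0;                       /* truncated or empty: skip */
            return (wupl + 3) & ~3u;                /* roundup(wupl, 4) */
    }

    int main(void)
    {
            printf("wupl  61 -> read %u bytes\n", wake_packet_read_len(61));
            printf("wupl 300 -> read %u bytes\n", wake_packet_read_len(300));
            return 0;
    }
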
+ */ + igc_get_hw_control(adapter); - return 0; + val = rd32(IGC_WUS); + if (val & WAKE_PKT_WUS) + igc_deliver_wake_packet(netdev); + + wr32(IGC_WUS, ~0); + + rtnl_lock(); + if (!err && netif_running(netdev)) + err = __igc_open(netdev, true); + + if (!err) + netif_device_attach(netdev); + rtnl_unlock(); + + return err; +} + +static int __maybe_unused igc_runtime_resume(struct device *dev) +{ + return igc_resume(dev); +} + +static int __maybe_unused igc_suspend(struct device *dev) +{ + return __igc_shutdown(to_pci_dev(dev), NULL, 0); } +static int __maybe_unused igc_runtime_idle(struct device *dev) +{ + struct net_device *netdev = dev_get_drvdata(dev); + struct igc_adapter *adapter = netdev_priv(netdev); + + if (!igc_has_link(adapter)) + pm_schedule_suspend(dev, MSEC_PER_SEC * 5); + + return -EBUSY; +} +#endif /* CONFIG_PM */ + +static void igc_shutdown(struct pci_dev *pdev) +{ + bool wake; + + __igc_shutdown(pdev, &wake, 0); + + if (system_state == SYSTEM_POWER_OFF) { + pci_wake_from_d3(pdev, wake); + pci_set_power_state(pdev, PCI_D3hot); + } +} + +#ifdef CONFIG_PM +static const struct dev_pm_ops igc_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume) + SET_RUNTIME_PM_OPS(igc_runtime_suspend, igc_runtime_resume, + igc_runtime_idle) +}; +#endif + +static struct pci_driver igc_driver = { + .name = igc_driver_name, + .id_table = igc_pci_tbl, + .probe = igc_probe, + .remove = igc_remove, +#ifdef CONFIG_PM + .driver.pm = &igc_pm_ops, +#endif + .shutdown = igc_shutdown, +}; + /** * igc_reinit_queues - return error * @adapter: pointer to adapter structure diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c index f4b05af0dd2f..8e1799508edc 100644 --- a/drivers/net/ethernet/intel/igc/igc_phy.c +++ b/drivers/net/ethernet/intel/igc/igc_phy.c @@ -173,6 +173,7 @@ s32 igc_check_downshift(struct igc_hw *hw) s32 igc_phy_hw_reset(struct igc_hw *hw) { struct igc_phy_info *phy = &hw->phy; + u32 phpm = 0, timeout = 10000; s32 ret_val; u32 ctrl; @@ -186,6 +187,8 @@ s32 igc_phy_hw_reset(struct igc_hw *hw) if (ret_val) goto out; + phpm = rd32(IGC_I225_PHPM); + ctrl = rd32(IGC_CTRL); wr32(IGC_CTRL, ctrl | IGC_CTRL_PHY_RST); wrfl(); @@ -195,7 +198,18 @@ s32 igc_phy_hw_reset(struct igc_hw *hw) wr32(IGC_CTRL, ctrl); wrfl(); - usleep_range(1500, 2000); + /* SW should guarantee 100us for the completion of the PHY reset */ + usleep_range(100, 150); + do { + phpm = rd32(IGC_I225_PHPM); + timeout--; + udelay(1); + } while (!(phpm & IGC_PHY_RST_COMP) && timeout); + + if (!timeout) + hw_dbg("Timeout is expired after a phy reset\n"); + + usleep_range(100, 150); phy->ops.release(hw); diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c new file mode 100644 index 000000000000..693506587198 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -0,0 +1,716 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Intel Corporation */ + +#include "igc.h" + +#include <linux/module.h> +#include <linux/device.h> +#include <linux/pci.h> +#include <linux/ptp_classify.h> +#include <linux/clocksource.h> + +#define INCVALUE_MASK 0x7fffffff +#define ISGN 0x80000000 + +#define IGC_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 9) +#define IGC_PTP_TX_TIMEOUT (HZ * 15) + +/* SYSTIM read access for I225 */ +static void igc_ptp_read_i225(struct igc_adapter *adapter, + struct timespec64 *ts) +{ + struct igc_hw *hw = &adapter->hw; + u32 sec, nsec; + + /* The timestamp latches on lowest register read. 
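
The igc_phy_hw_reset() change in this hunk replaces part of the fixed sleep with a bounded poll of the PHPM register for the reset-complete bit, up to 10000 one-microsecond waits. The same poll-until-bit-or-timeout shape, stripped of real register access; fake_read_phpm() below stands in for rd32(IGC_I225_PHPM).

    #include <stdio.h>

    #define PHY_RST_COMP 0x01       /* stand-in for IGC_PHY_RST_COMP */

    static unsigned int fake_reads;

    /* Pretend the PHY reports reset completion on the third read. */
    static unsigned int fake_read_phpm(void)
    {
            return ++fake_reads >= 3 ? PHY_RST_COMP : 0;
    }

    static int wait_phy_reset_done(unsigned int timeout)
    {
            unsigned int phpm;

            do {
                    phpm = fake_read_phpm();
                    timeout--;
                    /* the driver udelay(1)s here between reads */
            } while (!(phpm & PHY_RST_COMP) && timeout);

            return timeout ? 0 : -1;        /* -1: reset never completed */
    }

    int main(void)
    {
            printf("poll result: %d after %u reads\n",
                   wait_phy_reset_done(10000), fake_reads);
            return 0;
    }
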
For I210/I211, the + * lowest register is SYSTIMR. Since we only need to provide nanosecond + * resolution, we can ignore it. + */ + rd32(IGC_SYSTIMR); + nsec = rd32(IGC_SYSTIML); + sec = rd32(IGC_SYSTIMH); + + ts->tv_sec = sec; + ts->tv_nsec = nsec; +} + +static void igc_ptp_write_i225(struct igc_adapter *adapter, + const struct timespec64 *ts) +{ + struct igc_hw *hw = &adapter->hw; + + /* Writing the SYSTIMR register is not necessary as it only + * provides sub-nanosecond resolution. + */ + wr32(IGC_SYSTIML, ts->tv_nsec); + wr32(IGC_SYSTIMH, ts->tv_sec); +} + +static int igc_ptp_adjfine_i225(struct ptp_clock_info *ptp, long scaled_ppm) +{ + struct igc_adapter *igc = container_of(ptp, struct igc_adapter, + ptp_caps); + struct igc_hw *hw = &igc->hw; + int neg_adj = 0; + u64 rate; + u32 inca; + + if (scaled_ppm < 0) { + neg_adj = 1; + scaled_ppm = -scaled_ppm; + } + rate = scaled_ppm; + rate <<= 14; + rate = div_u64(rate, 78125); + + inca = rate & INCVALUE_MASK; + if (neg_adj) + inca |= ISGN; + + wr32(IGC_TIMINCA, inca); + + return 0; +} + +static int igc_ptp_adjtime_i225(struct ptp_clock_info *ptp, s64 delta) +{ + struct igc_adapter *igc = container_of(ptp, struct igc_adapter, + ptp_caps); + struct timespec64 now, then = ns_to_timespec64(delta); + unsigned long flags; + + spin_lock_irqsave(&igc->tmreg_lock, flags); + + igc_ptp_read_i225(igc, &now); + now = timespec64_add(now, then); + igc_ptp_write_i225(igc, (const struct timespec64 *)&now); + + spin_unlock_irqrestore(&igc->tmreg_lock, flags); + + return 0; +} + +static int igc_ptp_gettimex64_i225(struct ptp_clock_info *ptp, + struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + struct igc_adapter *igc = container_of(ptp, struct igc_adapter, + ptp_caps); + struct igc_hw *hw = &igc->hw; + unsigned long flags; + + spin_lock_irqsave(&igc->tmreg_lock, flags); + + ptp_read_system_prets(sts); + rd32(IGC_SYSTIMR); + ptp_read_system_postts(sts); + ts->tv_nsec = rd32(IGC_SYSTIML); + ts->tv_sec = rd32(IGC_SYSTIMH); + + spin_unlock_irqrestore(&igc->tmreg_lock, flags); + + return 0; +} + +static int igc_ptp_settime_i225(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + struct igc_adapter *igc = container_of(ptp, struct igc_adapter, + ptp_caps); + unsigned long flags; + + spin_lock_irqsave(&igc->tmreg_lock, flags); + + igc_ptp_write_i225(igc, ts); + + spin_unlock_irqrestore(&igc->tmreg_lock, flags); + + return 0; +} + +static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) +{ + return -EOPNOTSUPP; +} + +/** + * igc_ptp_systim_to_hwtstamp - convert system time value to HW timestamp + * @adapter: board private structure + * @hwtstamps: timestamp structure to update + * @systim: unsigned 64bit system time value + * + * We need to convert the system time value stored in the RX/TXSTMP registers + * into a hwtstamp which can be used by the upper level timestamping functions. + **/ +static void igc_ptp_systim_to_hwtstamp(struct igc_adapter *adapter, + struct skb_shared_hwtstamps *hwtstamps, + u64 systim) +{ + switch (adapter->hw.mac.type) { + case igc_i225: + memset(hwtstamps, 0, sizeof(*hwtstamps)); + /* Upper 32 bits contain s, lower 32 bits contain ns. 
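+ * (ktime_set() below is therefore fed systim >> 32 as seconds and
+ * the low 32 bits as nanoseconds; no timecounter conversion is
+ * needed on i225.)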
*/ + hwtstamps->hwtstamp = ktime_set(systim >> 32, + systim & 0xFFFFFFFF); + break; + default: + break; + } +} + +/** + * igc_ptp_rx_pktstamp - retrieve Rx per packet timestamp + * @q_vector: Pointer to interrupt specific structure + * @va: Pointer to address containing Rx buffer + * @skb: Buffer containing timestamp and packet + * + * This function is meant to retrieve the first timestamp from the + * first buffer of an incoming frame. The value is stored in little + * endian format starting on byte 0. There's a second timestamp + * starting on byte 8. + **/ +void igc_ptp_rx_pktstamp(struct igc_q_vector *q_vector, void *va, + struct sk_buff *skb) +{ + struct igc_adapter *adapter = q_vector->adapter; + __le64 *regval = (__le64 *)va; + int adjust = 0; + + /* The timestamp is recorded in little endian format. + * DWORD: | 0 | 1 | 2 | 3 + * Field: | Timer0 Low | Timer0 High | Timer1 Low | Timer1 High + */ + igc_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), + le64_to_cpu(regval[0])); + + /* adjust timestamp for the RX latency based on link speed */ + if (adapter->hw.mac.type == igc_i225) { + switch (adapter->link_speed) { + case SPEED_10: + adjust = IGC_I225_RX_LATENCY_10; + break; + case SPEED_100: + adjust = IGC_I225_RX_LATENCY_100; + break; + case SPEED_1000: + adjust = IGC_I225_RX_LATENCY_1000; + break; + case SPEED_2500: + adjust = IGC_I225_RX_LATENCY_2500; + break; + } + } + skb_hwtstamps(skb)->hwtstamp = + ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust); +} + +/** + * igc_ptp_rx_rgtstamp - retrieve Rx timestamp stored in register + * @q_vector: Pointer to interrupt specific structure + * @skb: Buffer containing timestamp and packet + * + * This function is meant to retrieve a timestamp from the internal registers + * of the adapter and store it in the skb. + */ +void igc_ptp_rx_rgtstamp(struct igc_q_vector *q_vector, + struct sk_buff *skb) +{ + struct igc_adapter *adapter = q_vector->adapter; + struct igc_hw *hw = &adapter->hw; + u64 regval; + + /* If this bit is set, then the RX registers contain the time + * stamp. No other packet will be time stamped until we read + * these registers, so read the registers to make them + * available again. Because only one packet can be time + * stamped at a time, we know that the register values must + * belong to this one here and therefore we don't need to + * compare any of the additional attributes stored for it. + * + * If nothing went wrong, then it should have a shared + * tx_flags that we can turn into a skb_shared_hwtstamps. + */ + if (!(rd32(IGC_TSYNCRXCTL) & IGC_TSYNCRXCTL_VALID)) + return; + + regval = rd32(IGC_RXSTMPL); + regval |= (u64)rd32(IGC_RXSTMPH) << 32; + + igc_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval); + + /* Update the last_rx_timestamp timer in order to enable watchdog check + * for error case of latched timestamp on a dropped packet. + */ + adapter->last_rx_timestamp = jiffies; +} + +/** + * igc_ptp_enable_tstamp_rxqueue - Enable RX timestamp for a queue + * @rx_ring: Pointer to RX queue + * @timer: Index for timer + * + * This function enables RX timestamping for a queue, and selects + * which 1588 timer will provide the timestamp. 
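+ * Timer selection is done per queue through the TIMER0SEL/TIMER1SEL
+ * fields of SRRCTL, while IGC_SRRCTL_TIMESTAMP enables the per-packet
+ * stamp itself.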
+ */ +static void igc_ptp_enable_tstamp_rxqueue(struct igc_adapter *adapter, + struct igc_ring *rx_ring, u8 timer) +{ + struct igc_hw *hw = &adapter->hw; + int reg_idx = rx_ring->reg_idx; + u32 srrctl = rd32(IGC_SRRCTL(reg_idx)); + + srrctl |= IGC_SRRCTL_TIMESTAMP; + srrctl |= IGC_SRRCTL_TIMER1SEL(timer); + srrctl |= IGC_SRRCTL_TIMER0SEL(timer); + + wr32(IGC_SRRCTL(reg_idx), srrctl); +} + +static void igc_ptp_enable_tstamp_all_rxqueues(struct igc_adapter *adapter, + u8 timer) +{ + int i; + + for (i = 0; i < adapter->num_rx_queues; i++) { + struct igc_ring *ring = adapter->rx_ring[i]; + + igc_ptp_enable_tstamp_rxqueue(adapter, ring, timer); + } +} + +/** + * igc_ptp_set_timestamp_mode - setup hardware for timestamping + * @adapter: networking device structure + * @config: hwtstamp configuration + * + * Outgoing time stamping can be enabled and disabled. Play nice and + * disable it when requested, although it shouldn't case any overhead + * when no packet needs it. At most one packet in the queue may be + * marked for time stamping, otherwise it would be impossible to tell + * for sure to which packet the hardware time stamp belongs. + * + * Incoming time stamping has to be configured via the hardware + * filters. Not all combinations are supported, in particular event + * type has to be specified. Matching the kind of event packet is + * not supported, with the exception of "all V2 events regardless of + * level 2 or 4". + * + */ +static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter, + struct hwtstamp_config *config) +{ + u32 tsync_tx_ctl = IGC_TSYNCTXCTL_ENABLED; + u32 tsync_rx_ctl = IGC_TSYNCRXCTL_ENABLED; + struct igc_hw *hw = &adapter->hw; + u32 tsync_rx_cfg = 0; + bool is_l4 = false; + bool is_l2 = false; + u32 regval; + + /* reserved for future extensions */ + if (config->flags) + return -EINVAL; + + switch (config->tx_type) { + case HWTSTAMP_TX_OFF: + tsync_tx_ctl = 0; + case HWTSTAMP_TX_ON: + break; + default: + return -ERANGE; + } + + switch (config->rx_filter) { + case HWTSTAMP_FILTER_NONE: + tsync_rx_ctl = 0; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + tsync_rx_ctl |= IGC_TSYNCRXCTL_TYPE_L4_V1; + tsync_rx_cfg = IGC_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE; + is_l4 = true; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + tsync_rx_ctl |= IGC_TSYNCRXCTL_TYPE_L4_V1; + tsync_rx_cfg = IGC_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE; + is_l4 = true; + break; + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + tsync_rx_ctl |= IGC_TSYNCRXCTL_TYPE_EVENT_V2; + config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + is_l2 = true; + is_l4 = true; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_NTP_ALL: + case HWTSTAMP_FILTER_ALL: + tsync_rx_ctl |= IGC_TSYNCRXCTL_TYPE_ALL; + config->rx_filter = HWTSTAMP_FILTER_ALL; + break; + /* fall through */ + default: + config->rx_filter = HWTSTAMP_FILTER_NONE; + return -ERANGE; + } + + /* Per-packet timestamping only works if all packets are + * timestamped, so enable timestamping in all packets as long + * as one Rx filter was configured. 
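+ * The effective filter is therefore reported back to user space as
+ * HWTSTAMP_FILTER_ALL when the saved config is copied out of the
+ * SIOCSHWTSTAMP ioctl.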
+ */ + if (tsync_rx_ctl) { + tsync_rx_ctl = IGC_TSYNCRXCTL_ENABLED; + tsync_rx_ctl |= IGC_TSYNCRXCTL_TYPE_ALL; + tsync_rx_ctl |= IGC_TSYNCRXCTL_RXSYNSIG; + config->rx_filter = HWTSTAMP_FILTER_ALL; + is_l2 = true; + is_l4 = true; + + if (hw->mac.type == igc_i225) { + regval = rd32(IGC_RXPBS); + regval |= IGC_RXPBS_CFG_TS_EN; + wr32(IGC_RXPBS, regval); + + /* FIXME: For now, only support retrieving RX + * timestamps from timer 0 + */ + igc_ptp_enable_tstamp_all_rxqueues(adapter, 0); + } + } + + if (tsync_tx_ctl) { + tsync_tx_ctl = IGC_TSYNCTXCTL_ENABLED; + tsync_tx_ctl |= IGC_TSYNCTXCTL_TXSYNSIG; + } + + /* enable/disable TX */ + regval = rd32(IGC_TSYNCTXCTL); + regval &= ~IGC_TSYNCTXCTL_ENABLED; + regval |= tsync_tx_ctl; + wr32(IGC_TSYNCTXCTL, regval); + + /* enable/disable RX */ + regval = rd32(IGC_TSYNCRXCTL); + regval &= ~(IGC_TSYNCRXCTL_ENABLED | IGC_TSYNCRXCTL_TYPE_MASK); + regval |= tsync_rx_ctl; + wr32(IGC_TSYNCRXCTL, regval); + + /* define which PTP packets are time stamped */ + wr32(IGC_TSYNCRXCFG, tsync_rx_cfg); + + /* define ethertype filter for timestamped packets */ + if (is_l2) + wr32(IGC_ETQF(3), + (IGC_ETQF_FILTER_ENABLE | /* enable filter */ + IGC_ETQF_1588 | /* enable timestamping */ + ETH_P_1588)); /* 1588 eth protocol type */ + else + wr32(IGC_ETQF(3), 0); + + /* L4 Queue Filter[3]: filter by destination port and protocol */ + if (is_l4) { + u32 ftqf = (IPPROTO_UDP /* UDP */ + | IGC_FTQF_VF_BP /* VF not compared */ + | IGC_FTQF_1588_TIME_STAMP /* Enable Timestamp */ + | IGC_FTQF_MASK); /* mask all inputs */ + ftqf &= ~IGC_FTQF_MASK_PROTO_BP; /* enable protocol check */ + + wr32(IGC_IMIR(3), htons(PTP_EV_PORT)); + wr32(IGC_IMIREXT(3), + (IGC_IMIREXT_SIZE_BP | IGC_IMIREXT_CTRL_BP)); + wr32(IGC_FTQF(3), ftqf); + } else { + wr32(IGC_FTQF(3), IGC_FTQF_MASK); + } + wrfl(); + + /* clear TX/RX time stamp registers, just to be sure */ + regval = rd32(IGC_TXSTMPL); + regval = rd32(IGC_TXSTMPH); + regval = rd32(IGC_RXSTMPL); + regval = rd32(IGC_RXSTMPH); + + return 0; +} + +void igc_ptp_tx_hang(struct igc_adapter *adapter) +{ + bool timeout = time_is_before_jiffies(adapter->ptp_tx_start + + IGC_PTP_TX_TIMEOUT); + struct igc_hw *hw = &adapter->hw; + + if (!adapter->ptp_tx_skb) + return; + + if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state)) + return; + + /* If we haven't received a timestamp within the timeout, it is + * reasonable to assume that it will never occur, so we can unlock the + * timestamp bit when this occurs. + */ + if (timeout) { + cancel_work_sync(&adapter->ptp_tx_work); + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; + clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); + adapter->tx_hwtstamp_timeouts++; + /* Clear the Tx valid bit in TSYNCTXCTL register to enable + * interrupt + */ + rd32(IGC_TXSTMPH); + dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang\n"); + } +} + +/** + * igc_ptp_tx_hwtstamp - utility function which checks for TX time stamp + * @adapter: Board private structure + * + * If we were asked to do hardware stamping and such a time stamp is + * available, then it must have been for this skb here because we only + * allow only one such packet into the queue. 
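+ * (The __IGC_PTP_TX_IN_PROGRESS state bit, cleared further down once
+ * the skb has been handed off, is what enforces that limit.)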
+ */ +static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) +{ + struct sk_buff *skb = adapter->ptp_tx_skb; + struct skb_shared_hwtstamps shhwtstamps; + struct igc_hw *hw = &adapter->hw; + u64 regval; + + regval = rd32(IGC_TXSTMPL); + regval |= (u64)rd32(IGC_TXSTMPH) << 32; + igc_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); + + /* Clear the lock early before calling skb_tstamp_tx so that + * applications are not woken up before the lock bit is clear. We use + * a copy of the skb pointer to ensure other threads can't change it + * while we're notifying the stack. + */ + adapter->ptp_tx_skb = NULL; + clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); + + /* Notify the stack and free the skb after we've unlocked */ + skb_tstamp_tx(skb, &shhwtstamps); + dev_kfree_skb_any(skb); +} + +/** + * igc_ptp_tx_work + * @work: pointer to work struct + * + * This work function polls the TSYNCTXCTL valid bit to determine when a + * timestamp has been taken for the current stored skb. + */ +void igc_ptp_tx_work(struct work_struct *work) +{ + struct igc_adapter *adapter = container_of(work, struct igc_adapter, + ptp_tx_work); + struct igc_hw *hw = &adapter->hw; + u32 tsynctxctl; + + if (!adapter->ptp_tx_skb) + return; + + if (time_is_before_jiffies(adapter->ptp_tx_start + + IGC_PTP_TX_TIMEOUT)) { + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; + clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); + adapter->tx_hwtstamp_timeouts++; + /* Clear the tx valid bit in TSYNCTXCTL register to enable + * interrupt + */ + rd32(IGC_TXSTMPH); + dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang\n"); + return; + } + + tsynctxctl = rd32(IGC_TSYNCTXCTL); + if (tsynctxctl & IGC_TSYNCTXCTL_VALID) + igc_ptp_tx_hwtstamp(adapter); + else + /* reschedule to check later */ + schedule_work(&adapter->ptp_tx_work); +} + +/** + * igc_ptp_set_ts_config - set hardware time stamping config + * @netdev: network interface device structure + * @ifreq: interface request data + * + **/ +int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + struct hwtstamp_config config; + int err; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + err = igc_ptp_set_timestamp_mode(adapter, &config); + if (err) + return err; + + /* save these settings for future reference */ + memcpy(&adapter->tstamp_config, &config, + sizeof(adapter->tstamp_config)); + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? + -EFAULT : 0; +} + +/** + * igc_ptp_get_ts_config - get hardware time stamping config + * @netdev: network interface device structure + * @ifreq: interface request data + * + * Get the hwtstamp_config settings to return to the user. Rather than attempt + * to deconstruct the settings from the registers, just return a shadow copy + * of the last known settings. + **/ +int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + struct hwtstamp_config *config = &adapter->tstamp_config; + + return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ? + -EFAULT : 0; +} + +/** + * igc_ptp_init - Initialize PTP functionality + * @adapter: Board private structure + * + * This function is called at device probe to initialize the PTP + * functionality. 
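+ * For i225 parts it fills in the ptp_clock_info callbacks, sets up
+ * the Tx timestamp work item and a default (disabled) hwtstamp
+ * configuration, and registers a PHC with the PTP core.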
+ */ +void igc_ptp_init(struct igc_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct igc_hw *hw = &adapter->hw; + + switch (hw->mac.type) { + case igc_i225: + snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); + adapter->ptp_caps.owner = THIS_MODULE; + adapter->ptp_caps.max_adj = 62499999; + adapter->ptp_caps.adjfine = igc_ptp_adjfine_i225; + adapter->ptp_caps.adjtime = igc_ptp_adjtime_i225; + adapter->ptp_caps.gettimex64 = igc_ptp_gettimex64_i225; + adapter->ptp_caps.settime64 = igc_ptp_settime_i225; + adapter->ptp_caps.enable = igc_ptp_feature_enable_i225; + break; + default: + adapter->ptp_clock = NULL; + return; + } + + spin_lock_init(&adapter->tmreg_lock); + INIT_WORK(&adapter->ptp_tx_work, igc_ptp_tx_work); + + adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; + adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF; + + igc_ptp_reset(adapter); + + adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps, + &adapter->pdev->dev); + if (IS_ERR(adapter->ptp_clock)) { + adapter->ptp_clock = NULL; + dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n"); + } else if (adapter->ptp_clock) { + dev_info(&adapter->pdev->dev, "added PHC on %s\n", + adapter->netdev->name); + adapter->ptp_flags |= IGC_PTP_ENABLED; + } +} + +/** + * igc_ptp_suspend - Disable PTP work items and prepare for suspend + * @adapter: Board private structure + * + * This function stops the overflow check work and PTP Tx timestamp work, and + * will prepare the device for OS suspend. + */ +void igc_ptp_suspend(struct igc_adapter *adapter) +{ + if (!(adapter->ptp_flags & IGC_PTP_ENABLED)) + return; + + cancel_work_sync(&adapter->ptp_tx_work); + if (adapter->ptp_tx_skb) { + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; + clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); + } +} + +/** + * igc_ptp_stop - Disable PTP device and stop the overflow check. + * @adapter: Board private structure. + * + * This function stops the PTP support and cancels the delayed work. + **/ +void igc_ptp_stop(struct igc_adapter *adapter) +{ + igc_ptp_suspend(adapter); + + if (adapter->ptp_clock) { + ptp_clock_unregister(adapter->ptp_clock); + dev_info(&adapter->pdev->dev, "removed PHC on %s\n", + adapter->netdev->name); + adapter->ptp_flags &= ~IGC_PTP_ENABLED; + } +} + +/** + * igc_ptp_reset - Re-enable the adapter for PTP following a reset. + * @adapter: Board private structure. + * + * This function handles the reset work required to re-enable the PTP device. + **/ +void igc_ptp_reset(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + unsigned long flags; + + /* reset the tstamp_config */ + igc_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config); + + spin_lock_irqsave(&adapter->tmreg_lock, flags); + + switch (adapter->hw.mac.type) { + case igc_i225: + wr32(IGC_TSAUXC, 0x0); + wr32(IGC_TSSDP, 0x0); + wr32(IGC_TSIM, IGC_TSICR_INTERRUPTS); + wr32(IGC_IMS, IGC_IMS_TS); + break; + default: + /* No work to do. */ + goto out; + } + + /* Re-initialize the timer. 
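+ * (Only the igc_i225 branch below can be reached here; any other MAC
+ * type has already jumped to the 'out' label in the switch above.)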
*/ + if (hw->mac.type == igc_i225) { + struct timespec64 ts64 = ktime_to_timespec64(ktime_get_real()); + + igc_ptp_write_i225(adapter, &ts64); + } else { + timecounter_init(&adapter->tc, &adapter->cc, + ktime_to_ns(ktime_get_real())); + } +out: + spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + + wrfl(); +} diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h index 50d7c04dccf5..c9029b549b90 100644 --- a/drivers/net/ethernet/intel/igc/igc_regs.h +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -12,6 +12,7 @@ #define IGC_MDIC 0x00020 /* MDI Control - RW */ #define IGC_MDICNFG 0x00E04 /* MDC/MDIO Configuration - RW */ #define IGC_CONNSW 0x00034 /* Copper/Fiber switch control - RW */ +#define IGC_I225_PHPM 0x00E14 /* I225 PHY Power Management */ /* Internal Packet Buffer Size Registers */ #define IGC_RXPBS 0x02404 /* Rx Packet Buffer Size - RW */ @@ -209,12 +210,48 @@ #define IGC_LENERRS 0x04138 /* Length Errors Count */ #define IGC_HRMPC 0x0A018 /* Header Redirection Missed Packet Count */ +/* Time sync registers */ +#define IGC_TSICR 0x0B66C /* Time Sync Interrupt Cause */ +#define IGC_TSIM 0x0B674 /* Time Sync Interrupt Mask Register */ +#define IGC_TSAUXC 0x0B640 /* Timesync Auxiliary Control register */ +#define IGC_TSYNCRXCTL 0x0B620 /* Rx Time Sync Control register - RW */ +#define IGC_TSYNCTXCTL 0x0B614 /* Tx Time Sync Control register - RW */ +#define IGC_TSYNCRXCFG 0x05F50 /* Time Sync Rx Configuration - RW */ +#define IGC_TSSDP 0x0003C /* Time Sync SDP Configuration Register - RW */ + +#define IGC_IMIR(_i) (0x05A80 + ((_i) * 4)) /* Immediate Interrupt */ +#define IGC_IMIREXT(_i) (0x05AA0 + ((_i) * 4)) /* Immediate INTR Ext*/ + +#define IGC_FTQF(_n) (0x059E0 + (4 * (_n))) /* 5-tuple Queue Fltr */ + +#define IGC_RXPBS 0x02404 /* Rx Packet Buffer Size - RW */ + +/* System Time Registers */ +#define IGC_SYSTIML 0x0B600 /* System time register Low - RO */ +#define IGC_SYSTIMH 0x0B604 /* System time register High - RO */ +#define IGC_SYSTIMR 0x0B6F8 /* System time register Residue */ +#define IGC_TIMINCA 0x0B608 /* Increment attributes register - RW */ + +#define IGC_RXSTMPL 0x0B624 /* Rx timestamp Low - RO */ +#define IGC_RXSTMPH 0x0B628 /* Rx timestamp High - RO */ +#define IGC_TXSTMPL 0x0B618 /* Tx timestamp value Low - RO */ +#define IGC_TXSTMPH 0x0B61C /* Tx timestamp value High - RO */ + /* Management registers */ #define IGC_MANC 0x05820 /* Management Control - RW */ /* Shadow Ram Write Register - RW */ #define IGC_SRWR 0x12018 +/* Wake Up registers */ +#define IGC_WUC 0x05800 /* Wakeup Control - RW */ +#define IGC_WUFC 0x05808 /* Wakeup Filter Control - RW */ +#define IGC_WUS 0x05810 /* Wakeup Status - R/W1C */ +#define IGC_WUPL 0x05900 /* Wakeup Packet Length - RW */ + +/* Wake Up packet memory */ +#define IGC_WUPM_REG(_i) (0x05A00 + ((_i) * 4)) + /* forward declaration */ struct igc_hw; u32 igc_rd32(struct igc_hw *hw, u32 reg); diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index 3d8c051dd327..b64e91ea3465 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -70,7 +70,7 @@ static int ixgb_clean(struct napi_struct *, int); static bool ixgb_clean_rx_irq(struct ixgb_adapter *, int *, int); static void ixgb_alloc_rx_buffers(struct ixgb_adapter *, int); -static void ixgb_tx_timeout(struct net_device *dev); +static void ixgb_tx_timeout(struct net_device *dev, unsigned int txqueue); static void 
ixgb_tx_timeout_task(struct work_struct *work); static void ixgb_vlan_strip_enable(struct ixgb_adapter *adapter); @@ -1538,7 +1538,7 @@ ixgb_xmit_frame(struct sk_buff *skb, struct net_device *netdev) **/ static void -ixgb_tx_timeout(struct net_device *netdev) +ixgb_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ixgb_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c index 171cdc552961..5b1cf49df3d3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c @@ -166,7 +166,9 @@ static ssize_t ixgbe_dbg_netdev_ops_write(struct file *filp, ixgbe_dbg_netdev_ops_buf[len] = '\0'; if (strncmp(ixgbe_dbg_netdev_ops_buf, "tx_timeout", 10) == 0) { - adapter->netdev->netdev_ops->ndo_tx_timeout(adapter->netdev); + /* TX Queue number below is wrong, but ixgbe does not use it */ + adapter->netdev->netdev_ops->ndo_tx_timeout(adapter->netdev, + UINT_MAX); e_dev_info("tx_timeout called\n"); } else { e_dev_info("Unknown command: %s\n", ixgbe_dbg_netdev_ops_buf); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index a2b2ad1f60b1..718931d951bc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6175,7 +6175,7 @@ static void ixgbe_set_eee_capable(struct ixgbe_adapter *adapter) * ixgbe_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void ixgbe_tx_timeout(struct net_device *netdev) +static void ixgbe_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ixgbe_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index b43be9f14105..74b540ebb3dc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -277,7 +277,7 @@ static bool ixgbe_alloc_buffer_zc(struct ixgbe_ring *rx_ring, bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom); - xsk_umem_discard_addr(umem); + xsk_umem_release_addr(umem); return true; } @@ -304,7 +304,7 @@ static bool ixgbe_alloc_buffer_slow_zc(struct ixgbe_ring *rx_ring, bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom); - xsk_umem_discard_addr_rq(umem); + xsk_umem_release_addr_rq(umem); return true; } diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 64ec0e7c64b4..4622c4ea2e46 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -250,7 +250,7 @@ static void ixgbevf_tx_timeout_reset(struct ixgbevf_adapter *adapter) * ixgbevf_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ -static void ixgbevf_tx_timeout(struct net_device *netdev) +static void ixgbevf_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 25aa400e2e3c..2e4975572e9f 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -2337,7 +2337,7 @@ jme_change_mtu(struct net_device *netdev, int new_mtu) } static void -jme_tx_timeout(struct net_device *netdev) +jme_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct jme_adapter *jme = netdev_priv(netdev); diff --git 
a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index ae195f8adff5..f1d84921e42b 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -917,7 +917,7 @@ static void korina_restart_task(struct work_struct *work) enable_irq(lp->rx_irq); } -static void korina_tx_timeout(struct net_device *dev) +static void korina_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct korina_private *lp = netdev_priv(dev); @@ -1043,7 +1043,7 @@ static int korina_probe(struct platform_device *pdev) r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "korina_regs"); dev->base_addr = r->start; - lp->eth_regs = ioremap_nocache(r->start, resource_size(r)); + lp->eth_regs = ioremap(r->start, resource_size(r)); if (!lp->eth_regs) { printk(KERN_ERR DRV_NAME ": cannot remap registers\n"); rc = -ENXIO; @@ -1051,7 +1051,7 @@ static int korina_probe(struct platform_device *pdev) } r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "korina_dma_rx"); - lp->rx_dma_regs = ioremap_nocache(r->start, resource_size(r)); + lp->rx_dma_regs = ioremap(r->start, resource_size(r)); if (!lp->rx_dma_regs) { printk(KERN_ERR DRV_NAME ": cannot remap Rx DMA registers\n"); rc = -ENXIO; @@ -1059,7 +1059,7 @@ static int korina_probe(struct platform_device *pdev) } r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "korina_dma_tx"); - lp->tx_dma_regs = ioremap_nocache(r->start, resource_size(r)); + lp->tx_dma_regs = ioremap(r->start, resource_size(r)); if (!lp->tx_dma_regs) { printk(KERN_ERR DRV_NAME ": cannot remap Tx DMA registers\n"); rc = -ENXIO; diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 6e73ffe6f928..2d0c52f7106b 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -510,13 +510,6 @@ ltq_etop_change_mtu(struct net_device *dev, int new_mtu) } static int -ltq_etop_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) -{ - /* TODO: mii-toll reports "No MII transceiver present!." 
?!*/ - return phy_mii_ioctl(dev->phydev, rq, cmd); -} - -static int ltq_etop_set_mac_address(struct net_device *dev, void *p) { int ret = eth_mac_addr(dev, p); @@ -594,7 +587,7 @@ err_hw: } static void -ltq_etop_tx_timeout(struct net_device *dev) +ltq_etop_tx_timeout(struct net_device *dev, unsigned int txqueue) { int err; @@ -616,7 +609,7 @@ static const struct net_device_ops ltq_eth_netdev_ops = { .ndo_stop = ltq_etop_stop, .ndo_start_xmit = ltq_etop_tx, .ndo_change_mtu = ltq_etop_change_mtu, - .ndo_do_ioctl = ltq_etop_ioctl, + .ndo_do_ioctl = phy_do_ioctl, .ndo_set_mac_address = ltq_etop_set_mac_address, .ndo_validate_addr = eth_validate_addr, .ndo_set_rx_mode = ltq_etop_set_multicast_list, @@ -649,7 +642,7 @@ ltq_etop_probe(struct platform_device *pdev) goto err_out; } - ltq_etop_membase = devm_ioremap_nocache(&pdev->dev, + ltq_etop_membase = devm_ioremap(&pdev->dev, res->start, resource_size(res)); if (!ltq_etop_membase) { dev_err(&pdev->dev, "failed to remap etop engine %d\n", diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 65a093216dac..3c8125cbc84d 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2590,7 +2590,7 @@ static void tx_timeout_task(struct work_struct *ugly) } } -static void mv643xx_eth_tx_timeout(struct net_device *dev) +static void mv643xx_eth_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mv643xx_eth_private *mp = netdev_priv(dev); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 67ad8b8b127d..037e054b01a2 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -324,8 +324,7 @@ ETH_HLEN + ETH_FCS_LEN, \ cache_line_size()) -#define MVNETA_SKB_HEADROOM (max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + \ - NET_IP_ALIGN) +#define MVNETA_SKB_HEADROOM max(XDP_PACKET_HEADROOM, NET_SKB_PAD) #define MVNETA_SKB_PAD (SKB_DATA_ALIGN(sizeof(struct skb_shared_info) + \ MVNETA_SKB_HEADROOM)) #define MVNETA_SKB_SIZE(len) (SKB_DATA_ALIGN(len) + MVNETA_SKB_PAD) @@ -1167,6 +1166,7 @@ bm_mtu_err: mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_short, 1 << pp->id); pp->bm_priv = NULL; + pp->rx_offset_correction = MVNETA_SKB_HEADROOM; mvreg_write(pp, MVNETA_ACC_MODE, MVNETA_ACC_MODE_EXT1); netdev_info(pp->dev, "fail to update MTU, fall back to software BM\n"); } @@ -3072,7 +3072,7 @@ static int mvneta_create_page_pool(struct mvneta_port *pp, .order = 0, .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, .pool_size = size, - .nid = cpu_to_node(0), + .nid = NUMA_NO_NODE, .dev = pp->dev->dev.parent, .dma_dir = xdp_prog ? 
DMA_BIDIRECTIONAL : DMA_FROM_DEVICE, .offset = pp->rx_offset_correction, @@ -4226,6 +4226,12 @@ static int mvneta_xdp_setup(struct net_device *dev, struct bpf_prog *prog, return -EOPNOTSUPP; } + if (pp->bm_priv) { + NL_SET_ERR_MSG_MOD(extack, + "Hardware Buffer Management not supported on XDP"); + return -EOPNOTSUPP; + } + need_update = !!pp->xdp_prog != !!prog; if (running && need_update) mvneta_stop(dev); @@ -4942,7 +4948,6 @@ static int mvneta_probe(struct platform_device *pdev) SET_NETDEV_DEV(dev, &pdev->dev); pp->id = global_port_id++; - pp->rx_offset_correction = MVNETA_SKB_HEADROOM; /* Obtain access to BM resources if enabled and already initialized */ bm_node = of_parse_phandle(dn, "buffer-manager", 0); @@ -4967,6 +4972,10 @@ static int mvneta_probe(struct platform_device *pdev) } of_node_put(bm_node); + /* sw buffer management */ + if (!pp->bm_priv) + pp->rx_offset_correction = MVNETA_SKB_HEADROOM; + err = mvneta_init(&pdev->dev, pp); if (err < 0) goto err_netdev; @@ -5124,6 +5133,7 @@ static int mvneta_resume(struct device *device) err = mvneta_bm_port_init(pdev, pp); if (err < 0) { dev_info(&pdev->dev, "use SW buffer management\n"); + pp->rx_offset_correction = MVNETA_SKB_HEADROOM; pp->bm_priv = NULL; } } diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 14e372cda7f4..72133cbe55d4 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -1114,7 +1114,7 @@ mvpp2_shared_interrupt_mask_unmask(struct mvpp2_port *port, bool mask) /* Port configuration routines */ static bool mvpp2_is_xlg(phy_interface_t interface) { - return interface == PHY_INTERFACE_MODE_10GKR || + return interface == PHY_INTERFACE_MODE_10GBASER || interface == PHY_INTERFACE_MODE_XAUI; } @@ -1200,7 +1200,7 @@ static int mvpp22_gop_init(struct mvpp2_port *port) case PHY_INTERFACE_MODE_2500BASEX: mvpp22_gop_init_sgmii(port); break; - case PHY_INTERFACE_MODE_10GKR: + case PHY_INTERFACE_MODE_10GBASER: if (port->gop_id != 0) goto invalid_conf; mvpp22_gop_init_10gkr(port); @@ -1649,7 +1649,7 @@ static void mvpp22_pcs_reset_deassert(struct mvpp2_port *port) xpcs = priv->iface_base + MVPP22_XPCS_BASE(port->gop_id); switch (port->phy_interface) { - case PHY_INTERFACE_MODE_10GKR: + case PHY_INTERFACE_MODE_10GBASER: val = readl(mpcs + MVPP22_MPCS_CLK_RESET); val |= MAC_CLK_RESET_MAC | MAC_CLK_RESET_SD_RX | MAC_CLK_RESET_SD_TX; @@ -4758,7 +4758,7 @@ static void mvpp2_phylink_validate(struct phylink_config *config, /* Invalid combinations */ switch (state->interface) { - case PHY_INTERFACE_MODE_10GKR: + case PHY_INTERFACE_MODE_10GBASER: case PHY_INTERFACE_MODE_XAUI: if (port->gop_id != 0) goto empty_set; @@ -4780,7 +4780,7 @@ static void mvpp2_phylink_validate(struct phylink_config *config, phylink_set(mask, Asym_Pause); switch (state->interface) { - case PHY_INTERFACE_MODE_10GKR: + case PHY_INTERFACE_MODE_10GBASER: case PHY_INTERFACE_MODE_XAUI: case PHY_INTERFACE_MODE_NA: if (port->gop_id == 0) { @@ -4792,6 +4792,8 @@ static void mvpp2_phylink_validate(struct phylink_config *config, phylink_set(mask, 10000baseER_Full); phylink_set(mask, 10000baseKR_Full); } + if (state->interface != PHY_INTERFACE_MODE_NA) + break; /* Fall-through */ case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_ID: @@ -4802,13 +4804,23 @@ static void mvpp2_phylink_validate(struct phylink_config *config, phylink_set(mask, 10baseT_Full); phylink_set(mask, 100baseT_Half); phylink_set(mask, 100baseT_Full); + 
phylink_set(mask, 1000baseT_Full); + phylink_set(mask, 1000baseX_Full); + if (state->interface != PHY_INTERFACE_MODE_NA) + break; /* Fall-through */ case PHY_INTERFACE_MODE_1000BASEX: case PHY_INTERFACE_MODE_2500BASEX: - phylink_set(mask, 1000baseT_Full); - phylink_set(mask, 1000baseX_Full); - phylink_set(mask, 2500baseT_Full); - phylink_set(mask, 2500baseX_Full); + if (port->comphy || + state->interface != PHY_INTERFACE_MODE_2500BASEX) { + phylink_set(mask, 1000baseT_Full); + phylink_set(mask, 1000baseX_Full); + } + if (port->comphy || + state->interface == PHY_INTERFACE_MODE_2500BASEX) { + phylink_set(mask, 2500baseT_Full); + phylink_set(mask, 2500baseX_Full); + } break; default: goto empty_set; @@ -4817,6 +4829,8 @@ static void mvpp2_phylink_validate(struct phylink_config *config, bitmap_and(supported, supported, mask, __ETHTOOL_LINK_MODE_MASK_NBITS); bitmap_and(state->advertising, state->advertising, mask, __ETHTOOL_LINK_MODE_MASK_NBITS); + + phylink_helper_basex_speed(state); return; empty_set: @@ -5233,6 +5247,15 @@ static int mvpp2_port_probe(struct platform_device *pdev, goto err_free_netdev; } + /* + * Rewrite 10GBASE-KR to 10GBASE-R for compatibility with existing DT. + * Existing usage of 10GBASE-KR is not correct; no backplane + * negotiation is done, and this driver does not actually support + * 10GBASE-KR. + */ + if (phy_mode == PHY_INTERFACE_MODE_10GKR) + phy_mode = PHY_INTERFACE_MODE_10GBASER; + if (port_node) { comphy = devm_of_phy_get(&pdev->dev, port_node, NULL); if (IS_ERR(comphy)) { @@ -5411,6 +5434,16 @@ static int mvpp2_port_probe(struct platform_device *pdev, port->phylink = NULL; } + /* Cycle the comphy to power it down, saving 270mW per port - + * don't worry about an error powering it up. When the comphy + * driver does this, we can remove this code. + */ + if (port->comphy) { + err = mvpp22_comphy_init(port); + if (err == 0) + phy_power_off(port->comphy); + } + err = register_netdev(dev); if (err < 0) { dev_err(&pdev->dev, "failed to register netdev\n"); diff --git a/drivers/net/ethernet/marvell/octeontx2/Kconfig b/drivers/net/ethernet/marvell/octeontx2/Kconfig index fb34fbd62088..ced514c05c97 100644 --- a/drivers/net/ethernet/marvell/octeontx2/Kconfig +++ b/drivers/net/ethernet/marvell/octeontx2/Kconfig @@ -25,3 +25,11 @@ config NDC_DIS_DYNAMIC_CACHING This config option disables caching of dynamic entries such as NIX SQEs , NPA stack pages etc in NDC. Also locks down NIX SQ/CQ/RQ/RSS and NPA Aura/Pool contexts. + +config OCTEONTX2_PF + tristate "Marvell OcteonTX2 NIC Physical Function driver" + select OCTEONTX2_MBOX + depends on (64BIT && COMPILE_TEST) || ARM64 + depends on PCI + help + This driver supports Marvell's OcteonTX2 NIC physical function. diff --git a/drivers/net/ethernet/marvell/octeontx2/Makefile b/drivers/net/ethernet/marvell/octeontx2/Makefile index e579dcd54c97..0064a69e0f72 100644 --- a/drivers/net/ethernet/marvell/octeontx2/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/Makefile @@ -3,4 +3,6 @@ # Makefile for Marvell OcteonTX2 device drivers. 
# +obj-$(CONFIG_OCTEONTX2_MBOX) += af/ obj-$(CONFIG_OCTEONTX2_AF) += af/ +obj-$(CONFIG_OCTEONTX2_PF) += nic/ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/common.h b/drivers/net/ethernet/marvell/octeontx2/af/common.h index 784207bae5f8..cd33c2e6ca5f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/common.h @@ -143,8 +143,13 @@ enum nix_scheduler { NIX_TXSCH_LVL_CNT = 0x5, }; -#define TXSCH_TL1_DFLT_RR_QTM ((1 << 24) - 1) -#define TXSCH_TL1_DFLT_RR_PRIO (0x1ull) +#define TXSCH_RR_QTM_MAX ((1 << 24) - 1) +#define TXSCH_TL1_DFLT_RR_QTM TXSCH_RR_QTM_MAX +#define TXSCH_TL1_DFLT_RR_PRIO (0x1ull) +#define MAX_SCHED_WEIGHT 0xFF +#define DFLT_RR_WEIGHT 71 +#define DFLT_RR_QTM ((DFLT_RR_WEIGHT * TXSCH_RR_QTM_MAX) \ + / MAX_SCHED_WEIGHT) /* Min/Max packet sizes, excluding FCS */ #define NIC_HW_MIN_FRS 40 diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index a589748f1240..8bbc1f1d81f5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -210,7 +210,8 @@ M(NIX_SET_RX_CFG, 0x8010, nix_set_rx_cfg, nix_rx_cfg, msg_rsp) \ M(NIX_LSO_FORMAT_CFG, 0x8011, nix_lso_format_cfg, \ nix_lso_format_cfg, \ nix_lso_format_cfg_rsp) \ -M(NIX_RXVLAN_ALLOC, 0x8012, nix_rxvlan_alloc, msg_req, msg_rsp) +M(NIX_RXVLAN_ALLOC, 0x8012, nix_rxvlan_alloc, msg_req, msg_rsp) \ +M(NIX_GET_MAC_ADDR, 0x8018, nix_get_mac_addr, msg_req, nix_get_mac_addr_rsp) \ /* Messages initiated by AF (range 0xC00 - 0xDFF) */ #define MBOX_UP_CGX_MESSAGES \ @@ -618,6 +619,11 @@ struct nix_set_mac_addr { u8 mac_addr[ETH_ALEN]; /* MAC address to be set for this pcifunc */ }; +struct nix_get_mac_addr_rsp { + struct mbox_msghdr hdr; + u8 mac_addr[ETH_ALEN]; +}; + struct nix_mark_format_cfg { struct mbox_msghdr hdr; u8 offset; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 8a59f7d53fbf..eb5e542424e7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -2546,6 +2546,23 @@ int rvu_mbox_handler_nix_set_mac_addr(struct rvu *rvu, return 0; } +int rvu_mbox_handler_nix_get_mac_addr(struct rvu *rvu, + struct msg_req *req, + struct nix_get_mac_addr_rsp *rsp) +{ + u16 pcifunc = req->hdr.pcifunc; + struct rvu_pfvf *pfvf; + + if (!is_nixlf_attached(rvu, pcifunc)) + return NIX_AF_ERR_AF_LF_INVALID; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + + ether_addr_copy(rsp->mac_addr, pfvf->mac_addr); + + return 0; +} + int rvu_mbox_handler_nix_set_rx_mode(struct rvu *rvu, struct nix_rx_mode *req, struct msg_rsp *rsp) { diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile new file mode 100644 index 000000000000..41bf00cf5b1d --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for Marvell's OcteonTX2 ethernet device drivers +# + +obj-$(CONFIG_OCTEONTX2_PF) += octeontx2_nicpf.o + +octeontx2_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o + +ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c new file mode 100644 index 000000000000..8247d21d0432 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -0,0 
+1,1410 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTx2 RVU Ethernet driver + * + * Copyright (C) 2020 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <net/tso.h> + +#include "otx2_reg.h" +#include "otx2_common.h" +#include "otx2_struct.h" + +static void otx2_nix_rq_op_stats(struct queue_stats *stats, + struct otx2_nic *pfvf, int qidx) +{ + u64 incr = (u64)qidx << 32; + u64 *ptr; + + ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_RQ_OP_OCTS); + stats->bytes = otx2_atomic64_add(incr, ptr); + + ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_RQ_OP_PKTS); + stats->pkts = otx2_atomic64_add(incr, ptr); +} + +static void otx2_nix_sq_op_stats(struct queue_stats *stats, + struct otx2_nic *pfvf, int qidx) +{ + u64 incr = (u64)qidx << 32; + u64 *ptr; + + ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_OCTS); + stats->bytes = otx2_atomic64_add(incr, ptr); + + ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_PKTS); + stats->pkts = otx2_atomic64_add(incr, ptr); +} + +void otx2_update_lmac_stats(struct otx2_nic *pfvf) +{ + struct msg_req *req; + + if (!netif_running(pfvf->netdev)) + return; + + otx2_mbox_lock(&pfvf->mbox); + req = otx2_mbox_alloc_msg_cgx_stats(&pfvf->mbox); + if (!req) { + otx2_mbox_unlock(&pfvf->mbox); + return; + } + + otx2_sync_mbox_msg(&pfvf->mbox); + otx2_mbox_unlock(&pfvf->mbox); +} + +int otx2_update_rq_stats(struct otx2_nic *pfvf, int qidx) +{ + struct otx2_rcv_queue *rq = &pfvf->qset.rq[qidx]; + + if (!pfvf->qset.rq) + return 0; + + otx2_nix_rq_op_stats(&rq->stats, pfvf, qidx); + return 1; +} + +int otx2_update_sq_stats(struct otx2_nic *pfvf, int qidx) +{ + struct otx2_snd_queue *sq = &pfvf->qset.sq[qidx]; + + if (!pfvf->qset.sq) + return 0; + + otx2_nix_sq_op_stats(&sq->stats, pfvf, qidx); + return 1; +} + +void otx2_get_dev_stats(struct otx2_nic *pfvf) +{ + struct otx2_dev_stats *dev_stats = &pfvf->hw.dev_stats; + +#define OTX2_GET_RX_STATS(reg) \ + otx2_read64(pfvf, NIX_LF_RX_STATX(reg)) +#define OTX2_GET_TX_STATS(reg) \ + otx2_read64(pfvf, NIX_LF_TX_STATX(reg)) + + dev_stats->rx_bytes = OTX2_GET_RX_STATS(RX_OCTS); + dev_stats->rx_drops = OTX2_GET_RX_STATS(RX_DROP); + dev_stats->rx_bcast_frames = OTX2_GET_RX_STATS(RX_BCAST); + dev_stats->rx_mcast_frames = OTX2_GET_RX_STATS(RX_MCAST); + dev_stats->rx_ucast_frames = OTX2_GET_RX_STATS(RX_UCAST); + dev_stats->rx_frames = dev_stats->rx_bcast_frames + + dev_stats->rx_mcast_frames + + dev_stats->rx_ucast_frames; + + dev_stats->tx_bytes = OTX2_GET_TX_STATS(TX_OCTS); + dev_stats->tx_drops = OTX2_GET_TX_STATS(TX_DROP); + dev_stats->tx_bcast_frames = OTX2_GET_TX_STATS(TX_BCAST); + dev_stats->tx_mcast_frames = OTX2_GET_TX_STATS(TX_MCAST); + dev_stats->tx_ucast_frames = OTX2_GET_TX_STATS(TX_UCAST); + dev_stats->tx_frames = dev_stats->tx_bcast_frames + + dev_stats->tx_mcast_frames + + dev_stats->tx_ucast_frames; +} + +void otx2_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct otx2_nic *pfvf = netdev_priv(netdev); + struct otx2_dev_stats *dev_stats; + + otx2_get_dev_stats(pfvf); + + dev_stats = &pfvf->hw.dev_stats; + stats->rx_bytes = dev_stats->rx_bytes; + stats->rx_packets = dev_stats->rx_frames; + stats->rx_dropped = dev_stats->rx_drops; + stats->multicast = dev_stats->rx_mcast_frames; + + stats->tx_bytes = dev_stats->tx_bytes; + stats->tx_packets = 
dev_stats->tx_frames; + stats->tx_dropped = dev_stats->tx_drops; +} + +/* Sync MAC address with RVU AF */ +static int otx2_hw_set_mac_addr(struct otx2_nic *pfvf, u8 *mac) +{ + struct nix_set_mac_addr *req; + int err; + + otx2_mbox_lock(&pfvf->mbox); + req = otx2_mbox_alloc_msg_nix_set_mac_addr(&pfvf->mbox); + if (!req) { + otx2_mbox_unlock(&pfvf->mbox); + return -ENOMEM; + } + + ether_addr_copy(req->mac_addr, mac); + + err = otx2_sync_mbox_msg(&pfvf->mbox); + otx2_mbox_unlock(&pfvf->mbox); + return err; +} + +static int otx2_hw_get_mac_addr(struct otx2_nic *pfvf, + struct net_device *netdev) +{ + struct nix_get_mac_addr_rsp *rsp; + struct mbox_msghdr *msghdr; + struct msg_req *req; + int err; + + otx2_mbox_lock(&pfvf->mbox); + req = otx2_mbox_alloc_msg_nix_get_mac_addr(&pfvf->mbox); + if (!req) { + otx2_mbox_unlock(&pfvf->mbox); + return -ENOMEM; + } + + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) { + otx2_mbox_unlock(&pfvf->mbox); + return err; + } + + msghdr = otx2_mbox_get_rsp(&pfvf->mbox.mbox, 0, &req->hdr); + if (!msghdr) { + otx2_mbox_unlock(&pfvf->mbox); + return -ENOMEM; + } + rsp = (struct nix_get_mac_addr_rsp *)msghdr; + ether_addr_copy(netdev->dev_addr, rsp->mac_addr); + otx2_mbox_unlock(&pfvf->mbox); + + return 0; +} + +int otx2_set_mac_address(struct net_device *netdev, void *p) +{ + struct otx2_nic *pfvf = netdev_priv(netdev); + struct sockaddr *addr = p; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + if (!otx2_hw_set_mac_addr(pfvf, addr->sa_data)) + memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + else + return -EPERM; + + return 0; +} + +int otx2_hw_set_mtu(struct otx2_nic *pfvf, int mtu) +{ + struct nix_frs_cfg *req; + int err; + + otx2_mbox_lock(&pfvf->mbox); + req = otx2_mbox_alloc_msg_nix_set_hw_frs(&pfvf->mbox); + if (!req) { + otx2_mbox_unlock(&pfvf->mbox); + return -ENOMEM; + } + + /* SMQ config limits maximum pkt size that can be transmitted */ + req->update_smq = true; + pfvf->max_frs = mtu + OTX2_ETH_HLEN; + req->maxlen = pfvf->max_frs; + + err = otx2_sync_mbox_msg(&pfvf->mbox); + otx2_mbox_unlock(&pfvf->mbox); + return err; +} + +int otx2_set_flowkey_cfg(struct otx2_nic *pfvf) +{ + struct otx2_rss_info *rss = &pfvf->hw.rss_info; + struct nix_rss_flowkey_cfg *req; + int err; + + otx2_mbox_lock(&pfvf->mbox); + req = otx2_mbox_alloc_msg_nix_rss_flowkey_cfg(&pfvf->mbox); + if (!req) { + otx2_mbox_unlock(&pfvf->mbox); + return -ENOMEM; + } + req->mcam_index = -1; /* Default or reserved index */ + req->flowkey_cfg = rss->flowkey_cfg; + req->group = DEFAULT_RSS_CONTEXT_GROUP; + + err = otx2_sync_mbox_msg(&pfvf->mbox); + otx2_mbox_unlock(&pfvf->mbox); + return err; +} + +int otx2_set_rss_table(struct otx2_nic *pfvf) +{ + struct otx2_rss_info *rss = &pfvf->hw.rss_info; + struct mbox *mbox = &pfvf->mbox; + struct nix_aq_enq_req *aq; + int idx, err; + + otx2_mbox_lock(mbox); + /* Get memory to put this msg */ + for (idx = 0; idx < rss->rss_size; idx++) { + aq = otx2_mbox_alloc_msg_nix_aq_enq(mbox); + if (!aq) { + /* The shared memory buffer can be full. 
+ * Flush it and retry + */ + err = otx2_sync_mbox_msg(mbox); + if (err) { + otx2_mbox_unlock(mbox); + return err; + } + aq = otx2_mbox_alloc_msg_nix_aq_enq(mbox); + if (!aq) { + otx2_mbox_unlock(mbox); + return -ENOMEM; + } + } + + aq->rss.rq = rss->ind_tbl[idx]; + + /* Fill AQ info */ + aq->qidx = idx; + aq->ctype = NIX_AQ_CTYPE_RSS; + aq->op = NIX_AQ_INSTOP_INIT; + } + err = otx2_sync_mbox_msg(mbox); + otx2_mbox_unlock(mbox); + return err; +} + +void otx2_set_rss_key(struct otx2_nic *pfvf) +{ + struct otx2_rss_info *rss = &pfvf->hw.rss_info; + u64 *key = (u64 *)&rss->key[4]; + int idx; + + /* 352bit or 44byte key needs to be configured as below + * NIX_LF_RX_SECRETX0 = key<351:288> + * NIX_LF_RX_SECRETX1 = key<287:224> + * NIX_LF_RX_SECRETX2 = key<223:160> + * NIX_LF_RX_SECRETX3 = key<159:96> + * NIX_LF_RX_SECRETX4 = key<95:32> + * NIX_LF_RX_SECRETX5<63:32> = key<31:0> + */ + otx2_write64(pfvf, NIX_LF_RX_SECRETX(5), + (u64)(*((u32 *)&rss->key)) << 32); + idx = sizeof(rss->key) / sizeof(u64); + while (idx > 0) { + idx--; + otx2_write64(pfvf, NIX_LF_RX_SECRETX(idx), *key++); + } +} + +int otx2_rss_init(struct otx2_nic *pfvf) +{ + struct otx2_rss_info *rss = &pfvf->hw.rss_info; + int idx, ret = 0; + + rss->rss_size = sizeof(rss->ind_tbl); + + /* Init RSS key if it is not setup already */ + if (!rss->enable) + netdev_rss_key_fill(rss->key, sizeof(rss->key)); + otx2_set_rss_key(pfvf); + + if (!netif_is_rxfh_configured(pfvf->netdev)) { + /* Default indirection table */ + for (idx = 0; idx < rss->rss_size; idx++) + rss->ind_tbl[idx] = + ethtool_rxfh_indir_default(idx, + pfvf->hw.rx_queues); + } + ret = otx2_set_rss_table(pfvf); + if (ret) + return ret; + + /* Flowkey or hash config to be used for generating flow tag */ + rss->flowkey_cfg = rss->enable ? rss->flowkey_cfg : + NIX_FLOW_KEY_TYPE_IPV4 | NIX_FLOW_KEY_TYPE_IPV6 | + NIX_FLOW_KEY_TYPE_TCP | NIX_FLOW_KEY_TYPE_UDP | + NIX_FLOW_KEY_TYPE_SCTP; + + ret = otx2_set_flowkey_cfg(pfvf); + if (ret) + return ret; + + rss->enable = true; + return 0; +} + +void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int qidx) +{ + /* Configure CQE interrupt coalescing parameters + * + * HW triggers an irq when ECOUNT > cq_ecount_wait, hence + * set 1 less than cq_ecount_wait. And cq_time_wait is in + * usecs, convert that to 100ns count. 
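+ * The write below packs NIX_LF_CINTX_WAIT as: time wait (in 100ns
+ * units) in bits <63:48>, qcount wait in bits <47:32> and ecount
+ * wait in bits <31:0>.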
+ */ + otx2_write64(pfvf, NIX_LF_CINTX_WAIT(qidx), + ((u64)(pfvf->hw.cq_time_wait * 10) << 48) | + ((u64)pfvf->hw.cq_qcount_wait << 32) | + (pfvf->hw.cq_ecount_wait - 1)); +} + +dma_addr_t otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, + gfp_t gfp) +{ + dma_addr_t iova; + + /* Check if request can be accommodated in previous allocated page */ + if (pool->page && ((pool->page_offset + pool->rbsize) <= + (PAGE_SIZE << pool->rbpage_order))) { + pool->pageref++; + goto ret; + } + + otx2_get_page(pool); + + /* Allocate a new page */ + pool->page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, + pool->rbpage_order); + if (unlikely(!pool->page)) + return -ENOMEM; + + pool->page_offset = 0; +ret: + iova = (u64)otx2_dma_map_page(pfvf, pool->page, pool->page_offset, + pool->rbsize, DMA_FROM_DEVICE); + if (!iova) { + if (!pool->page_offset) + __free_pages(pool->page, pool->rbpage_order); + pool->page = NULL; + return -ENOMEM; + } + pool->page_offset += pool->rbsize; + return iova; +} + +void otx2_tx_timeout(struct net_device *netdev, unsigned int txq) +{ + struct otx2_nic *pfvf = netdev_priv(netdev); + + schedule_work(&pfvf->reset_task); +} + +void otx2_get_mac_from_af(struct net_device *netdev) +{ + struct otx2_nic *pfvf = netdev_priv(netdev); + int err; + + err = otx2_hw_get_mac_addr(pfvf, netdev); + if (err) + dev_warn(pfvf->dev, "Failed to read mac from hardware\n"); + + /* If AF doesn't provide a valid MAC, generate a random one */ + if (!is_valid_ether_addr(netdev->dev_addr)) + eth_hw_addr_random(netdev); +} + +static int otx2_get_link(struct otx2_nic *pfvf) +{ + int link = 0; + u16 map; + + /* cgx lmac link */ + if (pfvf->hw.tx_chan_base >= CGX_CHAN_BASE) { + map = pfvf->hw.tx_chan_base & 0x7FF; + link = 4 * ((map >> 8) & 0xF) + ((map >> 4) & 0xF); + } + /* LBK channel */ + if (pfvf->hw.tx_chan_base < SDP_CHAN_BASE) + link = 12; + + return link; +} + +int otx2_txschq_config(struct otx2_nic *pfvf, int lvl) +{ + struct otx2_hw *hw = &pfvf->hw; + struct nix_txschq_config *req; + u64 schq, parent; + + req = otx2_mbox_alloc_msg_nix_txschq_cfg(&pfvf->mbox); + if (!req) + return -ENOMEM; + + req->lvl = lvl; + req->num_regs = 1; + + schq = hw->txschq_list[lvl][0]; + /* Set topology e.t.c configuration */ + if (lvl == NIX_TXSCH_LVL_SMQ) { + req->reg[0] = NIX_AF_SMQX_CFG(schq); + req->regval[0] = ((pfvf->netdev->mtu + OTX2_ETH_HLEN) << 8) | + OTX2_MIN_MTU; + + req->regval[0] |= (0x20ULL << 51) | (0x80ULL << 39) | + (0x2ULL << 36); + req->num_regs++; + /* MDQ config */ + parent = hw->txschq_list[NIX_TXSCH_LVL_TL4][0]; + req->reg[1] = NIX_AF_MDQX_PARENT(schq); + req->regval[1] = parent << 16; + req->num_regs++; + /* Set DWRR quantum */ + req->reg[2] = NIX_AF_MDQX_SCHEDULE(schq); + req->regval[2] = DFLT_RR_QTM; + } else if (lvl == NIX_TXSCH_LVL_TL4) { + parent = hw->txschq_list[NIX_TXSCH_LVL_TL3][0]; + req->reg[0] = NIX_AF_TL4X_PARENT(schq); + req->regval[0] = parent << 16; + req->num_regs++; + req->reg[1] = NIX_AF_TL4X_SCHEDULE(schq); + req->regval[1] = DFLT_RR_QTM; + } else if (lvl == NIX_TXSCH_LVL_TL3) { + parent = hw->txschq_list[NIX_TXSCH_LVL_TL2][0]; + req->reg[0] = NIX_AF_TL3X_PARENT(schq); + req->regval[0] = parent << 16; + req->num_regs++; + req->reg[1] = NIX_AF_TL3X_SCHEDULE(schq); + req->regval[1] = DFLT_RR_QTM; + } else if (lvl == NIX_TXSCH_LVL_TL2) { + parent = hw->txschq_list[NIX_TXSCH_LVL_TL1][0]; + req->reg[0] = NIX_AF_TL2X_PARENT(schq); + req->regval[0] = parent << 16; + + req->num_regs++; + req->reg[1] = NIX_AF_TL2X_SCHEDULE(schq); + req->regval[1] = TXSCH_TL1_DFLT_RR_PRIO 
<< 24 | DFLT_RR_QTM; + + req->num_regs++; + req->reg[2] = NIX_AF_TL3_TL2X_LINKX_CFG(schq, + otx2_get_link(pfvf)); + /* Enable this queue and backpressure */ + req->regval[2] = BIT_ULL(13) | BIT_ULL(12); + + } else if (lvl == NIX_TXSCH_LVL_TL1) { + /* Default config for TL1. + * For VF this is always ignored. + */ + + /* Set DWRR quantum */ + req->reg[0] = NIX_AF_TL1X_SCHEDULE(schq); + req->regval[0] = TXSCH_TL1_DFLT_RR_QTM; + + req->num_regs++; + req->reg[1] = NIX_AF_TL1X_TOPOLOGY(schq); + req->regval[1] = (TXSCH_TL1_DFLT_RR_PRIO << 1); + + req->num_regs++; + req->reg[2] = NIX_AF_TL1X_CIR(schq); + req->regval[2] = 0; + } + + return otx2_sync_mbox_msg(&pfvf->mbox); +} + +int otx2_txsch_alloc(struct otx2_nic *pfvf) +{ + struct nix_txsch_alloc_req *req; + int lvl; + + /* Get memory to put this msg */ + req = otx2_mbox_alloc_msg_nix_txsch_alloc(&pfvf->mbox); + if (!req) + return -ENOMEM; + + /* Request one schq per level */ + for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) + req->schq[lvl] = 1; + + return otx2_sync_mbox_msg(&pfvf->mbox); +} + +int otx2_txschq_stop(struct otx2_nic *pfvf) +{ + struct nix_txsch_free_req *free_req; + int lvl, schq, err; + + otx2_mbox_lock(&pfvf->mbox); + /* Free the transmit schedulers */ + free_req = otx2_mbox_alloc_msg_nix_txsch_free(&pfvf->mbox); + if (!free_req) { + otx2_mbox_unlock(&pfvf->mbox); + return -ENOMEM; + } + + free_req->flags = TXSCHQ_FREE_ALL; + err = otx2_sync_mbox_msg(&pfvf->mbox); + otx2_mbox_unlock(&pfvf->mbox); + + /* Clear the txschq list */ + for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) { + for (schq = 0; schq < MAX_TXSCHQ_PER_FUNC; schq++) + pfvf->hw.txschq_list[lvl][schq] = 0; + } + return err; +} + +void otx2_sqb_flush(struct otx2_nic *pfvf) +{ + int qidx, sqe_tail, sqe_head; + u64 incr, *ptr, val; + + ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_STATUS); + for (qidx = 0; qidx < pfvf->hw.tx_queues; qidx++) { + incr = (u64)qidx << 32; + while (1) { + val = otx2_atomic64_add(incr, ptr); + sqe_head = (val >> 20) & 0x3F; + sqe_tail = (val >> 28) & 0x3F; + if (sqe_head == sqe_tail) + break; + usleep_range(1, 3); + } + } +} + +/* RED and drop levels of CQ on packet reception. + * For CQ level is measure of emptiness ( 0x0 = full, 255 = empty). + */ +#define RQ_PASS_LVL_CQ(skid, qsize) ((((skid) + 16) * 256) / (qsize)) +#define RQ_DROP_LVL_CQ(skid, qsize) (((skid) * 256) / (qsize)) + +/* RED and drop levels of AURA for packet reception. + * For AURA level is measure of fullness (0x0 = empty, 255 = full). + * Eg: For RQ length 1K, for pass/drop level 204/230. + * RED accepts pkts if free pointers > 102 & <= 205. + * Drops pkts if free pointers < 102. + */ +#define RQ_PASS_LVL_AURA (255 - ((95 * 256) / 100)) /* RED when 95% is full */ +#define RQ_DROP_LVL_AURA (255 - ((99 * 256) / 100)) /* Drop when 99% is full */ + +/* Send skid of 2000 packets required for CQ size of 4K CQEs. 
*/ +#define SEND_CQ_SKID 2000 + +static int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura) +{ + struct otx2_qset *qset = &pfvf->qset; + struct nix_aq_enq_req *aq; + + /* Get memory to put this msg */ + aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox); + if (!aq) + return -ENOMEM; + + aq->rq.cq = qidx; + aq->rq.ena = 1; + aq->rq.pb_caching = 1; + aq->rq.lpb_aura = lpb_aura; /* Use large packet buffer aura */ + aq->rq.lpb_sizem1 = (DMA_BUFFER_LEN(pfvf->rbsize) / 8) - 1; + aq->rq.xqe_imm_size = 0; /* Copying of packet to CQE not needed */ + aq->rq.flow_tagw = 32; /* Copy full 32bit flow_tag to CQE header */ + aq->rq.qint_idx = 0; + aq->rq.lpb_drop_ena = 1; /* Enable RED dropping for AURA */ + aq->rq.xqe_drop_ena = 1; /* Enable RED dropping for CQ/SSO */ + aq->rq.xqe_pass = RQ_PASS_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt); + aq->rq.xqe_drop = RQ_DROP_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt); + aq->rq.lpb_aura_pass = RQ_PASS_LVL_AURA; + aq->rq.lpb_aura_drop = RQ_DROP_LVL_AURA; + + /* Fill AQ info */ + aq->qidx = qidx; + aq->ctype = NIX_AQ_CTYPE_RQ; + aq->op = NIX_AQ_INSTOP_INIT; + + return otx2_sync_mbox_msg(&pfvf->mbox); +} + +static int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) +{ + struct otx2_qset *qset = &pfvf->qset; + struct otx2_snd_queue *sq; + struct nix_aq_enq_req *aq; + struct otx2_pool *pool; + int err; + + pool = &pfvf->qset.pool[sqb_aura]; + sq = &qset->sq[qidx]; + sq->sqe_size = NIX_SQESZ_W16 ? 64 : 128; + sq->sqe_cnt = qset->sqe_cnt; + + err = qmem_alloc(pfvf->dev, &sq->sqe, 1, sq->sqe_size); + if (err) + return err; + + err = qmem_alloc(pfvf->dev, &sq->tso_hdrs, qset->sqe_cnt, + TSO_HEADER_SIZE); + if (err) + return err; + + sq->sqe_base = sq->sqe->base; + sq->sg = kcalloc(qset->sqe_cnt, sizeof(struct sg_list), GFP_KERNEL); + if (!sq->sg) + return -ENOMEM; + + sq->head = 0; + sq->sqe_per_sqb = (pfvf->hw.sqb_size / sq->sqe_size) - 1; + sq->num_sqbs = (qset->sqe_cnt + sq->sqe_per_sqb) / sq->sqe_per_sqb; + /* Set SQE threshold to 10% of total SQEs */ + sq->sqe_thresh = ((sq->num_sqbs * sq->sqe_per_sqb) * 10) / 100; + sq->aura_id = sqb_aura; + sq->aura_fc_addr = pool->fc_addr->base; + sq->lmt_addr = (__force u64 *)(pfvf->reg_base + LMT_LF_LMTLINEX(qidx)); + sq->io_addr = (__force u64)otx2_get_regaddr(pfvf, NIX_LF_OP_SENDX(0)); + + sq->stats.bytes = 0; + sq->stats.pkts = 0; + + /* Get memory to put this msg */ + aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox); + if (!aq) + return -ENOMEM; + + aq->sq.cq = pfvf->hw.rx_queues + qidx; + aq->sq.max_sqe_size = NIX_MAXSQESZ_W16; /* 128 byte */ + aq->sq.cq_ena = 1; + aq->sq.ena = 1; + /* Only one SMQ is allocated, map all SQ's to that SMQ */ + aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0]; + aq->sq.smq_rr_quantum = DFLT_RR_QTM; + aq->sq.default_chan = pfvf->hw.tx_chan_base; + aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */ + aq->sq.sqb_aura = sqb_aura; + aq->sq.sq_int_ena = NIX_SQINT_BITS; + aq->sq.qint_idx = 0; + /* Due pipelining impact minimum 2000 unused SQ CQE's + * need to maintain to avoid CQ overflow. 
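+ * Like RQ_DROP_LVL_CQ() above, the value written is scaled to 256ths
+ * of the queue size, e.g. (2000 * 256) / 4096 = 125 for a 4K CQE CQ.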
+ */ + aq->sq.cq_limit = ((SEND_CQ_SKID * 256) / (sq->sqe_cnt)); + + /* Fill AQ info */ + aq->qidx = qidx; + aq->ctype = NIX_AQ_CTYPE_SQ; + aq->op = NIX_AQ_INSTOP_INIT; + + return otx2_sync_mbox_msg(&pfvf->mbox); +} + +static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx) +{ + struct otx2_qset *qset = &pfvf->qset; + struct nix_aq_enq_req *aq; + struct otx2_cq_queue *cq; + int err, pool_id; + + cq = &qset->cq[qidx]; + cq->cq_idx = qidx; + if (qidx < pfvf->hw.rx_queues) { + cq->cq_type = CQ_RX; + cq->cint_idx = qidx; + cq->cqe_cnt = qset->rqe_cnt; + } else { + cq->cq_type = CQ_TX; + cq->cint_idx = qidx - pfvf->hw.rx_queues; + cq->cqe_cnt = qset->sqe_cnt; + } + cq->cqe_size = pfvf->qset.xqe_size; + + /* Allocate memory for CQEs */ + err = qmem_alloc(pfvf->dev, &cq->cqe, cq->cqe_cnt, cq->cqe_size); + if (err) + return err; + + /* Save CQE CPU base for faster reference */ + cq->cqe_base = cq->cqe->base; + /* In case where all RQs auras point to single pool, + * all CQs receive buffer pool also point to same pool. + */ + pool_id = ((cq->cq_type == CQ_RX) && + (pfvf->hw.rqpool_cnt != pfvf->hw.rx_queues)) ? 0 : qidx; + cq->rbpool = &qset->pool[pool_id]; + cq->refill_task_sched = false; + + /* Get memory to put this msg */ + aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox); + if (!aq) + return -ENOMEM; + + aq->cq.ena = 1; + aq->cq.qsize = Q_SIZE(cq->cqe_cnt, 4); + aq->cq.caching = 1; + aq->cq.base = cq->cqe->iova; + aq->cq.cint_idx = cq->cint_idx; + aq->cq.cq_err_int_ena = NIX_CQERRINT_BITS; + aq->cq.qint_idx = 0; + aq->cq.avg_level = 255; + + if (qidx < pfvf->hw.rx_queues) { + aq->cq.drop = RQ_DROP_LVL_CQ(pfvf->hw.rq_skid, cq->cqe_cnt); + aq->cq.drop_ena = 1; + } + + /* Fill AQ info */ + aq->qidx = qidx; + aq->ctype = NIX_AQ_CTYPE_CQ; + aq->op = NIX_AQ_INSTOP_INIT; + + return otx2_sync_mbox_msg(&pfvf->mbox); +} + +static void otx2_pool_refill_task(struct work_struct *work) +{ + struct otx2_cq_queue *cq; + struct otx2_pool *rbpool; + struct refill_work *wrk; + int qidx, free_ptrs = 0; + struct otx2_nic *pfvf; + s64 bufptr; + + wrk = container_of(work, struct refill_work, pool_refill_work.work); + pfvf = wrk->pf; + qidx = wrk - pfvf->refill_wrk; + cq = &pfvf->qset.cq[qidx]; + rbpool = cq->rbpool; + free_ptrs = cq->pool_ptrs; + + while (cq->pool_ptrs) { + bufptr = otx2_alloc_rbuf(pfvf, rbpool, GFP_KERNEL); + if (bufptr <= 0) { + /* Schedule a WQ if we fails to free atleast half of the + * pointers else enable napi for this RQ. 
+ */ + if (!((free_ptrs - cq->pool_ptrs) > free_ptrs / 2)) { + struct delayed_work *dwork; + + dwork = &wrk->pool_refill_work; + schedule_delayed_work(dwork, + msecs_to_jiffies(100)); + } else { + cq->refill_task_sched = false; + } + return; + } + otx2_aura_freeptr(pfvf, qidx, bufptr + OTX2_HEAD_ROOM); + cq->pool_ptrs--; + } + cq->refill_task_sched = false; +} + +int otx2_config_nix_queues(struct otx2_nic *pfvf) +{ + int qidx, err; + + /* Initialize RX queues */ + for (qidx = 0; qidx < pfvf->hw.rx_queues; qidx++) { + u16 lpb_aura = otx2_get_pool_idx(pfvf, AURA_NIX_RQ, qidx); + + err = otx2_rq_init(pfvf, qidx, lpb_aura); + if (err) + return err; + } + + /* Initialize TX queues */ + for (qidx = 0; qidx < pfvf->hw.tx_queues; qidx++) { + u16 sqb_aura = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx); + + err = otx2_sq_init(pfvf, qidx, sqb_aura); + if (err) + return err; + } + + /* Initialize completion queues */ + for (qidx = 0; qidx < pfvf->qset.cq_cnt; qidx++) { + err = otx2_cq_init(pfvf, qidx); + if (err) + return err; + } + + /* Initialize work queue for receive buffer refill */ + pfvf->refill_wrk = devm_kcalloc(pfvf->dev, pfvf->qset.cq_cnt, + sizeof(struct refill_work), GFP_KERNEL); + if (!pfvf->refill_wrk) + return -ENOMEM; + + for (qidx = 0; qidx < pfvf->qset.cq_cnt; qidx++) { + pfvf->refill_wrk[qidx].pf = pfvf; + INIT_DELAYED_WORK(&pfvf->refill_wrk[qidx].pool_refill_work, + otx2_pool_refill_task); + } + return 0; +} + +int otx2_config_nix(struct otx2_nic *pfvf) +{ + struct nix_lf_alloc_req *nixlf; + struct nix_lf_alloc_rsp *rsp; + int err; + + pfvf->qset.xqe_size = NIX_XQESZ_W16 ? 128 : 512; + + /* Get memory to put this msg */ + nixlf = otx2_mbox_alloc_msg_nix_lf_alloc(&pfvf->mbox); + if (!nixlf) + return -ENOMEM; + + /* Set RQ/SQ/CQ counts */ + nixlf->rq_cnt = pfvf->hw.rx_queues; + nixlf->sq_cnt = pfvf->hw.tx_queues; + nixlf->cq_cnt = pfvf->qset.cq_cnt; + nixlf->rss_sz = MAX_RSS_INDIR_TBL_SIZE; + nixlf->rss_grps = 1; /* Single RSS indir table supported, for now */ + nixlf->xqe_sz = NIX_XQESZ_W16; + /* We don't know absolute NPA LF idx attached. + * AF will replace 'RVU_DEFAULT_PF_FUNC' with + * NPA LF attached to this RVU PF/VF. + */ + nixlf->npa_func = RVU_DEFAULT_PF_FUNC; + /* Disable alignment pad, enable L2 length check, + * enable L4 TCP/UDP checksum verification. 
+ */ + nixlf->rx_cfg = BIT_ULL(33) | BIT_ULL(35) | BIT_ULL(37); + + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) + return err; + + rsp = (struct nix_lf_alloc_rsp *)otx2_mbox_get_rsp(&pfvf->mbox.mbox, 0, + &nixlf->hdr); + if (IS_ERR(rsp)) + return PTR_ERR(rsp); + + if (rsp->qints < 1) + return -ENXIO; + + return rsp->hdr.rc; +} + +void otx2_sq_free_sqbs(struct otx2_nic *pfvf) +{ + struct otx2_qset *qset = &pfvf->qset; + struct otx2_hw *hw = &pfvf->hw; + struct otx2_snd_queue *sq; + int sqb, qidx; + u64 iova, pa; + + for (qidx = 0; qidx < hw->tx_queues; qidx++) { + sq = &qset->sq[qidx]; + if (!sq->sqb_ptrs) + continue; + for (sqb = 0; sqb < sq->sqb_count; sqb++) { + if (!sq->sqb_ptrs[sqb]) + continue; + iova = sq->sqb_ptrs[sqb]; + pa = otx2_iova_to_phys(pfvf->iommu_domain, iova); + dma_unmap_page_attrs(pfvf->dev, iova, hw->sqb_size, + DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + put_page(virt_to_page(phys_to_virt(pa))); + } + sq->sqb_count = 0; + } +} + +void otx2_free_aura_ptr(struct otx2_nic *pfvf, int type) +{ + int pool_id, pool_start = 0, pool_end = 0, size = 0; + u64 iova, pa; + + if (type == AURA_NIX_SQ) { + pool_start = otx2_get_pool_idx(pfvf, type, 0); + pool_end = pool_start + pfvf->hw.sqpool_cnt; + size = pfvf->hw.sqb_size; + } + if (type == AURA_NIX_RQ) { + pool_start = otx2_get_pool_idx(pfvf, type, 0); + pool_end = pfvf->hw.rqpool_cnt; + size = pfvf->rbsize; + } + + /* Free SQB and RQB pointers from the aura pool */ + for (pool_id = pool_start; pool_id < pool_end; pool_id++) { + iova = otx2_aura_allocptr(pfvf, pool_id); + while (iova) { + if (type == AURA_NIX_RQ) + iova -= OTX2_HEAD_ROOM; + + pa = otx2_iova_to_phys(pfvf->iommu_domain, iova); + dma_unmap_page_attrs(pfvf->dev, iova, size, + DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + put_page(virt_to_page(phys_to_virt(pa))); + iova = otx2_aura_allocptr(pfvf, pool_id); + } + } +} + +void otx2_aura_pool_free(struct otx2_nic *pfvf) +{ + struct otx2_pool *pool; + int pool_id; + + if (!pfvf->qset.pool) + return; + + for (pool_id = 0; pool_id < pfvf->hw.pool_cnt; pool_id++) { + pool = &pfvf->qset.pool[pool_id]; + qmem_free(pfvf->dev, pool->stack); + qmem_free(pfvf->dev, pool->fc_addr); + } + devm_kfree(pfvf->dev, pfvf->qset.pool); +} + +static int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, + int pool_id, int numptrs) +{ + struct npa_aq_enq_req *aq; + struct otx2_pool *pool; + int err; + + pool = &pfvf->qset.pool[pool_id]; + + /* Allocate memory for HW to update Aura count. + * Alloc one cache line, so that it fits all FC_STYPE modes. 
+ */ + if (!pool->fc_addr) { + err = qmem_alloc(pfvf->dev, &pool->fc_addr, 1, OTX2_ALIGN); + if (err) + return err; + } + + /* Initialize this aura's context via AF */ + aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox); + if (!aq) { + /* Shared mbox memory buffer is full, flush it and retry */ + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) + return err; + aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox); + if (!aq) + return -ENOMEM; + } + + aq->aura_id = aura_id; + /* Will be filled by AF with correct pool context address */ + aq->aura.pool_addr = pool_id; + aq->aura.pool_caching = 1; + aq->aura.shift = ilog2(numptrs) - 8; + aq->aura.count = numptrs; + aq->aura.limit = numptrs; + aq->aura.avg_level = 255; + aq->aura.ena = 1; + aq->aura.fc_ena = 1; + aq->aura.fc_addr = pool->fc_addr->iova; + aq->aura.fc_hyst_bits = 0; /* Store count on all updates */ + + /* Fill AQ info */ + aq->ctype = NPA_AQ_CTYPE_AURA; + aq->op = NPA_AQ_INSTOP_INIT; + + return 0; +} + +static int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, + int stack_pages, int numptrs, int buf_size) +{ + struct npa_aq_enq_req *aq; + struct otx2_pool *pool; + int err; + + pool = &pfvf->qset.pool[pool_id]; + /* Alloc memory for stack which is used to store buffer pointers */ + err = qmem_alloc(pfvf->dev, &pool->stack, + stack_pages, pfvf->hw.stack_pg_bytes); + if (err) + return err; + + pool->rbsize = buf_size; + pool->rbpage_order = get_order(buf_size); + + /* Initialize this pool's context via AF */ + aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox); + if (!aq) { + /* Shared mbox memory buffer is full, flush it and retry */ + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) { + qmem_free(pfvf->dev, pool->stack); + return err; + } + aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox); + if (!aq) { + qmem_free(pfvf->dev, pool->stack); + return -ENOMEM; + } + } + + aq->aura_id = pool_id; + aq->pool.stack_base = pool->stack->iova; + aq->pool.stack_caching = 1; + aq->pool.ena = 1; + aq->pool.buf_size = buf_size / 128; + aq->pool.stack_max_pages = stack_pages; + aq->pool.shift = ilog2(numptrs) - 8; + aq->pool.ptr_start = 0; + aq->pool.ptr_end = ~0ULL; + + /* Fill AQ info */ + aq->ctype = NPA_AQ_CTYPE_POOL; + aq->op = NPA_AQ_INSTOP_INIT; + + return 0; +} + +int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) +{ + int qidx, pool_id, stack_pages, num_sqbs; + struct otx2_qset *qset = &pfvf->qset; + struct otx2_hw *hw = &pfvf->hw; + struct otx2_snd_queue *sq; + struct otx2_pool *pool; + int err, ptr; + s64 bufptr; + + /* Calculate number of SQBs needed. + * + * For a 128byte SQE, and 4K size SQB, 31 SQEs will fit in one SQB. + * Last SQE is used for pointing to next SQB. 
+ */ + num_sqbs = (hw->sqb_size / 128) - 1; + num_sqbs = (qset->sqe_cnt + num_sqbs) / num_sqbs; + + /* Get no of stack pages needed */ + stack_pages = + (num_sqbs + hw->stack_pg_ptrs - 1) / hw->stack_pg_ptrs; + + for (qidx = 0; qidx < hw->tx_queues; qidx++) { + pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx); + /* Initialize aura context */ + err = otx2_aura_init(pfvf, pool_id, pool_id, num_sqbs); + if (err) + goto fail; + + /* Initialize pool context */ + err = otx2_pool_init(pfvf, pool_id, stack_pages, + num_sqbs, hw->sqb_size); + if (err) + goto fail; + } + + /* Flush accumulated messages */ + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) + goto fail; + + /* Allocate pointers and free them to aura/pool */ + for (qidx = 0; qidx < hw->tx_queues; qidx++) { + pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx); + pool = &pfvf->qset.pool[pool_id]; + + sq = &qset->sq[qidx]; + sq->sqb_count = 0; + sq->sqb_ptrs = kcalloc(num_sqbs, sizeof(u64 *), GFP_KERNEL); + if (!sq->sqb_ptrs) + return -ENOMEM; + + for (ptr = 0; ptr < num_sqbs; ptr++) { + bufptr = otx2_alloc_rbuf(pfvf, pool, GFP_KERNEL); + if (bufptr <= 0) + return bufptr; + otx2_aura_freeptr(pfvf, pool_id, bufptr); + sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr; + } + otx2_get_page(pool); + } + + return 0; +fail: + otx2_mbox_reset(&pfvf->mbox.mbox, 0); + otx2_aura_pool_free(pfvf); + return err; +} + +int otx2_rq_aura_pool_init(struct otx2_nic *pfvf) +{ + struct otx2_hw *hw = &pfvf->hw; + int stack_pages, pool_id, rq; + struct otx2_pool *pool; + int err, ptr, num_ptrs; + s64 bufptr; + + num_ptrs = pfvf->qset.rqe_cnt; + + stack_pages = + (num_ptrs + hw->stack_pg_ptrs - 1) / hw->stack_pg_ptrs; + + for (rq = 0; rq < hw->rx_queues; rq++) { + pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_RQ, rq); + /* Initialize aura context */ + err = otx2_aura_init(pfvf, pool_id, pool_id, num_ptrs); + if (err) + goto fail; + } + for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) { + err = otx2_pool_init(pfvf, pool_id, stack_pages, + num_ptrs, pfvf->rbsize); + if (err) + goto fail; + } + + /* Flush accumulated messages */ + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) + goto fail; + + /* Allocate pointers and free them to aura/pool */ + for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) { + pool = &pfvf->qset.pool[pool_id]; + for (ptr = 0; ptr < num_ptrs; ptr++) { + bufptr = otx2_alloc_rbuf(pfvf, pool, GFP_KERNEL); + if (bufptr <= 0) + return bufptr; + otx2_aura_freeptr(pfvf, pool_id, + bufptr + OTX2_HEAD_ROOM); + } + otx2_get_page(pool); + } + + return 0; +fail: + otx2_mbox_reset(&pfvf->mbox.mbox, 0); + otx2_aura_pool_free(pfvf); + return err; +} + +int otx2_config_npa(struct otx2_nic *pfvf) +{ + struct otx2_qset *qset = &pfvf->qset; + struct npa_lf_alloc_req *npalf; + struct otx2_hw *hw = &pfvf->hw; + int aura_cnt; + + /* Pool - Stack of free buffer pointers + * Aura - Alloc/frees pointers from/to pool for NIX DMA. + */ + + if (!hw->pool_cnt) + return -EINVAL; + + qset->pool = devm_kzalloc(pfvf->dev, sizeof(struct otx2_pool) * + hw->pool_cnt, GFP_KERNEL); + if (!qset->pool) + return -ENOMEM; + + /* Get memory to put this msg */ + npalf = otx2_mbox_alloc_msg_npa_lf_alloc(&pfvf->mbox); + if (!npalf) + return -ENOMEM; + + /* Set aura and pool counts */ + npalf->nr_pools = hw->pool_cnt; + aura_cnt = ilog2(roundup_pow_of_two(hw->pool_cnt)); + npalf->aura_sz = (aura_cnt >= ilog2(128)) ? 
(aura_cnt - 6) : 1; + + return otx2_sync_mbox_msg(&pfvf->mbox); +} + +int otx2_detach_resources(struct mbox *mbox) +{ + struct rsrc_detach *detach; + + otx2_mbox_lock(mbox); + detach = otx2_mbox_alloc_msg_detach_resources(mbox); + if (!detach) { + otx2_mbox_unlock(mbox); + return -ENOMEM; + } + + /* detach all */ + detach->partial = false; + + /* Send detach request to AF */ + otx2_mbox_msg_send(&mbox->mbox, 0); + otx2_mbox_unlock(mbox); + return 0; +} + +int otx2_attach_npa_nix(struct otx2_nic *pfvf) +{ + struct rsrc_attach *attach; + struct msg_req *msix; + int err; + + otx2_mbox_lock(&pfvf->mbox); + /* Get memory to put this msg */ + attach = otx2_mbox_alloc_msg_attach_resources(&pfvf->mbox); + if (!attach) { + otx2_mbox_unlock(&pfvf->mbox); + return -ENOMEM; + } + + attach->npalf = true; + attach->nixlf = true; + + /* Send attach request to AF */ + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) { + otx2_mbox_unlock(&pfvf->mbox); + return err; + } + + pfvf->nix_blkaddr = BLKADDR_NIX0; + + /* If the platform has two NIX blocks then LF may be + * allocated from NIX1. + */ + if (otx2_read64(pfvf, RVU_PF_BLOCK_ADDRX_DISC(BLKADDR_NIX1)) & 0x1FFULL) + pfvf->nix_blkaddr = BLKADDR_NIX1; + + /* Get NPA and NIX MSIX vector offsets */ + msix = otx2_mbox_alloc_msg_msix_offset(&pfvf->mbox); + if (!msix) { + otx2_mbox_unlock(&pfvf->mbox); + return -ENOMEM; + } + + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) { + otx2_mbox_unlock(&pfvf->mbox); + return err; + } + otx2_mbox_unlock(&pfvf->mbox); + + if (pfvf->hw.npa_msixoff == MSIX_VECTOR_INVALID || + pfvf->hw.nix_msixoff == MSIX_VECTOR_INVALID) { + dev_err(pfvf->dev, + "RVUPF: Invalid MSIX vector offset for NPA/NIX\n"); + return -EINVAL; + } + + return 0; +} + +void otx2_ctx_disable(struct mbox *mbox, int type, bool npa) +{ + struct hwctx_disable_req *req; + + otx2_mbox_lock(mbox); + /* Request AQ to disable this context */ + if (npa) + req = otx2_mbox_alloc_msg_npa_hwctx_disable(mbox); + else + req = otx2_mbox_alloc_msg_nix_hwctx_disable(mbox); + + if (!req) { + otx2_mbox_unlock(mbox); + return; + } + + req->ctype = type; + + if (otx2_sync_mbox_msg(mbox)) + dev_err(mbox->pfvf->dev, "%s failed to disable context\n", + __func__); + + otx2_mbox_unlock(mbox); +} + +/* Mbox message handlers */ +void mbox_handler_cgx_stats(struct otx2_nic *pfvf, + struct cgx_stats_rsp *rsp) +{ + int id; + + for (id = 0; id < CGX_RX_STATS_COUNT; id++) + pfvf->hw.cgx_rx_stats[id] = rsp->rx_stats[id]; + for (id = 0; id < CGX_TX_STATS_COUNT; id++) + pfvf->hw.cgx_tx_stats[id] = rsp->tx_stats[id]; +} + +void mbox_handler_nix_txsch_alloc(struct otx2_nic *pf, + struct nix_txsch_alloc_rsp *rsp) +{ + int lvl, schq; + + /* Setup transmit scheduler list */ + for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) + for (schq = 0; schq < rsp->schq[lvl]; schq++) + pf->hw.txschq_list[lvl][schq] = + rsp->schq_list[lvl][schq]; +} + +void mbox_handler_npa_lf_alloc(struct otx2_nic *pfvf, + struct npa_lf_alloc_rsp *rsp) +{ + pfvf->hw.stack_pg_ptrs = rsp->stack_pg_ptrs; + pfvf->hw.stack_pg_bytes = rsp->stack_pg_bytes; +} + +void mbox_handler_nix_lf_alloc(struct otx2_nic *pfvf, + struct nix_lf_alloc_rsp *rsp) +{ + pfvf->hw.sqb_size = rsp->sqb_size; + pfvf->hw.rx_chan_base = rsp->rx_chan_base; + pfvf->hw.tx_chan_base = rsp->tx_chan_base; + pfvf->hw.lso_tsov4_idx = rsp->lso_tsov4_idx; + pfvf->hw.lso_tsov6_idx = rsp->lso_tsov6_idx; +} + +void mbox_handler_msix_offset(struct otx2_nic *pfvf, + struct msix_offset_rsp *rsp) +{ + pfvf->hw.npa_msixoff = rsp->npa_msixoff; + pfvf->hw.nix_msixoff = 
rsp->nix_msixoff; +} + +void otx2_free_cints(struct otx2_nic *pfvf, int n) +{ + struct otx2_qset *qset = &pfvf->qset; + struct otx2_hw *hw = &pfvf->hw; + int irq, qidx; + + for (qidx = 0, irq = hw->nix_msixoff + NIX_LF_CINT_VEC_START; + qidx < n; + qidx++, irq++) { + int vector = pci_irq_vector(pfvf->pdev, irq); + + irq_set_affinity_hint(vector, NULL); + free_cpumask_var(hw->affinity_mask[irq]); + free_irq(vector, &qset->napi[qidx]); + } +} + +void otx2_set_cints_affinity(struct otx2_nic *pfvf) +{ + struct otx2_hw *hw = &pfvf->hw; + int vec, cpu, irq, cint; + + vec = hw->nix_msixoff + NIX_LF_CINT_VEC_START; + cpu = cpumask_first(cpu_online_mask); + + /* CQ interrupts */ + for (cint = 0; cint < pfvf->hw.cint_cnt; cint++, vec++) { + if (!alloc_cpumask_var(&hw->affinity_mask[vec], GFP_KERNEL)) + return; + + cpumask_set_cpu(cpu, hw->affinity_mask[vec]); + + irq = pci_irq_vector(pfvf->pdev, vec); + irq_set_affinity_hint(irq, hw->affinity_mask[vec]); + + cpu = cpumask_next(cpu, cpu_online_mask); + if (unlikely(cpu >= nr_cpu_ids)) + cpu = 0; + } +} + +#define M(_name, _id, _fn_name, _req_type, _rsp_type) \ +int __weak \ +otx2_mbox_up_handler_ ## _fn_name(struct otx2_nic *pfvf, \ + struct _req_type *req, \ + struct _rsp_type *rsp) \ +{ \ + /* Nothing to do here */ \ + return 0; \ +} \ +EXPORT_SYMBOL(otx2_mbox_up_handler_ ## _fn_name); +MBOX_UP_CGX_MESSAGES +#undef M diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h new file mode 100644 index 000000000000..320f3b7bf57f --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -0,0 +1,615 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell OcteonTx2 RVU Ethernet driver + * + * Copyright (C) 2020 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef OTX2_COMMON_H +#define OTX2_COMMON_H + +#include <linux/pci.h> +#include <linux/iommu.h> + +#include <mbox.h> +#include "otx2_reg.h" +#include "otx2_txrx.h" + +/* PCI device IDs */ +#define PCI_DEVID_OCTEONTX2_RVU_PF 0xA063 + +#define PCI_SUBSYS_DEVID_96XX_RVU_PFVF 0xB200 + +/* PCI BAR nos */ +#define PCI_CFG_REG_BAR_NUM 2 +#define PCI_MBOX_BAR_NUM 4 + +#define NAME_SIZE 32 + +enum arua_mapped_qtypes { + AURA_NIX_RQ, + AURA_NIX_SQ, +}; + +/* NIX LF interrupts range*/ +#define NIX_LF_QINT_VEC_START 0x00 +#define NIX_LF_CINT_VEC_START 0x40 +#define NIX_LF_GINT_VEC 0x80 +#define NIX_LF_ERR_VEC 0x81 +#define NIX_LF_POISON_VEC 0x82 + +/* RSS configuration */ +struct otx2_rss_info { + u8 enable; + u32 flowkey_cfg; + u16 rss_size; + u8 ind_tbl[MAX_RSS_INDIR_TBL_SIZE]; +#define RSS_HASH_KEY_SIZE 44 /* 352 bit key */ + u8 key[RSS_HASH_KEY_SIZE]; +}; + +/* NIX (or NPC) RX errors */ +enum otx2_errlvl { + NPC_ERRLVL_RE, + NPC_ERRLVL_LID_LA, + NPC_ERRLVL_LID_LB, + NPC_ERRLVL_LID_LC, + NPC_ERRLVL_LID_LD, + NPC_ERRLVL_LID_LE, + NPC_ERRLVL_LID_LF, + NPC_ERRLVL_LID_LG, + NPC_ERRLVL_LID_LH, + NPC_ERRLVL_NIX = 0x0F, +}; + +enum otx2_errcodes_re { + /* NPC_ERRLVL_RE errcodes */ + ERRCODE_FCS = 0x7, + ERRCODE_FCS_RCV = 0x8, + ERRCODE_UNDERSIZE = 0x10, + ERRCODE_OVERSIZE = 0x11, + ERRCODE_OL2_LEN_MISMATCH = 0x12, + /* NPC_ERRLVL_NIX errcodes */ + ERRCODE_OL3_LEN = 0x10, + ERRCODE_OL4_LEN = 0x11, + ERRCODE_OL4_CSUM = 0x12, + ERRCODE_IL3_LEN = 0x20, + ERRCODE_IL4_LEN = 0x21, + ERRCODE_IL4_CSUM = 0x22, +}; + +/* NIX TX stats */ +enum nix_stat_lf_tx { + TX_UCAST = 0x0, + TX_BCAST = 0x1, + TX_MCAST = 0x2, + TX_DROP = 0x3, + TX_OCTS = 0x4, + TX_STATS_ENUM_LAST, +}; + +/* NIX RX stats */ +enum nix_stat_lf_rx { + RX_OCTS = 0x0, + RX_UCAST = 0x1, + RX_BCAST = 0x2, + RX_MCAST = 0x3, + RX_DROP = 0x4, + RX_DROP_OCTS = 0x5, + RX_FCS = 0x6, + RX_ERR = 0x7, + RX_DRP_BCAST = 0x8, + RX_DRP_MCAST = 0x9, + RX_DRP_L3BCAST = 0xa, + RX_DRP_L3MCAST = 0xb, + RX_STATS_ENUM_LAST, +}; + +struct otx2_dev_stats { + u64 rx_bytes; + u64 rx_frames; + u64 rx_ucast_frames; + u64 rx_bcast_frames; + u64 rx_mcast_frames; + u64 rx_drops; + + u64 tx_bytes; + u64 tx_frames; + u64 tx_ucast_frames; + u64 tx_bcast_frames; + u64 tx_mcast_frames; + u64 tx_drops; +}; + +/* Driver counted stats */ +struct otx2_drv_stats { + atomic_t rx_fcs_errs; + atomic_t rx_oversize_errs; + atomic_t rx_undersize_errs; + atomic_t rx_csum_errs; + atomic_t rx_len_errs; + atomic_t rx_other_errs; +}; + +struct mbox { + struct otx2_mbox mbox; + struct work_struct mbox_wrk; + struct otx2_mbox mbox_up; + struct work_struct mbox_up_wrk; + struct otx2_nic *pfvf; + void *bbuf_base; /* Bounce buffer for mbox memory */ + struct mutex lock; /* serialize mailbox access */ + int num_msgs; /* mbox number of messages */ + int up_num_msgs; /* mbox_up number of messages */ +}; + +struct otx2_hw { + struct pci_dev *pdev; + struct otx2_rss_info rss_info; + u16 rx_queues; + u16 tx_queues; + u16 max_queues; + u16 pool_cnt; + u16 rqpool_cnt; + u16 sqpool_cnt; + + /* NPA */ + u32 stack_pg_ptrs; /* No of ptrs per stack page */ + u32 stack_pg_bytes; /* Size of stack page */ + u16 sqb_size; + + /* NIX */ + u16 txschq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC]; + + /* HW settings, coalescing etc */ + u16 rx_chan_base; + u16 tx_chan_base; + u16 cq_qcount_wait; + u16 cq_ecount_wait; + u16 rq_skid; + u8 cq_time_wait; + + /* For TSO segmentation */ + u8 lso_tsov4_idx; + u8 lso_tsov6_idx; + u8 hw_tso; + + /* MSI-X */ + u8 cint_cnt; /* CQ interrupt count */ + u16 npa_msixoff; /* Offset of 
NPA vectors */ + u16 nix_msixoff; /* Offset of NIX vectors */ + char *irq_name; + cpumask_var_t *affinity_mask; + + /* Stats */ + struct otx2_dev_stats dev_stats; + struct otx2_drv_stats drv_stats; + u64 cgx_rx_stats[CGX_RX_STATS_COUNT]; + u64 cgx_tx_stats[CGX_TX_STATS_COUNT]; +}; + +struct refill_work { + struct delayed_work pool_refill_work; + struct otx2_nic *pf; +}; + +struct otx2_nic { + void __iomem *reg_base; + struct net_device *netdev; + void *iommu_domain; + u16 max_frs; + u16 rbsize; /* Receive buffer size */ + +#define OTX2_FLAG_INTF_DOWN BIT_ULL(2) + u64 flags; + + struct otx2_qset qset; + struct otx2_hw hw; + struct pci_dev *pdev; + struct device *dev; + + /* Mbox */ + struct mbox mbox; + struct workqueue_struct *mbox_wq; + + u16 pcifunc; /* RVU PF_FUNC */ + struct cgx_link_user_info linfo; + + u64 reset_count; + struct work_struct reset_task; + struct refill_work *refill_wrk; + + /* Ethtool stuff */ + u32 msg_enable; + + /* Block address of NIX either BLKADDR_NIX0 or BLKADDR_NIX1 */ + int nix_blkaddr; +}; + +static inline bool is_96xx_A0(struct pci_dev *pdev) +{ + return (pdev->revision == 0x00) && + (pdev->subsystem_device == PCI_SUBSYS_DEVID_96XX_RVU_PFVF); +} + +static inline bool is_96xx_B0(struct pci_dev *pdev) +{ + return (pdev->revision == 0x01) && + (pdev->subsystem_device == PCI_SUBSYS_DEVID_96XX_RVU_PFVF); +} + +static inline void otx2_setup_dev_hw_settings(struct otx2_nic *pfvf) +{ + struct otx2_hw *hw = &pfvf->hw; + + pfvf->hw.cq_time_wait = CQ_TIMER_THRESH_DEFAULT; + pfvf->hw.cq_ecount_wait = CQ_CQE_THRESH_DEFAULT; + pfvf->hw.cq_qcount_wait = CQ_QCOUNT_DEFAULT; + + hw->hw_tso = true; + + if (is_96xx_A0(pfvf->pdev)) { + hw->hw_tso = false; + + /* Time based irq coalescing is not supported */ + pfvf->hw.cq_qcount_wait = 0x0; + + /* Due to HW issue previous silicons required minimum + * 600 unused CQE to avoid CQ overflow. + */ + pfvf->hw.rq_skid = 600; + pfvf->qset.rqe_cnt = Q_COUNT(Q_SIZE_1K); + } +} + +/* Register read/write APIs */ +static inline void __iomem *otx2_get_regaddr(struct otx2_nic *nic, u64 offset) +{ + u64 blkaddr; + + switch ((offset >> RVU_FUNC_BLKADDR_SHIFT) & RVU_FUNC_BLKADDR_MASK) { + case BLKTYPE_NIX: + blkaddr = nic->nix_blkaddr; + break; + case BLKTYPE_NPA: + blkaddr = BLKADDR_NPA; + break; + default: + blkaddr = BLKADDR_RVUM; + break; + }; + + offset &= ~(RVU_FUNC_BLKADDR_MASK << RVU_FUNC_BLKADDR_SHIFT); + offset |= (blkaddr << RVU_FUNC_BLKADDR_SHIFT); + + return nic->reg_base + offset; +} + +static inline void otx2_write64(struct otx2_nic *nic, u64 offset, u64 val) +{ + void __iomem *addr = otx2_get_regaddr(nic, offset); + + writeq(val, addr); +} + +static inline u64 otx2_read64(struct otx2_nic *nic, u64 offset) +{ + void __iomem *addr = otx2_get_regaddr(nic, offset); + + return readq(addr); +} + +/* Mbox bounce buffer APIs */ +static inline int otx2_mbox_bbuf_init(struct mbox *mbox, struct pci_dev *pdev) +{ + struct otx2_mbox *otx2_mbox; + struct otx2_mbox_dev *mdev; + + mbox->bbuf_base = devm_kmalloc(&pdev->dev, MBOX_SIZE, GFP_KERNEL); + if (!mbox->bbuf_base) + return -ENOMEM; + + /* Overwrite mbox mbase to point to bounce buffer, so that PF/VF + * prepare all mbox messages in bounce buffer instead of directly + * in hw mbox memory. 
+ */ + otx2_mbox = &mbox->mbox; + mdev = &otx2_mbox->dev[0]; + mdev->mbase = mbox->bbuf_base; + + otx2_mbox = &mbox->mbox_up; + mdev = &otx2_mbox->dev[0]; + mdev->mbase = mbox->bbuf_base; + return 0; +} + +static inline void otx2_sync_mbox_bbuf(struct otx2_mbox *mbox, int devid) +{ + u16 msgs_offset = ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); + void *hw_mbase = mbox->hwbase + (devid * MBOX_SIZE); + struct otx2_mbox_dev *mdev = &mbox->dev[devid]; + struct mbox_hdr *hdr; + u64 msg_size; + + if (mdev->mbase == hw_mbase) + return; + + hdr = hw_mbase + mbox->rx_start; + msg_size = hdr->msg_size; + + if (msg_size > mbox->rx_size - msgs_offset) + msg_size = mbox->rx_size - msgs_offset; + + /* Copy mbox messages from mbox memory to bounce buffer */ + memcpy(mdev->mbase + mbox->rx_start, + hw_mbase + mbox->rx_start, msg_size + msgs_offset); +} + +static inline void otx2_mbox_lock_init(struct mbox *mbox) +{ + mutex_init(&mbox->lock); +} + +static inline void otx2_mbox_lock(struct mbox *mbox) +{ + mutex_lock(&mbox->lock); +} + +static inline void otx2_mbox_unlock(struct mbox *mbox) +{ + mutex_unlock(&mbox->lock); +} + +/* With the absence of API for 128-bit IO memory access for arm64, + * implement required operations at place. + */ +#if defined(CONFIG_ARM64) +static inline void otx2_write128(u64 lo, u64 hi, void __iomem *addr) +{ + __asm__ volatile("stp %x[x0], %x[x1], [%x[p1],#0]!" + ::[x0]"r"(lo), [x1]"r"(hi), [p1]"r"(addr)); +} + +static inline u64 otx2_atomic64_add(u64 incr, u64 *ptr) +{ + u64 result; + + __asm__ volatile(".cpu generic+lse\n" + "ldadd %x[i], %x[r], [%[b]]" + : [r]"=r"(result), "+m"(*ptr) + : [i]"r"(incr), [b]"r"(ptr) + : "memory"); + return result; +} + +static inline u64 otx2_lmt_flush(uint64_t addr) +{ + u64 result = 0; + + __asm__ volatile(".cpu generic+lse\n" + "ldeor xzr,%x[rf],[%[rs]]" + : [rf]"=r"(result) + : [rs]"r"(addr)); + return result; +} + +#else +#define otx2_write128(lo, hi, addr) +#define otx2_atomic64_add(incr, ptr) ({ *ptr += incr; }) +#define otx2_lmt_flush(addr) ({ 0; }) +#endif + +/* Alloc pointer from pool/aura */ +static inline u64 otx2_aura_allocptr(struct otx2_nic *pfvf, int aura) +{ + u64 *ptr = (u64 *)otx2_get_regaddr(pfvf, + NPA_LF_AURA_OP_ALLOCX(0)); + u64 incr = (u64)aura | BIT_ULL(63); + + return otx2_atomic64_add(incr, ptr); +} + +/* Free pointer to a pool/aura */ +static inline void otx2_aura_freeptr(struct otx2_nic *pfvf, + int aura, s64 buf) +{ + otx2_write128((u64)buf, (u64)aura | BIT_ULL(63), + otx2_get_regaddr(pfvf, NPA_LF_AURA_OP_FREE0)); +} + +/* Update page ref count */ +static inline void otx2_get_page(struct otx2_pool *pool) +{ + if (!pool->page) + return; + + if (pool->pageref) + page_ref_add(pool->page, pool->pageref); + pool->pageref = 0; + pool->page = NULL; +} + +static inline int otx2_get_pool_idx(struct otx2_nic *pfvf, int type, int idx) +{ + if (type == AURA_NIX_SQ) + return pfvf->hw.rqpool_cnt + idx; + + /* AURA_NIX_RQ */ + return idx; +} + +/* Mbox APIs */ +static inline int otx2_sync_mbox_msg(struct mbox *mbox) +{ + int err; + + if (!otx2_mbox_nonempty(&mbox->mbox, 0)) + return 0; + otx2_mbox_msg_send(&mbox->mbox, 0); + err = otx2_mbox_wait_for_rsp(&mbox->mbox, 0); + if (err) + return err; + + return otx2_mbox_check_rsp_msgs(&mbox->mbox, 0); +} + +static inline int otx2_sync_mbox_up_msg(struct mbox *mbox, int devid) +{ + int err; + + if (!otx2_mbox_nonempty(&mbox->mbox_up, devid)) + return 0; + otx2_mbox_msg_send(&mbox->mbox_up, devid); + err = otx2_mbox_wait_for_rsp(&mbox->mbox_up, devid); + if (err) + return err; + + 
return otx2_mbox_check_rsp_msgs(&mbox->mbox_up, devid); +} + +/* Use this API to send mbox msgs in atomic context + * where sleeping is not allowed + */ +static inline int otx2_sync_mbox_msg_busy_poll(struct mbox *mbox) +{ + int err; + + if (!otx2_mbox_nonempty(&mbox->mbox, 0)) + return 0; + otx2_mbox_msg_send(&mbox->mbox, 0); + err = otx2_mbox_busy_poll_for_rsp(&mbox->mbox, 0); + if (err) + return err; + + return otx2_mbox_check_rsp_msgs(&mbox->mbox, 0); +} + +#define M(_name, _id, _fn_name, _req_type, _rsp_type) \ +static struct _req_type __maybe_unused \ +*otx2_mbox_alloc_msg_ ## _fn_name(struct mbox *mbox) \ +{ \ + struct _req_type *req; \ + \ + req = (struct _req_type *)otx2_mbox_alloc_msg_rsp( \ + &mbox->mbox, 0, sizeof(struct _req_type), \ + sizeof(struct _rsp_type)); \ + if (!req) \ + return NULL; \ + req->hdr.sig = OTX2_MBOX_REQ_SIG; \ + req->hdr.id = _id; \ + return req; \ +} + +MBOX_MESSAGES +#undef M + +#define M(_name, _id, _fn_name, _req_type, _rsp_type) \ +int \ +otx2_mbox_up_handler_ ## _fn_name(struct otx2_nic *pfvf, \ + struct _req_type *req, \ + struct _rsp_type *rsp); \ + +MBOX_UP_CGX_MESSAGES +#undef M + +/* Time to wait before watchdog kicks off */ +#define OTX2_TX_TIMEOUT (100 * HZ) + +#define RVU_PFVF_PF_SHIFT 10 +#define RVU_PFVF_PF_MASK 0x3F +#define RVU_PFVF_FUNC_SHIFT 0 +#define RVU_PFVF_FUNC_MASK 0x3FF + +static inline int rvu_get_pf(u16 pcifunc) +{ + return (pcifunc >> RVU_PFVF_PF_SHIFT) & RVU_PFVF_PF_MASK; +} + +static inline dma_addr_t otx2_dma_map_page(struct otx2_nic *pfvf, + struct page *page, + size_t offset, size_t size, + enum dma_data_direction dir) +{ + dma_addr_t iova; + + iova = dma_map_page_attrs(pfvf->dev, page, + offset, size, dir, DMA_ATTR_SKIP_CPU_SYNC); + if (unlikely(dma_mapping_error(pfvf->dev, iova))) + return (dma_addr_t)NULL; + return iova; +} + +static inline void otx2_dma_unmap_page(struct otx2_nic *pfvf, + dma_addr_t addr, size_t size, + enum dma_data_direction dir) +{ + dma_unmap_page_attrs(pfvf->dev, addr, size, + dir, DMA_ATTR_SKIP_CPU_SYNC); +} + +/* MSI-X APIs */ +void otx2_free_cints(struct otx2_nic *pfvf, int n); +void otx2_set_cints_affinity(struct otx2_nic *pfvf); +int otx2_set_mac_address(struct net_device *netdev, void *p); +int otx2_hw_set_mtu(struct otx2_nic *pfvf, int mtu); +void otx2_tx_timeout(struct net_device *netdev, unsigned int txq); +void otx2_get_mac_from_af(struct net_device *netdev); +void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int qidx); + +/* RVU block related APIs */ +int otx2_attach_npa_nix(struct otx2_nic *pfvf); +int otx2_detach_resources(struct mbox *mbox); +int otx2_config_npa(struct otx2_nic *pfvf); +int otx2_sq_aura_pool_init(struct otx2_nic *pfvf); +int otx2_rq_aura_pool_init(struct otx2_nic *pfvf); +void otx2_aura_pool_free(struct otx2_nic *pfvf); +void otx2_free_aura_ptr(struct otx2_nic *pfvf, int type); +void otx2_sq_free_sqbs(struct otx2_nic *pfvf); +int otx2_config_nix(struct otx2_nic *pfvf); +int otx2_config_nix_queues(struct otx2_nic *pfvf); +int otx2_txschq_config(struct otx2_nic *pfvf, int lvl); +int otx2_txsch_alloc(struct otx2_nic *pfvf); +int otx2_txschq_stop(struct otx2_nic *pfvf); +void otx2_sqb_flush(struct otx2_nic *pfvf); +dma_addr_t otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, + gfp_t gfp); +int otx2_rxtx_enable(struct otx2_nic *pfvf, bool enable); +void otx2_ctx_disable(struct mbox *mbox, int type, bool npa); +void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq); +void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct 
otx2_cq_queue *cq); + +/* RSS configuration APIs*/ +int otx2_rss_init(struct otx2_nic *pfvf); +int otx2_set_flowkey_cfg(struct otx2_nic *pfvf); +void otx2_set_rss_key(struct otx2_nic *pfvf); +int otx2_set_rss_table(struct otx2_nic *pfvf); + +/* Mbox handlers */ +void mbox_handler_msix_offset(struct otx2_nic *pfvf, + struct msix_offset_rsp *rsp); +void mbox_handler_npa_lf_alloc(struct otx2_nic *pfvf, + struct npa_lf_alloc_rsp *rsp); +void mbox_handler_nix_lf_alloc(struct otx2_nic *pfvf, + struct nix_lf_alloc_rsp *rsp); +void mbox_handler_nix_txsch_alloc(struct otx2_nic *pf, + struct nix_txsch_alloc_rsp *rsp); +void mbox_handler_cgx_stats(struct otx2_nic *pfvf, + struct cgx_stats_rsp *rsp); + +/* Device stats APIs */ +void otx2_get_dev_stats(struct otx2_nic *pfvf); +void otx2_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats); +void otx2_update_lmac_stats(struct otx2_nic *pfvf); +int otx2_update_rq_stats(struct otx2_nic *pfvf, int qidx); +int otx2_update_sq_stats(struct otx2_nic *pfvf, int qidx); +void otx2_set_ethtool_ops(struct net_device *netdev); + +int otx2_open(struct net_device *netdev); +int otx2_stop(struct net_device *netdev); +int otx2_set_real_num_queues(struct net_device *netdev, + int tx_queues, int rx_queues); +#endif /* OTX2_COMMON_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c new file mode 100644 index 000000000000..60fcf82dd8cb --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -0,0 +1,662 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTx2 RVU Ethernet driver + * + * Copyright (C) 2020 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/pci.h> +#include <linux/ethtool.h> +#include <linux/stddef.h> +#include <linux/etherdevice.h> +#include <linux/log2.h> + +#include "otx2_common.h" + +#define DRV_NAME "octeontx2-nicpf" + +struct otx2_stat { + char name[ETH_GSTRING_LEN]; + unsigned int index; +}; + +/* HW device stats */ +#define OTX2_DEV_STAT(stat) { \ + .name = #stat, \ + .index = offsetof(struct otx2_dev_stats, stat) / sizeof(u64), \ +} + +static const struct otx2_stat otx2_dev_stats[] = { + OTX2_DEV_STAT(rx_ucast_frames), + OTX2_DEV_STAT(rx_bcast_frames), + OTX2_DEV_STAT(rx_mcast_frames), + + OTX2_DEV_STAT(tx_ucast_frames), + OTX2_DEV_STAT(tx_bcast_frames), + OTX2_DEV_STAT(tx_mcast_frames), +}; + +/* Driver level stats */ +#define OTX2_DRV_STAT(stat) { \ + .name = #stat, \ + .index = offsetof(struct otx2_drv_stats, stat) / sizeof(atomic_t), \ +} + +static const struct otx2_stat otx2_drv_stats[] = { + OTX2_DRV_STAT(rx_fcs_errs), + OTX2_DRV_STAT(rx_oversize_errs), + OTX2_DRV_STAT(rx_undersize_errs), + OTX2_DRV_STAT(rx_csum_errs), + OTX2_DRV_STAT(rx_len_errs), + OTX2_DRV_STAT(rx_other_errs), +}; + +static const struct otx2_stat otx2_queue_stats[] = { + { "bytes", 0 }, + { "frames", 1 }, +}; + +static const unsigned int otx2_n_dev_stats = ARRAY_SIZE(otx2_dev_stats); +static const unsigned int otx2_n_drv_stats = ARRAY_SIZE(otx2_drv_stats); +static const unsigned int otx2_n_queue_stats = ARRAY_SIZE(otx2_queue_stats); + +static void otx2_dev_open(struct net_device *netdev) +{ + otx2_open(netdev); +} + +static void otx2_dev_stop(struct net_device *netdev) +{ + otx2_stop(netdev); +} + +static void otx2_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *info) +{ + struct otx2_nic *pfvf = netdev_priv(netdev); + + strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); + strlcpy(info->bus_info, pci_name(pfvf->pdev), sizeof(info->bus_info)); +} + +static void otx2_get_qset_strings(struct otx2_nic *pfvf, u8 **data, int qset) +{ + int start_qidx = qset * pfvf->hw.rx_queues; + int qidx, stats; + + for (qidx = 0; qidx < pfvf->hw.rx_queues; qidx++) { + for (stats = 0; stats < otx2_n_queue_stats; stats++) { + sprintf(*data, "rxq%d: %s", qidx + start_qidx, + otx2_queue_stats[stats].name); + *data += ETH_GSTRING_LEN; + } + } + for (qidx = 0; qidx < pfvf->hw.tx_queues; qidx++) { + for (stats = 0; stats < otx2_n_queue_stats; stats++) { + sprintf(*data, "txq%d: %s", qidx + start_qidx, + otx2_queue_stats[stats].name); + *data += ETH_GSTRING_LEN; + } + } +} + +static void otx2_get_strings(struct net_device *netdev, u32 sset, u8 *data) +{ + struct otx2_nic *pfvf = netdev_priv(netdev); + int stats; + + if (sset != ETH_SS_STATS) + return; + + for (stats = 0; stats < otx2_n_dev_stats; stats++) { + memcpy(data, otx2_dev_stats[stats].name, ETH_GSTRING_LEN); + data += ETH_GSTRING_LEN; + } + + for (stats = 0; stats < otx2_n_drv_stats; stats++) { + memcpy(data, otx2_drv_stats[stats].name, ETH_GSTRING_LEN); + data += ETH_GSTRING_LEN; + } + + otx2_get_qset_strings(pfvf, &data, 0); + + for (stats = 0; stats < CGX_RX_STATS_COUNT; stats++) { + sprintf(data, "cgx_rxstat%d: ", stats); + data += ETH_GSTRING_LEN; + } + + for (stats = 0; stats < CGX_TX_STATS_COUNT; stats++) { + sprintf(data, "cgx_txstat%d: ", stats); + data += ETH_GSTRING_LEN; + } + + strcpy(data, "reset_count"); + data += ETH_GSTRING_LEN; +} + +static void otx2_get_qset_stats(struct otx2_nic *pfvf, + struct ethtool_stats *stats, u64 **data) +{ + int stat, qidx; + + if (!pfvf) + return; + for (qidx = 0; qidx < pfvf->hw.rx_queues; qidx++) { + if 
(!otx2_update_rq_stats(pfvf, qidx)) { + for (stat = 0; stat < otx2_n_queue_stats; stat++) + *((*data)++) = 0; + continue; + } + for (stat = 0; stat < otx2_n_queue_stats; stat++) + *((*data)++) = ((u64 *)&pfvf->qset.rq[qidx].stats) + [otx2_queue_stats[stat].index]; + } + + for (qidx = 0; qidx < pfvf->hw.tx_queues; qidx++) { + if (!otx2_update_sq_stats(pfvf, qidx)) { + for (stat = 0; stat < otx2_n_queue_stats; stat++) + *((*data)++) = 0; + continue; + } + for (stat = 0; stat < otx2_n_queue_stats; stat++) + *((*data)++) = ((u64 *)&pfvf->qset.sq[qidx].stats) + [otx2_queue_stats[stat].index]; + } +} + +/* Get device and per queue statistics */ +static void otx2_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) +{ + struct otx2_nic *pfvf = netdev_priv(netdev); + int stat; + + otx2_get_dev_stats(pfvf); + for (stat = 0; stat < otx2_n_dev_stats; stat++) + *(data++) = ((u64 *)&pfvf->hw.dev_stats) + [otx2_dev_stats[stat].index]; + + for (stat = 0; stat < otx2_n_drv_stats; stat++) + *(data++) = atomic_read(&((atomic_t *)&pfvf->hw.drv_stats) + [otx2_drv_stats[stat].index]); + + otx2_get_qset_stats(pfvf, stats, &data); + otx2_update_lmac_stats(pfvf); + for (stat = 0; stat < CGX_RX_STATS_COUNT; stat++) + *(data++) = pfvf->hw.cgx_rx_stats[stat]; + for (stat = 0; stat < CGX_TX_STATS_COUNT; stat++) + *(data++) = pfvf->hw.cgx_tx_stats[stat]; + *(data++) = pfvf->reset_count; +} + +static int otx2_get_sset_count(struct net_device *netdev, int sset) +{ + struct otx2_nic *pfvf = netdev_priv(netdev); + int qstats_count; + + if (sset != ETH_SS_STATS) + return -EINVAL; + + qstats_count = otx2_n_queue_stats * + (pfvf->hw.rx_queues + pfvf->hw.tx_queues); + + return otx2_n_dev_stats + otx2_n_drv_stats + qstats_count + + CGX_RX_STATS_COUNT + CGX_TX_STATS_COUNT + 1; +} + +/* Get no of queues device supports and current queue count */ +static void otx2_get_channels(struct net_device *dev, + struct ethtool_channels *channel) +{ + struct otx2_nic *pfvf = netdev_priv(dev); + + channel->max_rx = pfvf->hw.max_queues; + channel->max_tx = pfvf->hw.max_queues; + + channel->rx_count = pfvf->hw.rx_queues; + channel->tx_count = pfvf->hw.tx_queues; +} + +/* Set no of Tx, Rx queues to be used */ +static int otx2_set_channels(struct net_device *dev, + struct ethtool_channels *channel) +{ + struct otx2_nic *pfvf = netdev_priv(dev); + bool if_up = netif_running(dev); + int err = 0; + + if (!channel->rx_count || !channel->tx_count) + return -EINVAL; + + if (if_up) + otx2_dev_stop(dev); + + err = otx2_set_real_num_queues(dev, channel->tx_count, + channel->rx_count); + if (err) + goto fail; + + pfvf->hw.rx_queues = channel->rx_count; + pfvf->hw.tx_queues = channel->tx_count; + pfvf->qset.cq_cnt = pfvf->hw.tx_queues + pfvf->hw.rx_queues; + +fail: + if (if_up) + otx2_dev_open(dev); + + netdev_info(dev, "Setting num Tx rings to %d, Rx rings to %d success\n", + pfvf->hw.tx_queues, pfvf->hw.rx_queues); + + return err; +} + +static void otx2_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct otx2_nic *pfvf = netdev_priv(netdev); + struct otx2_qset *qs = &pfvf->qset; + + ring->rx_max_pending = Q_COUNT(Q_SIZE_MAX); + ring->rx_pending = qs->rqe_cnt ? qs->rqe_cnt : Q_COUNT(Q_SIZE_256); + ring->tx_max_pending = Q_COUNT(Q_SIZE_MAX); + ring->tx_pending = qs->sqe_cnt ? 
qs->sqe_cnt : Q_COUNT(Q_SIZE_4K); +} + +static int otx2_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct otx2_nic *pfvf = netdev_priv(netdev); + bool if_up = netif_running(netdev); + struct otx2_qset *qs = &pfvf->qset; + u32 rx_count, tx_count; + + if (ring->rx_mini_pending || ring->rx_jumbo_pending) + return -EINVAL; + + /* Permitted lengths are 16 64 256 1K 4K 16K 64K 256K 1M */ + rx_count = ring->rx_pending; + /* On some silicon variants a skid or reserved CQEs are + * needed to avoid CQ overflow. + */ + if (rx_count < pfvf->hw.rq_skid) + rx_count = pfvf->hw.rq_skid; + rx_count = Q_COUNT(Q_SIZE(rx_count, 3)); + + /* Due pipelining impact minimum 2000 unused SQ CQE's + * need to be maintained to avoid CQ overflow, hence the + * minimum 4K size. + */ + tx_count = clamp_t(u32, ring->tx_pending, + Q_COUNT(Q_SIZE_4K), Q_COUNT(Q_SIZE_MAX)); + tx_count = Q_COUNT(Q_SIZE(tx_count, 3)); + + if (tx_count == qs->sqe_cnt && rx_count == qs->rqe_cnt) + return 0; + + if (if_up) + otx2_dev_stop(netdev); + + /* Assigned to the nearest possible exponent. */ + qs->sqe_cnt = tx_count; + qs->rqe_cnt = rx_count; + + if (if_up) + otx2_dev_open(netdev); + return 0; +} + +static int otx2_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *cmd) +{ + struct otx2_nic *pfvf = netdev_priv(netdev); + struct otx2_hw *hw = &pfvf->hw; + + cmd->rx_coalesce_usecs = hw->cq_time_wait; + cmd->rx_max_coalesced_frames = hw->cq_ecount_wait; + cmd->tx_coalesce_usecs = hw->cq_time_wait; + cmd->tx_max_coalesced_frames = hw->cq_ecount_wait; + + return 0; +} + +static int otx2_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + struct otx2_nic *pfvf = netdev_priv(netdev); + struct otx2_hw *hw = &pfvf->hw; + int qidx; + + if (ec->use_adaptive_rx_coalesce || ec->use_adaptive_tx_coalesce || + ec->rx_coalesce_usecs_irq || ec->rx_max_coalesced_frames_irq || + ec->tx_coalesce_usecs_irq || ec->tx_max_coalesced_frames_irq || + ec->stats_block_coalesce_usecs || ec->pkt_rate_low || + ec->rx_coalesce_usecs_low || ec->rx_max_coalesced_frames_low || + ec->tx_coalesce_usecs_low || ec->tx_max_coalesced_frames_low || + ec->pkt_rate_high || ec->rx_coalesce_usecs_high || + ec->rx_max_coalesced_frames_high || ec->tx_coalesce_usecs_high || + ec->tx_max_coalesced_frames_high || ec->rate_sample_interval) + return -EOPNOTSUPP; + + if (!ec->rx_max_coalesced_frames || !ec->tx_max_coalesced_frames) + return 0; + + /* 'cq_time_wait' is 8bit and is in multiple of 100ns, + * so clamp the user given value to the range of 1 to 25usec. + */ + ec->rx_coalesce_usecs = clamp_t(u32, ec->rx_coalesce_usecs, + 1, CQ_TIMER_THRESH_MAX); + ec->tx_coalesce_usecs = clamp_t(u32, ec->tx_coalesce_usecs, + 1, CQ_TIMER_THRESH_MAX); + + /* Rx and Tx are mapped to same CQ, check which one + * is changed, if both then choose the min. + */ + if (hw->cq_time_wait == ec->rx_coalesce_usecs) + hw->cq_time_wait = ec->tx_coalesce_usecs; + else if (hw->cq_time_wait == ec->tx_coalesce_usecs) + hw->cq_time_wait = ec->rx_coalesce_usecs; + else + hw->cq_time_wait = min_t(u8, ec->rx_coalesce_usecs, + ec->tx_coalesce_usecs); + + /* Max ecount_wait supported is 16bit, + * so clamp the user given value to the range of 1 to 64k. + */ + ec->rx_max_coalesced_frames = clamp_t(u32, ec->rx_max_coalesced_frames, + 1, U16_MAX); + ec->tx_max_coalesced_frames = clamp_t(u32, ec->tx_max_coalesced_frames, + 1, U16_MAX); + + /* Rx and Tx are mapped to same CQ, check which one + * is changed, if both then choose the min. 
+ */ + if (hw->cq_ecount_wait == ec->rx_max_coalesced_frames) + hw->cq_ecount_wait = ec->tx_max_coalesced_frames; + else if (hw->cq_ecount_wait == ec->tx_max_coalesced_frames) + hw->cq_ecount_wait = ec->rx_max_coalesced_frames; + else + hw->cq_ecount_wait = min_t(u16, ec->rx_max_coalesced_frames, + ec->tx_max_coalesced_frames); + + if (netif_running(netdev)) { + for (qidx = 0; qidx < pfvf->hw.cint_cnt; qidx++) + otx2_config_irq_coalescing(pfvf, qidx); + } + + return 0; +} + +static int otx2_get_rss_hash_opts(struct otx2_nic *pfvf, + struct ethtool_rxnfc *nfc) +{ + struct otx2_rss_info *rss = &pfvf->hw.rss_info; + + if (!(rss->flowkey_cfg & + (NIX_FLOW_KEY_TYPE_IPV4 | NIX_FLOW_KEY_TYPE_IPV6))) + return 0; + + /* Mimimum is IPv4 and IPv6, SIP/DIP */ + nfc->data = RXH_IP_SRC | RXH_IP_DST; + + switch (nfc->flow_type) { + case TCP_V4_FLOW: + case TCP_V6_FLOW: + if (rss->flowkey_cfg & NIX_FLOW_KEY_TYPE_TCP) + nfc->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + break; + case UDP_V4_FLOW: + case UDP_V6_FLOW: + if (rss->flowkey_cfg & NIX_FLOW_KEY_TYPE_UDP) + nfc->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + break; + case SCTP_V4_FLOW: + case SCTP_V6_FLOW: + if (rss->flowkey_cfg & NIX_FLOW_KEY_TYPE_SCTP) + nfc->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + break; + case AH_ESP_V4_FLOW: + case AH_V4_FLOW: + case ESP_V4_FLOW: + case IPV4_FLOW: + case AH_ESP_V6_FLOW: + case AH_V6_FLOW: + case ESP_V6_FLOW: + case IPV6_FLOW: + break; + default: + return -EINVAL; + } + return 0; +} + +static int otx2_set_rss_hash_opts(struct otx2_nic *pfvf, + struct ethtool_rxnfc *nfc) +{ + struct otx2_rss_info *rss = &pfvf->hw.rss_info; + u32 rxh_l4 = RXH_L4_B_0_1 | RXH_L4_B_2_3; + u32 rss_cfg = rss->flowkey_cfg; + + if (!rss->enable) { + netdev_err(pfvf->netdev, + "RSS is disabled, cannot change settings\n"); + return -EIO; + } + + /* Mimimum is IPv4 and IPv6, SIP/DIP */ + if (!(nfc->data & RXH_IP_SRC) || !(nfc->data & RXH_IP_DST)) + return -EINVAL; + + switch (nfc->flow_type) { + case TCP_V4_FLOW: + case TCP_V6_FLOW: + /* Different config for v4 and v6 is not supported. + * Both of them have to be either 4-tuple or 2-tuple. 
+ */ + switch (nfc->data & rxh_l4) { + case 0: + rss_cfg &= ~NIX_FLOW_KEY_TYPE_TCP; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + rss_cfg |= NIX_FLOW_KEY_TYPE_TCP; + break; + default: + return -EINVAL; + } + break; + case UDP_V4_FLOW: + case UDP_V6_FLOW: + switch (nfc->data & rxh_l4) { + case 0: + rss_cfg &= ~NIX_FLOW_KEY_TYPE_UDP; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + rss_cfg |= NIX_FLOW_KEY_TYPE_UDP; + break; + default: + return -EINVAL; + } + break; + case SCTP_V4_FLOW: + case SCTP_V6_FLOW: + switch (nfc->data & rxh_l4) { + case 0: + rss_cfg &= ~NIX_FLOW_KEY_TYPE_SCTP; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + rss_cfg |= NIX_FLOW_KEY_TYPE_SCTP; + break; + default: + return -EINVAL; + } + break; + case IPV4_FLOW: + case IPV6_FLOW: + rss_cfg = NIX_FLOW_KEY_TYPE_IPV4 | NIX_FLOW_KEY_TYPE_IPV6; + break; + default: + return -EINVAL; + } + + rss->flowkey_cfg = rss_cfg; + otx2_set_flowkey_cfg(pfvf); + return 0; +} + +static int otx2_get_rxnfc(struct net_device *dev, + struct ethtool_rxnfc *nfc, u32 *rules) +{ + struct otx2_nic *pfvf = netdev_priv(dev); + int ret = -EOPNOTSUPP; + + switch (nfc->cmd) { + case ETHTOOL_GRXRINGS: + nfc->data = pfvf->hw.rx_queues; + ret = 0; + break; + case ETHTOOL_GRXFH: + return otx2_get_rss_hash_opts(pfvf, nfc); + default: + break; + } + return ret; +} + +static int otx2_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *nfc) +{ + struct otx2_nic *pfvf = netdev_priv(dev); + int ret = -EOPNOTSUPP; + + switch (nfc->cmd) { + case ETHTOOL_SRXFH: + ret = otx2_set_rss_hash_opts(pfvf, nfc); + break; + default: + break; + } + + return ret; +} + +static u32 otx2_get_rxfh_key_size(struct net_device *netdev) +{ + struct otx2_nic *pfvf = netdev_priv(netdev); + struct otx2_rss_info *rss; + + rss = &pfvf->hw.rss_info; + + return sizeof(rss->key); +} + +static u32 otx2_get_rxfh_indir_size(struct net_device *dev) +{ + struct otx2_nic *pfvf = netdev_priv(dev); + + return pfvf->hw.rss_info.rss_size; +} + +/* Get RSS configuration */ +static int otx2_get_rxfh(struct net_device *dev, u32 *indir, + u8 *hkey, u8 *hfunc) +{ + struct otx2_nic *pfvf = netdev_priv(dev); + struct otx2_rss_info *rss; + int idx; + + rss = &pfvf->hw.rss_info; + + if (indir) { + for (idx = 0; idx < rss->rss_size; idx++) + indir[idx] = rss->ind_tbl[idx]; + } + + if (hkey) + memcpy(hkey, rss->key, sizeof(rss->key)); + + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + + return 0; +} + +/* Configure RSS table and hash key */ +static int otx2_set_rxfh(struct net_device *dev, const u32 *indir, + const u8 *hkey, const u8 hfunc) +{ + struct otx2_nic *pfvf = netdev_priv(dev); + struct otx2_rss_info *rss; + int idx; + + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) + return -EOPNOTSUPP; + + rss = &pfvf->hw.rss_info; + + if (!rss->enable) { + netdev_err(dev, "RSS is disabled, cannot change settings\n"); + return -EIO; + } + + if (indir) { + for (idx = 0; idx < rss->rss_size; idx++) + rss->ind_tbl[idx] = indir[idx]; + } + + if (hkey) { + memcpy(rss->key, hkey, sizeof(rss->key)); + otx2_set_rss_key(pfvf); + } + + otx2_set_rss_table(pfvf); + return 0; +} + +static u32 otx2_get_msglevel(struct net_device *netdev) +{ + struct otx2_nic *pfvf = netdev_priv(netdev); + + return pfvf->msg_enable; +} + +static void otx2_set_msglevel(struct net_device *netdev, u32 val) +{ + struct otx2_nic *pfvf = netdev_priv(netdev); + + pfvf->msg_enable = val; +} + +static u32 otx2_get_link(struct net_device *netdev) +{ + struct otx2_nic *pfvf = netdev_priv(netdev); + + return pfvf->linfo.link_up; +} + 
+static const struct ethtool_ops otx2_ethtool_ops = { + .get_link = otx2_get_link, + .get_drvinfo = otx2_get_drvinfo, + .get_strings = otx2_get_strings, + .get_ethtool_stats = otx2_get_ethtool_stats, + .get_sset_count = otx2_get_sset_count, + .set_channels = otx2_set_channels, + .get_channels = otx2_get_channels, + .get_ringparam = otx2_get_ringparam, + .set_ringparam = otx2_set_ringparam, + .get_coalesce = otx2_get_coalesce, + .set_coalesce = otx2_set_coalesce, + .get_rxnfc = otx2_get_rxnfc, + .set_rxnfc = otx2_set_rxnfc, + .get_rxfh_key_size = otx2_get_rxfh_key_size, + .get_rxfh_indir_size = otx2_get_rxfh_indir_size, + .get_rxfh = otx2_get_rxfh, + .set_rxfh = otx2_set_rxfh, + .get_msglevel = otx2_get_msglevel, + .set_msglevel = otx2_set_msglevel, +}; + +void otx2_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &otx2_ethtool_ops; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c new file mode 100644 index 000000000000..85f9b9ba6bd5 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -0,0 +1,1349 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTx2 RVU Physcial Function ethernet driver + * + * Copyright (C) 2020 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/etherdevice.h> +#include <linux/of.h> +#include <linux/if_vlan.h> +#include <linux/iommu.h> +#include <net/ip.h> + +#include "otx2_reg.h" +#include "otx2_common.h" +#include "otx2_txrx.h" +#include "otx2_struct.h" + +#define DRV_NAME "octeontx2-nicpf" +#define DRV_STRING "Marvell OcteonTX2 NIC Physical Function Driver" +#define DRV_VERSION "1.0" + +/* Supported devices */ +static const struct pci_device_id otx2_pf_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_RVU_PF) }, + { 0, } /* end of table */ +}; + +MODULE_AUTHOR("Marvell International Ltd."); +MODULE_DESCRIPTION(DRV_STRING); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, otx2_pf_id_table); + +enum { + TYPE_PFAF, + TYPE_PFVF, +}; + +static int otx2_change_mtu(struct net_device *netdev, int new_mtu) +{ + bool if_up = netif_running(netdev); + int err = 0; + + if (if_up) + otx2_stop(netdev); + + netdev_info(netdev, "Changing MTU from %d to %d\n", + netdev->mtu, new_mtu); + netdev->mtu = new_mtu; + + if (if_up) + err = otx2_open(netdev); + + return err; +} + +static void otx2_queue_work(struct mbox *mw, struct workqueue_struct *mbox_wq, + int first, int mdevs, u64 intr, int type) +{ + struct otx2_mbox_dev *mdev; + struct otx2_mbox *mbox; + struct mbox_hdr *hdr; + int i; + + for (i = first; i < mdevs; i++) { + /* start from 0 */ + if (!(intr & BIT_ULL(i - first))) + continue; + + mbox = &mw->mbox; + mdev = &mbox->dev[i]; + if (type == TYPE_PFAF) + otx2_sync_mbox_bbuf(mbox, i); + hdr = mdev->mbase + mbox->rx_start; + /* The hdr->num_msgs is set to zero immediately in the interrupt + * handler to ensure that it holds a correct value next time + * when the interrupt handler is called. + * pf->mbox.num_msgs holds the data for use in pfaf_mbox_handler + * pf>mbox.up_num_msgs holds the data for use in + * pfaf_mbox_up_handler. 
+ */ + if (hdr->num_msgs) { + mw[i].num_msgs = hdr->num_msgs; + hdr->num_msgs = 0; + if (type == TYPE_PFAF) + memset(mbox->hwbase + mbox->rx_start, 0, + ALIGN(sizeof(struct mbox_hdr), + sizeof(u64))); + + queue_work(mbox_wq, &mw[i].mbox_wrk); + } + + mbox = &mw->mbox_up; + mdev = &mbox->dev[i]; + if (type == TYPE_PFAF) + otx2_sync_mbox_bbuf(mbox, i); + hdr = mdev->mbase + mbox->rx_start; + if (hdr->num_msgs) { + mw[i].up_num_msgs = hdr->num_msgs; + hdr->num_msgs = 0; + if (type == TYPE_PFAF) + memset(mbox->hwbase + mbox->rx_start, 0, + ALIGN(sizeof(struct mbox_hdr), + sizeof(u64))); + + queue_work(mbox_wq, &mw[i].mbox_up_wrk); + } + } +} + +static void otx2_process_pfaf_mbox_msg(struct otx2_nic *pf, + struct mbox_msghdr *msg) +{ + if (msg->id >= MBOX_MSG_MAX) { + dev_err(pf->dev, + "Mbox msg with unknown ID 0x%x\n", msg->id); + return; + } + + if (msg->sig != OTX2_MBOX_RSP_SIG) { + dev_err(pf->dev, + "Mbox msg with wrong signature %x, ID 0x%x\n", + msg->sig, msg->id); + return; + } + + switch (msg->id) { + case MBOX_MSG_READY: + pf->pcifunc = msg->pcifunc; + break; + case MBOX_MSG_MSIX_OFFSET: + mbox_handler_msix_offset(pf, (struct msix_offset_rsp *)msg); + break; + case MBOX_MSG_NPA_LF_ALLOC: + mbox_handler_npa_lf_alloc(pf, (struct npa_lf_alloc_rsp *)msg); + break; + case MBOX_MSG_NIX_LF_ALLOC: + mbox_handler_nix_lf_alloc(pf, (struct nix_lf_alloc_rsp *)msg); + break; + case MBOX_MSG_NIX_TXSCH_ALLOC: + mbox_handler_nix_txsch_alloc(pf, + (struct nix_txsch_alloc_rsp *)msg); + break; + case MBOX_MSG_CGX_STATS: + mbox_handler_cgx_stats(pf, (struct cgx_stats_rsp *)msg); + break; + default: + if (msg->rc) + dev_err(pf->dev, + "Mbox msg response has err %d, ID 0x%x\n", + msg->rc, msg->id); + break; + } +} + +static void otx2_pfaf_mbox_handler(struct work_struct *work) +{ + struct otx2_mbox_dev *mdev; + struct mbox_hdr *rsp_hdr; + struct mbox_msghdr *msg; + struct otx2_mbox *mbox; + struct mbox *af_mbox; + struct otx2_nic *pf; + int offset, id; + + af_mbox = container_of(work, struct mbox, mbox_wrk); + mbox = &af_mbox->mbox; + mdev = &mbox->dev[0]; + rsp_hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + + offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); + pf = af_mbox->pfvf; + + for (id = 0; id < af_mbox->num_msgs; id++) { + msg = (struct mbox_msghdr *)(mdev->mbase + offset); + otx2_process_pfaf_mbox_msg(pf, msg); + offset = mbox->rx_start + msg->next_msgoff; + mdev->msgs_acked++; + } + + otx2_mbox_reset(mbox, 0); +} + +static void otx2_handle_link_event(struct otx2_nic *pf) +{ + struct cgx_link_user_info *linfo = &pf->linfo; + struct net_device *netdev = pf->netdev; + + pr_info("%s NIC Link is %s %d Mbps %s duplex\n", netdev->name, + linfo->link_up ? "UP" : "DOWN", linfo->speed, + linfo->full_duplex ? 
"Full" : "Half"); + if (linfo->link_up) { + netif_carrier_on(netdev); + netif_tx_start_all_queues(netdev); + } else { + netif_tx_stop_all_queues(netdev); + netif_carrier_off(netdev); + } +} + +int otx2_mbox_up_handler_cgx_link_event(struct otx2_nic *pf, + struct cgx_link_info_msg *msg, + struct msg_rsp *rsp) +{ + /* Copy the link info sent by AF */ + pf->linfo = msg->link_info; + + /* interface has not been fully configured yet */ + if (pf->flags & OTX2_FLAG_INTF_DOWN) + return 0; + + otx2_handle_link_event(pf); + return 0; +} + +static int otx2_process_mbox_msg_up(struct otx2_nic *pf, + struct mbox_msghdr *req) +{ + /* Check if valid, if not reply with a invalid msg */ + if (req->sig != OTX2_MBOX_REQ_SIG) { + otx2_reply_invalid_msg(&pf->mbox.mbox_up, 0, 0, req->id); + return -ENODEV; + } + + switch (req->id) { +#define M(_name, _id, _fn_name, _req_type, _rsp_type) \ + case _id: { \ + struct _rsp_type *rsp; \ + int err; \ + \ + rsp = (struct _rsp_type *)otx2_mbox_alloc_msg( \ + &pf->mbox.mbox_up, 0, \ + sizeof(struct _rsp_type)); \ + if (!rsp) \ + return -ENOMEM; \ + \ + rsp->hdr.id = _id; \ + rsp->hdr.sig = OTX2_MBOX_RSP_SIG; \ + rsp->hdr.pcifunc = 0; \ + rsp->hdr.rc = 0; \ + \ + err = otx2_mbox_up_handler_ ## _fn_name( \ + pf, (struct _req_type *)req, rsp); \ + return err; \ + } +MBOX_UP_CGX_MESSAGES +#undef M + break; + default: + otx2_reply_invalid_msg(&pf->mbox.mbox_up, 0, 0, req->id); + return -ENODEV; + } + return 0; +} + +static void otx2_pfaf_mbox_up_handler(struct work_struct *work) +{ + struct mbox *af_mbox = container_of(work, struct mbox, mbox_up_wrk); + struct otx2_mbox *mbox = &af_mbox->mbox_up; + struct otx2_mbox_dev *mdev = &mbox->dev[0]; + struct otx2_nic *pf = af_mbox->pfvf; + int offset, id, devid = 0; + struct mbox_hdr *rsp_hdr; + struct mbox_msghdr *msg; + + rsp_hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + + offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); + + for (id = 0; id < af_mbox->up_num_msgs; id++) { + msg = (struct mbox_msghdr *)(mdev->mbase + offset); + + devid = msg->pcifunc & RVU_PFVF_FUNC_MASK; + /* Skip processing VF's messages */ + if (!devid) + otx2_process_mbox_msg_up(pf, msg); + offset = mbox->rx_start + msg->next_msgoff; + } + + otx2_mbox_msg_send(mbox, 0); +} + +static irqreturn_t otx2_pfaf_mbox_intr_handler(int irq, void *pf_irq) +{ + struct otx2_nic *pf = (struct otx2_nic *)pf_irq; + struct mbox *mbox; + + /* Clear the IRQ */ + otx2_write64(pf, RVU_PF_INT, BIT_ULL(0)); + + mbox = &pf->mbox; + otx2_queue_work(mbox, pf->mbox_wq, 0, 1, 1, TYPE_PFAF); + + return IRQ_HANDLED; +} + +static void otx2_disable_mbox_intr(struct otx2_nic *pf) +{ + int vector = pci_irq_vector(pf->pdev, RVU_PF_INT_VEC_AFPF_MBOX); + + /* Disable AF => PF mailbox IRQ */ + otx2_write64(pf, RVU_PF_INT_ENA_W1C, BIT_ULL(0)); + free_irq(vector, pf); +} + +static int otx2_register_mbox_intr(struct otx2_nic *pf, bool probe_af) +{ + struct otx2_hw *hw = &pf->hw; + struct msg_req *req; + char *irq_name; + int err; + + /* Register mailbox interrupt handler */ + irq_name = &hw->irq_name[RVU_PF_INT_VEC_AFPF_MBOX * NAME_SIZE]; + snprintf(irq_name, NAME_SIZE, "RVUPFAF Mbox"); + err = request_irq(pci_irq_vector(pf->pdev, RVU_PF_INT_VEC_AFPF_MBOX), + otx2_pfaf_mbox_intr_handler, 0, irq_name, pf); + if (err) { + dev_err(pf->dev, + "RVUPF: IRQ registration failed for PFAF mbox irq\n"); + return err; + } + + /* Enable mailbox interrupt for msgs coming from AF. + * First clear to avoid spurious interrupts, if any. 
+ */ + otx2_write64(pf, RVU_PF_INT, BIT_ULL(0)); + otx2_write64(pf, RVU_PF_INT_ENA_W1S, BIT_ULL(0)); + + if (!probe_af) + return 0; + + /* Check mailbox communication with AF */ + req = otx2_mbox_alloc_msg_ready(&pf->mbox); + if (!req) { + otx2_disable_mbox_intr(pf); + return -ENOMEM; + } + err = otx2_sync_mbox_msg(&pf->mbox); + if (err) { + dev_warn(pf->dev, + "AF not responding to mailbox, deferring probe\n"); + otx2_disable_mbox_intr(pf); + return -EPROBE_DEFER; + } + + return 0; +} + +static void otx2_pfaf_mbox_destroy(struct otx2_nic *pf) +{ + struct mbox *mbox = &pf->mbox; + + if (pf->mbox_wq) { + flush_workqueue(pf->mbox_wq); + destroy_workqueue(pf->mbox_wq); + pf->mbox_wq = NULL; + } + + if (mbox->mbox.hwbase) + iounmap((void __iomem *)mbox->mbox.hwbase); + + otx2_mbox_destroy(&mbox->mbox); + otx2_mbox_destroy(&mbox->mbox_up); +} + +static int otx2_pfaf_mbox_init(struct otx2_nic *pf) +{ + struct mbox *mbox = &pf->mbox; + void __iomem *hwbase; + int err; + + mbox->pfvf = pf; + pf->mbox_wq = alloc_workqueue("otx2_pfaf_mailbox", + WQ_UNBOUND | WQ_HIGHPRI | + WQ_MEM_RECLAIM, 1); + if (!pf->mbox_wq) + return -ENOMEM; + + /* Mailbox is a reserved memory (in RAM) region shared between + * admin function (i.e AF) and this PF, shouldn't be mapped as + * device memory to allow unaligned accesses. + */ + hwbase = ioremap_wc(pci_resource_start(pf->pdev, PCI_MBOX_BAR_NUM), + pci_resource_len(pf->pdev, PCI_MBOX_BAR_NUM)); + if (!hwbase) { + dev_err(pf->dev, "Unable to map PFAF mailbox region\n"); + err = -ENOMEM; + goto exit; + } + + err = otx2_mbox_init(&mbox->mbox, hwbase, pf->pdev, pf->reg_base, + MBOX_DIR_PFAF, 1); + if (err) + goto exit; + + err = otx2_mbox_init(&mbox->mbox_up, hwbase, pf->pdev, pf->reg_base, + MBOX_DIR_PFAF_UP, 1); + if (err) + goto exit; + + err = otx2_mbox_bbuf_init(mbox, pf->pdev); + if (err) + goto exit; + + INIT_WORK(&mbox->mbox_wrk, otx2_pfaf_mbox_handler); + INIT_WORK(&mbox->mbox_up_wrk, otx2_pfaf_mbox_up_handler); + otx2_mbox_lock_init(&pf->mbox); + + return 0; +exit: + otx2_pfaf_mbox_destroy(pf); + return err; +} + +static int otx2_cgx_config_linkevents(struct otx2_nic *pf, bool enable) +{ + struct msg_req *msg; + int err; + + otx2_mbox_lock(&pf->mbox); + if (enable) + msg = otx2_mbox_alloc_msg_cgx_start_linkevents(&pf->mbox); + else + msg = otx2_mbox_alloc_msg_cgx_stop_linkevents(&pf->mbox); + + if (!msg) { + otx2_mbox_unlock(&pf->mbox); + return -ENOMEM; + } + + err = otx2_sync_mbox_msg(&pf->mbox); + otx2_mbox_unlock(&pf->mbox); + return err; +} + +static int otx2_cgx_config_loopback(struct otx2_nic *pf, bool enable) +{ + struct msg_req *msg; + int err; + + otx2_mbox_lock(&pf->mbox); + if (enable) + msg = otx2_mbox_alloc_msg_cgx_intlbk_enable(&pf->mbox); + else + msg = otx2_mbox_alloc_msg_cgx_intlbk_disable(&pf->mbox); + + if (!msg) { + otx2_mbox_unlock(&pf->mbox); + return -ENOMEM; + } + + err = otx2_sync_mbox_msg(&pf->mbox); + otx2_mbox_unlock(&pf->mbox); + return err; +} + +int otx2_set_real_num_queues(struct net_device *netdev, + int tx_queues, int rx_queues) +{ + int err; + + err = netif_set_real_num_tx_queues(netdev, tx_queues); + if (err) { + netdev_err(netdev, + "Failed to set no of Tx queues: %d\n", tx_queues); + return err; + } + + err = netif_set_real_num_rx_queues(netdev, rx_queues); + if (err) + netdev_err(netdev, + "Failed to set no of Rx queues: %d\n", rx_queues); + return err; +} + +static irqreturn_t otx2_q_intr_handler(int irq, void *data) +{ + struct otx2_nic *pf = data; + u64 val, *ptr; + u64 qidx = 0; + + /* CQ */ + for (qidx = 0; qidx < 
pf->qset.cq_cnt; qidx++) { + ptr = otx2_get_regaddr(pf, NIX_LF_CQ_OP_INT); + val = otx2_atomic64_add((qidx << 44), ptr); + + otx2_write64(pf, NIX_LF_CQ_OP_INT, (qidx << 44) | + (val & NIX_CQERRINT_BITS)); + if (!(val & (NIX_CQERRINT_BITS | BIT_ULL(42)))) + continue; + + if (val & BIT_ULL(42)) { + netdev_err(pf->netdev, "CQ%lld: error reading NIX_LF_CQ_OP_INT, NIX_LF_ERR_INT 0x%llx\n", + qidx, otx2_read64(pf, NIX_LF_ERR_INT)); + } else { + if (val & BIT_ULL(NIX_CQERRINT_DOOR_ERR)) + netdev_err(pf->netdev, "CQ%lld: Doorbell error", + qidx); + if (val & BIT_ULL(NIX_CQERRINT_CQE_FAULT)) + netdev_err(pf->netdev, "CQ%lld: Memory fault on CQE write to LLC/DRAM", + qidx); + } + + schedule_work(&pf->reset_task); + } + + /* SQ */ + for (qidx = 0; qidx < pf->hw.tx_queues; qidx++) { + ptr = otx2_get_regaddr(pf, NIX_LF_SQ_OP_INT); + val = otx2_atomic64_add((qidx << 44), ptr); + otx2_write64(pf, NIX_LF_SQ_OP_INT, (qidx << 44) | + (val & NIX_SQINT_BITS)); + + if (!(val & (NIX_SQINT_BITS | BIT_ULL(42)))) + continue; + + if (val & BIT_ULL(42)) { + netdev_err(pf->netdev, "SQ%lld: error reading NIX_LF_SQ_OP_INT, NIX_LF_ERR_INT 0x%llx\n", + qidx, otx2_read64(pf, NIX_LF_ERR_INT)); + } else { + if (val & BIT_ULL(NIX_SQINT_LMT_ERR)) { + netdev_err(pf->netdev, "SQ%lld: LMT store error NIX_LF_SQ_OP_ERR_DBG:0x%llx", + qidx, + otx2_read64(pf, + NIX_LF_SQ_OP_ERR_DBG)); + otx2_write64(pf, NIX_LF_SQ_OP_ERR_DBG, + BIT_ULL(44)); + } + if (val & BIT_ULL(NIX_SQINT_MNQ_ERR)) { + netdev_err(pf->netdev, "SQ%lld: Meta-descriptor enqueue error NIX_LF_MNQ_ERR_DGB:0x%llx\n", + qidx, + otx2_read64(pf, NIX_LF_MNQ_ERR_DBG)); + otx2_write64(pf, NIX_LF_MNQ_ERR_DBG, + BIT_ULL(44)); + } + if (val & BIT_ULL(NIX_SQINT_SEND_ERR)) { + netdev_err(pf->netdev, "SQ%lld: Send error, NIX_LF_SEND_ERR_DBG 0x%llx", + qidx, + otx2_read64(pf, + NIX_LF_SEND_ERR_DBG)); + otx2_write64(pf, NIX_LF_SEND_ERR_DBG, + BIT_ULL(44)); + } + if (val & BIT_ULL(NIX_SQINT_SQB_ALLOC_FAIL)) + netdev_err(pf->netdev, "SQ%lld: SQB allocation failed", + qidx); + } + + schedule_work(&pf->reset_task); + } + + return IRQ_HANDLED; +} + +static irqreturn_t otx2_cq_intr_handler(int irq, void *cq_irq) +{ + struct otx2_cq_poll *cq_poll = (struct otx2_cq_poll *)cq_irq; + struct otx2_nic *pf = (struct otx2_nic *)cq_poll->dev; + int qidx = cq_poll->cint_idx; + + /* Disable interrupts. + * + * Completion interrupts behave in a level-triggered interrupt + * fashion, and hence have to be cleared only after it is serviced. 
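+	 * NAPI (otx2_napi_handler) re-enables this CINT via
+	 * NIX_LF_CINTX_ENA_W1S once the poll budget is done.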
+ */ + otx2_write64(pf, NIX_LF_CINTX_ENA_W1C(qidx), BIT_ULL(0)); + + /* Schedule NAPI */ + napi_schedule_irqoff(&cq_poll->napi); + + return IRQ_HANDLED; +} + +static void otx2_disable_napi(struct otx2_nic *pf) +{ + struct otx2_qset *qset = &pf->qset; + struct otx2_cq_poll *cq_poll; + int qidx; + + for (qidx = 0; qidx < pf->hw.cint_cnt; qidx++) { + cq_poll = &qset->napi[qidx]; + napi_disable(&cq_poll->napi); + netif_napi_del(&cq_poll->napi); + } +} + +static void otx2_free_cq_res(struct otx2_nic *pf) +{ + struct otx2_qset *qset = &pf->qset; + struct otx2_cq_queue *cq; + int qidx; + + /* Disable CQs */ + otx2_ctx_disable(&pf->mbox, NIX_AQ_CTYPE_CQ, false); + for (qidx = 0; qidx < qset->cq_cnt; qidx++) { + cq = &qset->cq[qidx]; + qmem_free(pf->dev, cq->cqe); + } +} + +static void otx2_free_sq_res(struct otx2_nic *pf) +{ + struct otx2_qset *qset = &pf->qset; + struct otx2_snd_queue *sq; + int qidx; + + /* Disable SQs */ + otx2_ctx_disable(&pf->mbox, NIX_AQ_CTYPE_SQ, false); + /* Free SQB pointers */ + otx2_sq_free_sqbs(pf); + for (qidx = 0; qidx < pf->hw.tx_queues; qidx++) { + sq = &qset->sq[qidx]; + qmem_free(pf->dev, sq->sqe); + qmem_free(pf->dev, sq->tso_hdrs); + kfree(sq->sg); + kfree(sq->sqb_ptrs); + } +} + +static int otx2_init_hw_resources(struct otx2_nic *pf) +{ + struct mbox *mbox = &pf->mbox; + struct otx2_hw *hw = &pf->hw; + struct msg_req *req; + int err = 0, lvl; + + /* Set required NPA LF's pool counts + * Auras and Pools are used in a 1:1 mapping, + * so, aura count = pool count. + */ + hw->rqpool_cnt = hw->rx_queues; + hw->sqpool_cnt = hw->tx_queues; + hw->pool_cnt = hw->rqpool_cnt + hw->sqpool_cnt; + + /* Get the size of receive buffers to allocate */ + pf->rbsize = RCV_FRAG_LEN(pf->netdev->mtu + OTX2_ETH_HLEN); + + otx2_mbox_lock(mbox); + /* NPA init */ + err = otx2_config_npa(pf); + if (err) + goto exit; + + /* NIX init */ + err = otx2_config_nix(pf); + if (err) + goto err_free_npa_lf; + + /* Init Auras and pools used by NIX RQ, for free buffer ptrs */ + err = otx2_rq_aura_pool_init(pf); + if (err) { + otx2_mbox_unlock(mbox); + goto err_free_nix_lf; + } + /* Init Auras and pools used by NIX SQ, for queueing SQEs */ + err = otx2_sq_aura_pool_init(pf); + if (err) { + otx2_mbox_unlock(mbox); + goto err_free_rq_ptrs; + } + + err = otx2_txsch_alloc(pf); + if (err) { + otx2_mbox_unlock(mbox); + goto err_free_sq_ptrs; + } + + err = otx2_config_nix_queues(pf); + if (err) { + otx2_mbox_unlock(mbox); + goto err_free_txsch; + } + for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) { + err = otx2_txschq_config(pf, lvl); + if (err) { + otx2_mbox_unlock(mbox); + goto err_free_nix_queues; + } + } + otx2_mbox_unlock(mbox); + return err; + +err_free_nix_queues: + otx2_free_sq_res(pf); + otx2_free_cq_res(pf); + otx2_ctx_disable(mbox, NIX_AQ_CTYPE_RQ, false); +err_free_txsch: + if (otx2_txschq_stop(pf)) + dev_err(pf->dev, "%s failed to stop TX schedulers\n", __func__); +err_free_sq_ptrs: + otx2_sq_free_sqbs(pf); +err_free_rq_ptrs: + otx2_free_aura_ptr(pf, AURA_NIX_RQ); + otx2_ctx_disable(mbox, NPA_AQ_CTYPE_POOL, true); + otx2_ctx_disable(mbox, NPA_AQ_CTYPE_AURA, true); + otx2_aura_pool_free(pf); +err_free_nix_lf: + otx2_mbox_lock(mbox); + req = otx2_mbox_alloc_msg_nix_lf_free(mbox); + if (req) { + if (otx2_sync_mbox_msg(mbox)) + dev_err(pf->dev, "%s failed to free nixlf\n", __func__); + } +err_free_npa_lf: + /* Reset NPA LF */ + req = otx2_mbox_alloc_msg_npa_lf_free(mbox); + if (req) { + if (otx2_sync_mbox_msg(mbox)) + dev_err(pf->dev, "%s failed to free npalf\n", __func__); + } +exit: + 
otx2_mbox_unlock(mbox); + return err; +} + +static void otx2_free_hw_resources(struct otx2_nic *pf) +{ + struct otx2_qset *qset = &pf->qset; + struct mbox *mbox = &pf->mbox; + struct otx2_cq_queue *cq; + struct msg_req *req; + int qidx, err; + + /* Ensure all SQE are processed */ + otx2_sqb_flush(pf); + + /* Stop transmission */ + err = otx2_txschq_stop(pf); + if (err) + dev_err(pf->dev, "RVUPF: Failed to stop/free TX schedulers\n"); + + /* Disable RQs */ + otx2_ctx_disable(mbox, NIX_AQ_CTYPE_RQ, false); + + /*Dequeue all CQEs */ + for (qidx = 0; qidx < qset->cq_cnt; qidx++) { + cq = &qset->cq[qidx]; + if (cq->cq_type == CQ_RX) + otx2_cleanup_rx_cqes(pf, cq); + else + otx2_cleanup_tx_cqes(pf, cq); + } + + otx2_free_sq_res(pf); + + /* Free RQ buffer pointers*/ + otx2_free_aura_ptr(pf, AURA_NIX_RQ); + + otx2_free_cq_res(pf); + + otx2_mbox_lock(mbox); + /* Reset NIX LF */ + req = otx2_mbox_alloc_msg_nix_lf_free(mbox); + if (req) { + if (otx2_sync_mbox_msg(mbox)) + dev_err(pf->dev, "%s failed to free nixlf\n", __func__); + } + otx2_mbox_unlock(mbox); + + /* Disable NPA Pool and Aura hw context */ + otx2_ctx_disable(mbox, NPA_AQ_CTYPE_POOL, true); + otx2_ctx_disable(mbox, NPA_AQ_CTYPE_AURA, true); + otx2_aura_pool_free(pf); + + otx2_mbox_lock(mbox); + /* Reset NPA LF */ + req = otx2_mbox_alloc_msg_npa_lf_free(mbox); + if (req) { + if (otx2_sync_mbox_msg(mbox)) + dev_err(pf->dev, "%s failed to free npalf\n", __func__); + } + otx2_mbox_unlock(mbox); +} + +int otx2_open(struct net_device *netdev) +{ + struct otx2_nic *pf = netdev_priv(netdev); + struct otx2_cq_poll *cq_poll = NULL; + struct otx2_qset *qset = &pf->qset; + int err = 0, qidx, vec; + char *irq_name; + + netif_carrier_off(netdev); + + pf->qset.cq_cnt = pf->hw.rx_queues + pf->hw.tx_queues; + /* RQ and SQs are mapped to different CQs, + * so find out max CQ IRQs (i.e CINTs) needed. + */ + pf->hw.cint_cnt = max(pf->hw.rx_queues, pf->hw.tx_queues); + qset->napi = kcalloc(pf->hw.cint_cnt, sizeof(*cq_poll), GFP_KERNEL); + if (!qset->napi) + return -ENOMEM; + + /* CQ size of RQ */ + qset->rqe_cnt = qset->rqe_cnt ? qset->rqe_cnt : Q_COUNT(Q_SIZE_256); + /* CQ size of SQ */ + qset->sqe_cnt = qset->sqe_cnt ? qset->sqe_cnt : Q_COUNT(Q_SIZE_4K); + + err = -ENOMEM; + qset->cq = kcalloc(pf->qset.cq_cnt, + sizeof(struct otx2_cq_queue), GFP_KERNEL); + if (!qset->cq) + goto err_free_mem; + + qset->sq = kcalloc(pf->hw.tx_queues, + sizeof(struct otx2_snd_queue), GFP_KERNEL); + if (!qset->sq) + goto err_free_mem; + + qset->rq = kcalloc(pf->hw.rx_queues, + sizeof(struct otx2_rcv_queue), GFP_KERNEL); + if (!qset->rq) + goto err_free_mem; + + err = otx2_init_hw_resources(pf); + if (err) + goto err_free_mem; + + /* Register NAPI handler */ + for (qidx = 0; qidx < pf->hw.cint_cnt; qidx++) { + cq_poll = &qset->napi[qidx]; + cq_poll->cint_idx = qidx; + /* RQ0 & SQ0 are mapped to CINT0 and so on.. + * 'cq_ids[0]' points to RQ's CQ and + * 'cq_ids[1]' points to SQ's CQ and + */ + cq_poll->cq_ids[CQ_RX] = + (qidx < pf->hw.rx_queues) ? qidx : CINT_INVALID_CQ; + cq_poll->cq_ids[CQ_TX] = (qidx < pf->hw.tx_queues) ? 
+ qidx + pf->hw.rx_queues : CINT_INVALID_CQ; + cq_poll->dev = (void *)pf; + netif_napi_add(netdev, &cq_poll->napi, + otx2_napi_handler, NAPI_POLL_WEIGHT); + napi_enable(&cq_poll->napi); + } + + /* Set maximum frame size allowed in HW */ + err = otx2_hw_set_mtu(pf, netdev->mtu); + if (err) + goto err_disable_napi; + + /* Initialize RSS */ + err = otx2_rss_init(pf); + if (err) + goto err_disable_napi; + + /* Register Queue IRQ handlers */ + vec = pf->hw.nix_msixoff + NIX_LF_QINT_VEC_START; + irq_name = &pf->hw.irq_name[vec * NAME_SIZE]; + + snprintf(irq_name, NAME_SIZE, "%s-qerr", pf->netdev->name); + + err = request_irq(pci_irq_vector(pf->pdev, vec), + otx2_q_intr_handler, 0, irq_name, pf); + if (err) { + dev_err(pf->dev, + "RVUPF%d: IRQ registration failed for QERR\n", + rvu_get_pf(pf->pcifunc)); + goto err_disable_napi; + } + + /* Enable QINT IRQ */ + otx2_write64(pf, NIX_LF_QINTX_ENA_W1S(0), BIT_ULL(0)); + + /* Register CQ IRQ handlers */ + vec = pf->hw.nix_msixoff + NIX_LF_CINT_VEC_START; + for (qidx = 0; qidx < pf->hw.cint_cnt; qidx++) { + irq_name = &pf->hw.irq_name[vec * NAME_SIZE]; + + snprintf(irq_name, NAME_SIZE, "%s-rxtx-%d", pf->netdev->name, + qidx); + + err = request_irq(pci_irq_vector(pf->pdev, vec), + otx2_cq_intr_handler, 0, irq_name, + &qset->napi[qidx]); + if (err) { + dev_err(pf->dev, + "RVUPF%d: IRQ registration failed for CQ%d\n", + rvu_get_pf(pf->pcifunc), qidx); + goto err_free_cints; + } + vec++; + + otx2_config_irq_coalescing(pf, qidx); + + /* Enable CQ IRQ */ + otx2_write64(pf, NIX_LF_CINTX_INT(qidx), BIT_ULL(0)); + otx2_write64(pf, NIX_LF_CINTX_ENA_W1S(qidx), BIT_ULL(0)); + } + + otx2_set_cints_affinity(pf); + + pf->flags &= ~OTX2_FLAG_INTF_DOWN; + /* 'intf_down' may be checked on any cpu */ + smp_wmb(); + + /* we have already received link status notification */ + if (pf->linfo.link_up && !(pf->pcifunc & RVU_PFVF_FUNC_MASK)) + otx2_handle_link_event(pf); + + err = otx2_rxtx_enable(pf, true); + if (err) + goto err_free_cints; + + return 0; + +err_free_cints: + otx2_free_cints(pf, qidx); + vec = pci_irq_vector(pf->pdev, + pf->hw.nix_msixoff + NIX_LF_QINT_VEC_START); + otx2_write64(pf, NIX_LF_QINTX_ENA_W1C(0), BIT_ULL(0)); + synchronize_irq(vec); + free_irq(vec, pf); +err_disable_napi: + otx2_disable_napi(pf); + otx2_free_hw_resources(pf); +err_free_mem: + kfree(qset->sq); + kfree(qset->cq); + kfree(qset->rq); + kfree(qset->napi); + return err; +} + +int otx2_stop(struct net_device *netdev) +{ + struct otx2_nic *pf = netdev_priv(netdev); + struct otx2_cq_poll *cq_poll = NULL; + struct otx2_qset *qset = &pf->qset; + int qidx, vec, wrk; + + netif_carrier_off(netdev); + netif_tx_stop_all_queues(netdev); + + pf->flags |= OTX2_FLAG_INTF_DOWN; + /* 'intf_down' may be checked on any cpu */ + smp_wmb(); + + /* First stop packet Rx/Tx */ + otx2_rxtx_enable(pf, false); + + /* Cleanup Queue IRQ */ + vec = pci_irq_vector(pf->pdev, + pf->hw.nix_msixoff + NIX_LF_QINT_VEC_START); + otx2_write64(pf, NIX_LF_QINTX_ENA_W1C(0), BIT_ULL(0)); + synchronize_irq(vec); + free_irq(vec, pf); + + /* Cleanup CQ NAPI and IRQ */ + vec = pf->hw.nix_msixoff + NIX_LF_CINT_VEC_START; + for (qidx = 0; qidx < pf->hw.cint_cnt; qidx++) { + /* Disable interrupt */ + otx2_write64(pf, NIX_LF_CINTX_ENA_W1C(qidx), BIT_ULL(0)); + + synchronize_irq(pci_irq_vector(pf->pdev, vec)); + + cq_poll = &qset->napi[qidx]; + napi_synchronize(&cq_poll->napi); + vec++; + } + + netif_tx_disable(netdev); + + otx2_free_hw_resources(pf); + otx2_free_cints(pf, pf->hw.cint_cnt); + otx2_disable_napi(pf); + + for (qidx = 0; qidx < 
netdev->num_tx_queues; qidx++) + netdev_tx_reset_queue(netdev_get_tx_queue(netdev, qidx)); + + for (wrk = 0; wrk < pf->qset.cq_cnt; wrk++) + cancel_delayed_work_sync(&pf->refill_wrk[wrk].pool_refill_work); + devm_kfree(pf->dev, pf->refill_wrk); + + kfree(qset->sq); + kfree(qset->cq); + kfree(qset->rq); + kfree(qset->napi); + /* Do not clear RQ/SQ ringsize settings */ + memset((void *)qset + offsetof(struct otx2_qset, sqe_cnt), 0, + sizeof(*qset) - offsetof(struct otx2_qset, sqe_cnt)); + return 0; +} + +static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct otx2_nic *pf = netdev_priv(netdev); + int qidx = skb_get_queue_mapping(skb); + struct otx2_snd_queue *sq; + struct netdev_queue *txq; + + /* Check for minimum and maximum packet length */ + if (skb->len <= ETH_HLEN || + (!skb_shinfo(skb)->gso_size && skb->len > pf->max_frs)) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + + sq = &pf->qset.sq[qidx]; + txq = netdev_get_tx_queue(netdev, qidx); + + if (!otx2_sq_append_skb(netdev, sq, skb, qidx)) { + netif_tx_stop_queue(txq); + + /* Check again, incase SQBs got freed up */ + smp_mb(); + if (((sq->num_sqbs - *sq->aura_fc_addr) * sq->sqe_per_sqb) + > sq->sqe_thresh) + netif_tx_wake_queue(txq); + + return NETDEV_TX_BUSY; + } + + return NETDEV_TX_OK; +} + +static void otx2_set_rx_mode(struct net_device *netdev) +{ + struct otx2_nic *pf = netdev_priv(netdev); + struct nix_rx_mode *req; + + if (!(netdev->flags & IFF_UP)) + return; + + otx2_mbox_lock(&pf->mbox); + req = otx2_mbox_alloc_msg_nix_set_rx_mode(&pf->mbox); + if (!req) { + otx2_mbox_unlock(&pf->mbox); + return; + } + + req->mode = NIX_RX_MODE_UCAST; + + /* We don't support MAC address filtering yet */ + if (netdev->flags & IFF_PROMISC) + req->mode |= NIX_RX_MODE_PROMISC; + else if (netdev->flags & (IFF_ALLMULTI | IFF_MULTICAST)) + req->mode |= NIX_RX_MODE_ALLMULTI; + + otx2_sync_mbox_msg(&pf->mbox); + otx2_mbox_unlock(&pf->mbox); +} + +static int otx2_set_features(struct net_device *netdev, + netdev_features_t features) +{ + netdev_features_t changed = features ^ netdev->features; + struct otx2_nic *pf = netdev_priv(netdev); + + if ((changed & NETIF_F_LOOPBACK) && netif_running(netdev)) + return otx2_cgx_config_loopback(pf, + features & NETIF_F_LOOPBACK); + return 0; +} + +static void otx2_reset_task(struct work_struct *work) +{ + struct otx2_nic *pf = container_of(work, struct otx2_nic, reset_task); + + if (!netif_running(pf->netdev)) + return; + + otx2_stop(pf->netdev); + pf->reset_count++; + otx2_open(pf->netdev); + netif_trans_update(pf->netdev); +} + +static const struct net_device_ops otx2_netdev_ops = { + .ndo_open = otx2_open, + .ndo_stop = otx2_stop, + .ndo_start_xmit = otx2_xmit, + .ndo_set_mac_address = otx2_set_mac_address, + .ndo_change_mtu = otx2_change_mtu, + .ndo_set_rx_mode = otx2_set_rx_mode, + .ndo_set_features = otx2_set_features, + .ndo_tx_timeout = otx2_tx_timeout, + .ndo_get_stats64 = otx2_get_stats64, +}; + +static int otx2_check_pf_usable(struct otx2_nic *nic) +{ + u64 rev; + + rev = otx2_read64(nic, RVU_PF_BLOCK_ADDRX_DISC(BLKADDR_RVUM)); + rev = (rev >> 12) & 0xFF; + /* Check if AF has setup revision for RVUM block, + * otherwise this driver probe should be deferred + * until AF driver comes up. 
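+	 * The revision field of RVU_PF_BLOCK_ADDRX_DISC reads back as
+	 * zero until the AF has initialized the RVUM block.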
+ */
+	if (!rev) {
+		dev_warn(nic->dev,
+			 "AF is not initialized, deferring probe\n");
+		return -EPROBE_DEFER;
+	}
+	return 0;
+}
+
+static int otx2_realloc_msix_vectors(struct otx2_nic *pf)
+{
+	struct otx2_hw *hw = &pf->hw;
+	int num_vec, err;
+
+	/* NPA interrupts are not registered, so alloc only
+	 * up to NIX vector offset.
+	 */
+	num_vec = hw->nix_msixoff;
+	num_vec += NIX_LF_CINT_VEC_START + hw->max_queues;
+
+	otx2_disable_mbox_intr(pf);
+	pci_free_irq_vectors(hw->pdev);
+	err = pci_alloc_irq_vectors(hw->pdev, num_vec, num_vec, PCI_IRQ_MSIX);
+	if (err < 0) {
+		dev_err(pf->dev, "%s: Failed to realloc %d IRQ vectors\n",
+			__func__, num_vec);
+		return err;
+	}
+
+	return otx2_register_mbox_intr(pf, false);
+}
+
+static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct device *dev = &pdev->dev;
+	struct net_device *netdev;
+	struct otx2_nic *pf;
+	struct otx2_hw *hw;
+	int err, qcount;
+	int num_vec;
+
+	err = pcim_enable_device(pdev);
+	if (err) {
+		dev_err(dev, "Failed to enable PCI device\n");
+		return err;
+	}
+
+	err = pci_request_regions(pdev, DRV_NAME);
+	if (err) {
+		dev_err(dev, "PCI request regions failed 0x%x\n", err);
+		return err;
+	}
+
+	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
+	if (err) {
+		dev_err(dev, "DMA mask config failed, abort\n");
+		goto err_release_regions;
+	}
+
+	pci_set_master(pdev);
+
+	/* Set number of queues */
+	qcount = min_t(int, num_online_cpus(), OTX2_MAX_CQ_CNT);
+
+	netdev = alloc_etherdev_mqs(sizeof(*pf), qcount, qcount);
+	if (!netdev) {
+		err = -ENOMEM;
+		goto err_release_regions;
+	}
+
+	pci_set_drvdata(pdev, netdev);
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+	pf = netdev_priv(netdev);
+	pf->netdev = netdev;
+	pf->pdev = pdev;
+	pf->dev = dev;
+	pf->flags |= OTX2_FLAG_INTF_DOWN;
+
+	hw = &pf->hw;
+	hw->pdev = pdev;
+	hw->rx_queues = qcount;
+	hw->tx_queues = qcount;
+	hw->max_queues = qcount;
+
+	num_vec = pci_msix_vec_count(pdev);
+	hw->irq_name = devm_kmalloc_array(&hw->pdev->dev, num_vec, NAME_SIZE,
+					  GFP_KERNEL);
+	if (!hw->irq_name)
+		goto err_free_netdev;
+
+	hw->affinity_mask = devm_kcalloc(&hw->pdev->dev, num_vec,
+					 sizeof(cpumask_var_t), GFP_KERNEL);
+	if (!hw->affinity_mask)
+		goto err_free_netdev;
+
+	/* Map CSRs */
+	pf->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
+	if (!pf->reg_base) {
+		dev_err(dev, "Unable to map physical function CSRs, aborting\n");
+		err = -ENOMEM;
+		goto err_free_netdev;
+	}
+
+	err = otx2_check_pf_usable(pf);
+	if (err)
+		goto err_free_netdev;
+
+	err = pci_alloc_irq_vectors(hw->pdev, RVU_PF_INT_VEC_CNT,
+				    RVU_PF_INT_VEC_CNT, PCI_IRQ_MSIX);
+	if (err < 0) {
+		dev_err(dev, "%s: Failed to alloc %d IRQ vectors\n",
+			__func__, num_vec);
+		goto err_free_netdev;
+	}
+
+	/* Init PF <=> AF mailbox stuff */
+	err = otx2_pfaf_mbox_init(pf);
+	if (err)
+		goto err_free_irq_vectors;
+
+	/* Register mailbox interrupt */
+	err = otx2_register_mbox_intr(pf, true);
+	if (err)
+		goto err_mbox_destroy;
+
+	/* Request AF to attach NPA and NIX LFs to this PF.
+	 * NIX and NPA LFs are needed for this PF to function as a NIC.
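+	 * The NPA LF supplies the buffer pools/auras (receive buffers,
+	 * SQB pointers) while the NIX LF provides the actual RQ/SQ/CQ
+	 * queues.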
+ */ + err = otx2_attach_npa_nix(pf); + if (err) + goto err_disable_mbox_intr; + + err = otx2_realloc_msix_vectors(pf); + if (err) + goto err_detach_rsrc; + + err = otx2_set_real_num_queues(netdev, hw->tx_queues, hw->rx_queues); + if (err) + goto err_detach_rsrc; + + otx2_setup_dev_hw_settings(pf); + + /* Assign default mac address */ + otx2_get_mac_from_af(netdev); + + /* NPA's pool is a stack to which SW frees buffer pointers via Aura. + * HW allocates buffer pointer from stack and uses it for DMA'ing + * ingress packet. In some scenarios HW can free back allocated buffer + * pointers to pool. This makes it impossible for SW to maintain a + * parallel list where physical addresses of buffer pointers (IOVAs) + * given to HW can be saved for later reference. + * + * So the only way to convert Rx packet's buffer address is to use + * IOMMU's iova_to_phys() handler which translates the address by + * walking through the translation tables. + */ + pf->iommu_domain = iommu_get_domain_for_dev(dev); + + netdev->hw_features = (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM | NETIF_F_RXHASH | + NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6); + netdev->features |= netdev->hw_features; + + netdev->hw_features |= NETIF_F_LOOPBACK | NETIF_F_RXALL; + + netdev->gso_max_segs = OTX2_MAX_GSO_SEGS; + netdev->watchdog_timeo = OTX2_TX_TIMEOUT; + + netdev->netdev_ops = &otx2_netdev_ops; + + /* MTU range: 64 - 9190 */ + netdev->min_mtu = OTX2_MIN_MTU; + netdev->max_mtu = OTX2_MAX_MTU; + + INIT_WORK(&pf->reset_task, otx2_reset_task); + + err = register_netdev(netdev); + if (err) { + dev_err(dev, "Failed to register netdevice\n"); + goto err_detach_rsrc; + } + + otx2_set_ethtool_ops(netdev); + + /* Enable link notifications */ + otx2_cgx_config_linkevents(pf, true); + + return 0; + +err_detach_rsrc: + otx2_detach_resources(&pf->mbox); +err_disable_mbox_intr: + otx2_disable_mbox_intr(pf); +err_mbox_destroy: + otx2_pfaf_mbox_destroy(pf); +err_free_irq_vectors: + pci_free_irq_vectors(hw->pdev); +err_free_netdev: + pci_set_drvdata(pdev, NULL); + free_netdev(netdev); +err_release_regions: + pci_release_regions(pdev); + return err; +} + +static void otx2_remove(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct otx2_nic *pf; + + if (!netdev) + return; + + pf = netdev_priv(netdev); + + /* Disable link notifications */ + otx2_cgx_config_linkevents(pf, false); + + unregister_netdev(netdev); + otx2_detach_resources(&pf->mbox); + otx2_disable_mbox_intr(pf); + otx2_pfaf_mbox_destroy(pf); + pci_free_irq_vectors(pf->pdev); + pci_set_drvdata(pdev, NULL); + free_netdev(netdev); + + pci_release_regions(pdev); +} + +static struct pci_driver otx2_pf_driver = { + .name = DRV_NAME, + .id_table = otx2_pf_id_table, + .probe = otx2_probe, + .shutdown = otx2_remove, + .remove = otx2_remove, +}; + +static int __init otx2_rvupf_init_module(void) +{ + pr_info("%s: %s\n", DRV_NAME, DRV_STRING); + + return pci_register_driver(&otx2_pf_driver); +} + +static void __exit otx2_rvupf_cleanup_module(void) +{ + pci_unregister_driver(&otx2_pf_driver); +} + +module_init(otx2_rvupf_init_module); +module_exit(otx2_rvupf_cleanup_module); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h new file mode 100644 index 000000000000..7963d418886a --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h @@ -0,0 +1,147 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell OcteonTx2 RVU Ethernet driver + * + * Copyright (C) 2020 Marvell 
International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef OTX2_REG_H +#define OTX2_REG_H + +#include <rvu_struct.h> + +/* RVU PF registers */ +#define RVU_PF_VFX_PFVF_MBOX0 (0x00000) +#define RVU_PF_VFX_PFVF_MBOX1 (0x00008) +#define RVU_PF_VFX_PFVF_MBOXX(a, b) (0x0 | (a) << 12 | (b) << 3) +#define RVU_PF_VF_BAR4_ADDR (0x10) +#define RVU_PF_BLOCK_ADDRX_DISC(a) (0x200 | (a) << 3) +#define RVU_PF_VFME_STATUSX(a) (0x800 | (a) << 3) +#define RVU_PF_VFTRPENDX(a) (0x820 | (a) << 3) +#define RVU_PF_VFTRPEND_W1SX(a) (0x840 | (a) << 3) +#define RVU_PF_VFPF_MBOX_INTX(a) (0x880 | (a) << 3) +#define RVU_PF_VFPF_MBOX_INT_W1SX(a) (0x8A0 | (a) << 3) +#define RVU_PF_VFPF_MBOX_INT_ENA_W1SX(a) (0x8C0 | (a) << 3) +#define RVU_PF_VFPF_MBOX_INT_ENA_W1CX(a) (0x8E0 | (a) << 3) +#define RVU_PF_VFFLR_INTX(a) (0x900 | (a) << 3) +#define RVU_PF_VFFLR_INT_W1SX(a) (0x920 | (a) << 3) +#define RVU_PF_VFFLR_INT_ENA_W1SX(a) (0x940 | (a) << 3) +#define RVU_PF_VFFLR_INT_ENA_W1CX(a) (0x960 | (a) << 3) +#define RVU_PF_VFME_INTX(a) (0x980 | (a) << 3) +#define RVU_PF_VFME_INT_W1SX(a) (0x9A0 | (a) << 3) +#define RVU_PF_VFME_INT_ENA_W1SX(a) (0x9C0 | (a) << 3) +#define RVU_PF_VFME_INT_ENA_W1CX(a) (0x9E0 | (a) << 3) +#define RVU_PF_PFAF_MBOX0 (0xC00) +#define RVU_PF_PFAF_MBOX1 (0xC08) +#define RVU_PF_PFAF_MBOXX(a) (0xC00 | (a) << 3) +#define RVU_PF_INT (0xc20) +#define RVU_PF_INT_W1S (0xc28) +#define RVU_PF_INT_ENA_W1S (0xc30) +#define RVU_PF_INT_ENA_W1C (0xc38) +#define RVU_PF_MSIX_VECX_ADDR(a) (0x000 | (a) << 4) +#define RVU_PF_MSIX_VECX_CTL(a) (0x008 | (a) << 4) +#define RVU_PF_MSIX_PBAX(a) (0xF0000 | (a) << 3) + +#define RVU_FUNC_BLKADDR_SHIFT 20 +#define RVU_FUNC_BLKADDR_MASK 0x1FULL + +/* NPA LF registers */ +#define NPA_LFBASE (BLKTYPE_NPA << RVU_FUNC_BLKADDR_SHIFT) +#define NPA_LF_AURA_OP_ALLOCX(a) (NPA_LFBASE | 0x10 | (a) << 3) +#define NPA_LF_AURA_OP_FREE0 (NPA_LFBASE | 0x20) +#define NPA_LF_AURA_OP_FREE1 (NPA_LFBASE | 0x28) +#define NPA_LF_AURA_OP_CNT (NPA_LFBASE | 0x30) +#define NPA_LF_AURA_OP_LIMIT (NPA_LFBASE | 0x50) +#define NPA_LF_AURA_OP_INT (NPA_LFBASE | 0x60) +#define NPA_LF_AURA_OP_THRESH (NPA_LFBASE | 0x70) +#define NPA_LF_POOL_OP_PC (NPA_LFBASE | 0x100) +#define NPA_LF_POOL_OP_AVAILABLE (NPA_LFBASE | 0x110) +#define NPA_LF_POOL_OP_PTR_START0 (NPA_LFBASE | 0x120) +#define NPA_LF_POOL_OP_PTR_START1 (NPA_LFBASE | 0x128) +#define NPA_LF_POOL_OP_PTR_END0 (NPA_LFBASE | 0x130) +#define NPA_LF_POOL_OP_PTR_END1 (NPA_LFBASE | 0x138) +#define NPA_LF_POOL_OP_INT (NPA_LFBASE | 0x160) +#define NPA_LF_POOL_OP_THRESH (NPA_LFBASE | 0x170) +#define NPA_LF_ERR_INT (NPA_LFBASE | 0x200) +#define NPA_LF_ERR_INT_W1S (NPA_LFBASE | 0x208) +#define NPA_LF_ERR_INT_ENA_W1C (NPA_LFBASE | 0x210) +#define NPA_LF_ERR_INT_ENA_W1S (NPA_LFBASE | 0x218) +#define NPA_LF_RAS (NPA_LFBASE | 0x220) +#define NPA_LF_RAS_W1S (NPA_LFBASE | 0x228) +#define NPA_LF_RAS_ENA_W1C (NPA_LFBASE | 0x230) +#define NPA_LF_RAS_ENA_W1S (NPA_LFBASE | 0x238) +#define NPA_LF_QINTX_CNT(a) (NPA_LFBASE | 0x300 | (a) << 12) +#define NPA_LF_QINTX_INT(a) (NPA_LFBASE | 0x310 | (a) << 12) +#define NPA_LF_QINTX_INT_W1S(a) (NPA_LFBASE | 0x318 | (a) << 12) +#define NPA_LF_QINTX_ENA_W1S(a) (NPA_LFBASE | 0x320 | (a) << 12) +#define NPA_LF_QINTX_ENA_W1C(a) (NPA_LFBASE | 0x330 | (a) << 12) + +/* NIX LF registers */ +#define NIX_LFBASE (BLKTYPE_NIX << RVU_FUNC_BLKADDR_SHIFT) +#define NIX_LF_RX_SECRETX(a) (NIX_LFBASE | 
0x0 | (a) << 3) +#define NIX_LF_CFG (NIX_LFBASE | 0x100) +#define NIX_LF_GINT (NIX_LFBASE | 0x200) +#define NIX_LF_GINT_W1S (NIX_LFBASE | 0x208) +#define NIX_LF_GINT_ENA_W1C (NIX_LFBASE | 0x210) +#define NIX_LF_GINT_ENA_W1S (NIX_LFBASE | 0x218) +#define NIX_LF_ERR_INT (NIX_LFBASE | 0x220) +#define NIX_LF_ERR_INT_W1S (NIX_LFBASE | 0x228) +#define NIX_LF_ERR_INT_ENA_W1C (NIX_LFBASE | 0x230) +#define NIX_LF_ERR_INT_ENA_W1S (NIX_LFBASE | 0x238) +#define NIX_LF_RAS (NIX_LFBASE | 0x240) +#define NIX_LF_RAS_W1S (NIX_LFBASE | 0x248) +#define NIX_LF_RAS_ENA_W1C (NIX_LFBASE | 0x250) +#define NIX_LF_RAS_ENA_W1S (NIX_LFBASE | 0x258) +#define NIX_LF_SQ_OP_ERR_DBG (NIX_LFBASE | 0x260) +#define NIX_LF_MNQ_ERR_DBG (NIX_LFBASE | 0x270) +#define NIX_LF_SEND_ERR_DBG (NIX_LFBASE | 0x280) +#define NIX_LF_TX_STATX(a) (NIX_LFBASE | 0x300 | (a) << 3) +#define NIX_LF_RX_STATX(a) (NIX_LFBASE | 0x400 | (a) << 3) +#define NIX_LF_OP_SENDX(a) (NIX_LFBASE | 0x800 | (a) << 3) +#define NIX_LF_RQ_OP_INT (NIX_LFBASE | 0x900) +#define NIX_LF_RQ_OP_OCTS (NIX_LFBASE | 0x910) +#define NIX_LF_RQ_OP_PKTS (NIX_LFBASE | 0x920) +#define NIX_LF_OP_IPSEC_DYNO_CN (NIX_LFBASE | 0x980) +#define NIX_LF_SQ_OP_INT (NIX_LFBASE | 0xa00) +#define NIX_LF_SQ_OP_OCTS (NIX_LFBASE | 0xa10) +#define NIX_LF_SQ_OP_PKTS (NIX_LFBASE | 0xa20) +#define NIX_LF_SQ_OP_STATUS (NIX_LFBASE | 0xa30) +#define NIX_LF_CQ_OP_INT (NIX_LFBASE | 0xb00) +#define NIX_LF_CQ_OP_DOOR (NIX_LFBASE | 0xb30) +#define NIX_LF_CQ_OP_STATUS (NIX_LFBASE | 0xb40) +#define NIX_LF_QINTX_CNT(a) (NIX_LFBASE | 0xC00 | (a) << 12) +#define NIX_LF_QINTX_INT(a) (NIX_LFBASE | 0xC10 | (a) << 12) +#define NIX_LF_QINTX_INT_W1S(a) (NIX_LFBASE | 0xC18 | (a) << 12) +#define NIX_LF_QINTX_ENA_W1S(a) (NIX_LFBASE | 0xC20 | (a) << 12) +#define NIX_LF_QINTX_ENA_W1C(a) (NIX_LFBASE | 0xC30 | (a) << 12) +#define NIX_LF_CINTX_CNT(a) (NIX_LFBASE | 0xD00 | (a) << 12) +#define NIX_LF_CINTX_WAIT(a) (NIX_LFBASE | 0xD10 | (a) << 12) +#define NIX_LF_CINTX_INT(a) (NIX_LFBASE | 0xD20 | (a) << 12) +#define NIX_LF_CINTX_INT_W1S(a) (NIX_LFBASE | 0xD30 | (a) << 12) +#define NIX_LF_CINTX_ENA_W1S(a) (NIX_LFBASE | 0xD40 | (a) << 12) +#define NIX_LF_CINTX_ENA_W1C(a) (NIX_LFBASE | 0xD50 | (a) << 12) + +/* NIX AF transmit scheduler registers */ +#define NIX_AF_SMQX_CFG(a) (0x700 | (a) << 16) +#define NIX_AF_TL1X_SCHEDULE(a) (0xC00 | (a) << 16) +#define NIX_AF_TL1X_CIR(a) (0xC20 | (a) << 16) +#define NIX_AF_TL1X_TOPOLOGY(a) (0xC80 | (a) << 16) +#define NIX_AF_TL2X_PARENT(a) (0xE88 | (a) << 16) +#define NIX_AF_TL2X_SCHEDULE(a) (0xE00 | (a) << 16) +#define NIX_AF_TL3X_PARENT(a) (0x1088 | (a) << 16) +#define NIX_AF_TL3X_SCHEDULE(a) (0x1000 | (a) << 16) +#define NIX_AF_TL4X_PARENT(a) (0x1288 | (a) << 16) +#define NIX_AF_TL4X_SCHEDULE(a) (0x1200 | (a) << 16) +#define NIX_AF_MDQX_SCHEDULE(a) (0x1400 | (a) << 16) +#define NIX_AF_MDQX_PARENT(a) (0x1480 | (a) << 16) +#define NIX_AF_TL3_TL2X_LINKX_CFG(a, b) (0x1700 | (a) << 16 | (b) << 3) + +/* LMT LF registers */ +#define LMT_LFBASE BIT_ULL(RVU_FUNC_BLKADDR_SHIFT) +#define LMT_LF_LMTLINEX(a) (LMT_LFBASE | 0x000 | (a) << 12) +#define LMT_LF_LMTCANCEL (LMT_LFBASE | 0x400) + +#endif /* OTX2_REG_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h new file mode 100644 index 000000000000..cba59ddf71bb --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h @@ -0,0 +1,276 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell OcteonTx2 RVU Ethernet driver + * + * Copyright (C) 2020 Marvell 
International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef OTX2_STRUCT_H +#define OTX2_STRUCT_H + +/* NIX WQE/CQE size 128 byte or 512 byte */ +enum nix_cqesz_e { + NIX_XQESZ_W64 = 0x0, + NIX_XQESZ_W16 = 0x1, +}; + +enum nix_sqes_e { + NIX_SQESZ_W16 = 0x0, + NIX_SQESZ_W8 = 0x1, +}; + +enum nix_send_ldtype { + NIX_SEND_LDTYPE_LDD = 0x0, + NIX_SEND_LDTYPE_LDT = 0x1, + NIX_SEND_LDTYPE_LDWB = 0x2, +}; + +/* CSUM offload */ +enum nix_sendl3type { + NIX_SENDL3TYPE_NONE = 0x0, + NIX_SENDL3TYPE_IP4 = 0x2, + NIX_SENDL3TYPE_IP4_CKSUM = 0x3, + NIX_SENDL3TYPE_IP6 = 0x4, +}; + +enum nix_sendl4type { + NIX_SENDL4TYPE_NONE, + NIX_SENDL4TYPE_TCP_CKSUM, + NIX_SENDL4TYPE_SCTP_CKSUM, + NIX_SENDL4TYPE_UDP_CKSUM, +}; + +/* NIX wqe/cqe types */ +enum nix_xqe_type { + NIX_XQE_TYPE_INVALID = 0x0, + NIX_XQE_TYPE_RX = 0x1, + NIX_XQE_TYPE_RX_IPSECS = 0x2, + NIX_XQE_TYPE_RX_IPSECH = 0x3, + NIX_XQE_TYPE_RX_IPSECD = 0x4, + NIX_XQE_TYPE_SEND = 0x8, +}; + +/* NIX CQE/SQE subdescriptor types */ +enum nix_subdc { + NIX_SUBDC_NOP = 0x0, + NIX_SUBDC_EXT = 0x1, + NIX_SUBDC_CRC = 0x2, + NIX_SUBDC_IMM = 0x3, + NIX_SUBDC_SG = 0x4, + NIX_SUBDC_MEM = 0x5, + NIX_SUBDC_JUMP = 0x6, + NIX_SUBDC_WORK = 0x7, + NIX_SUBDC_SOD = 0xf, +}; + +/* Algorithm for nix_sqe_mem_s header (value of the `alg` field) */ +enum nix_sendmemalg { + NIX_SENDMEMALG_E_SET = 0x0, + NIX_SENDMEMALG_E_SETTSTMP = 0x1, + NIX_SENDMEMALG_E_SETRSLT = 0x2, + NIX_SENDMEMALG_E_ADD = 0x8, + NIX_SENDMEMALG_E_SUB = 0x9, + NIX_SENDMEMALG_E_ADDLEN = 0xa, + NIX_SENDMEMALG_E_SUBLEN = 0xb, + NIX_SENDMEMALG_E_ADDMBUF = 0xc, + NIX_SENDMEMALG_E_SUBMBUF = 0xd, + NIX_SENDMEMALG_E_ENUM_LAST = 0xe, +}; + +/* NIX CQE header structure */ +struct nix_cqe_hdr_s { + u64 flow_tag : 32; + u64 q : 20; + u64 reserved_52_57 : 6; + u64 node : 2; + u64 cqe_type : 4; +}; + +/* NIX CQE RX parse structure */ +struct nix_rx_parse_s { + u64 chan : 12; + u64 desc_sizem1 : 5; + u64 rsvd_17 : 1; + u64 express : 1; + u64 wqwd : 1; + u64 errlev : 4; + u64 errcode : 8; + u64 latype : 4; + u64 lbtype : 4; + u64 lctype : 4; + u64 ldtype : 4; + u64 letype : 4; + u64 lftype : 4; + u64 lgtype : 4; + u64 lhtype : 4; + u64 pkt_lenm1 : 16; /* W1 */ + u64 l2m : 1; + u64 l2b : 1; + u64 l3m : 1; + u64 l3b : 1; + u64 vtag0_valid : 1; + u64 vtag0_gone : 1; + u64 vtag1_valid : 1; + u64 vtag1_gone : 1; + u64 pkind : 6; + u64 rsvd_95_94 : 2; + u64 vtag0_tci : 16; + u64 vtag1_tci : 16; + u64 laflags : 8; /* W2 */ + u64 lbflags : 8; + u64 lcflags : 8; + u64 ldflags : 8; + u64 leflags : 8; + u64 lfflags : 8; + u64 lgflags : 8; + u64 lhflags : 8; + u64 eoh_ptr : 8; /* W3 */ + u64 wqe_aura : 20; + u64 pb_aura : 20; + u64 match_id : 16; + u64 laptr : 8; /* W4 */ + u64 lbptr : 8; + u64 lcptr : 8; + u64 ldptr : 8; + u64 leptr : 8; + u64 lfptr : 8; + u64 lgptr : 8; + u64 lhptr : 8; + u64 vtag0_ptr : 8; /* W5 */ + u64 vtag1_ptr : 8; + u64 flow_key_alg : 5; + u64 rsvd_383_341 : 43; + u64 rsvd_447_384; /* W6 */ +}; + +/* NIX CQE RX scatter/gather subdescriptor structure */ +struct nix_rx_sg_s { + u64 seg_size : 16; /* W0 */ + u64 seg2_size : 16; + u64 seg3_size : 16; + u64 segs : 2; + u64 rsvd_59_50 : 10; + u64 subdc : 4; + u64 seg_addr; + u64 seg2_addr; + u64 seg3_addr; +}; + +struct nix_send_comp_s { + u64 status : 8; + u64 sqe_id : 16; + u64 rsvd_24_63 : 40; +}; + +struct nix_cqe_rx_s { + struct nix_cqe_hdr_s hdr; + struct nix_rx_parse_s parse; + struct nix_rx_sg_s 
sg; +}; + +struct nix_cqe_tx_s { + struct nix_cqe_hdr_s hdr; + struct nix_send_comp_s comp; +}; + +/* NIX SQE header structure */ +struct nix_sqe_hdr_s { + u64 total : 18; /* W0 */ + u64 reserved_18 : 1; + u64 df : 1; + u64 aura : 20; + u64 sizem1 : 3; + u64 pnc : 1; + u64 sq : 20; + u64 ol3ptr : 8; /* W1 */ + u64 ol4ptr : 8; + u64 il3ptr : 8; + u64 il4ptr : 8; + u64 ol3type : 4; + u64 ol4type : 4; + u64 il3type : 4; + u64 il4type : 4; + u64 sqe_id : 16; + +}; + +/* NIX send extended header subdescriptor structure */ +struct nix_sqe_ext_s { + u64 lso_mps : 14; /* W0 */ + u64 lso : 1; + u64 tstmp : 1; + u64 lso_sb : 8; + u64 lso_format : 5; + u64 rsvd_31_29 : 3; + u64 shp_chg : 9; + u64 shp_dis : 1; + u64 shp_ra : 2; + u64 markptr : 8; + u64 markform : 7; + u64 mark_en : 1; + u64 subdc : 4; + u64 vlan0_ins_ptr : 8; /* W1 */ + u64 vlan0_ins_tci : 16; + u64 vlan1_ins_ptr : 8; + u64 vlan1_ins_tci : 16; + u64 vlan0_ins_ena : 1; + u64 vlan1_ins_ena : 1; + u64 rsvd_127_114 : 14; +}; + +struct nix_sqe_sg_s { + u64 seg1_size : 16; + u64 seg2_size : 16; + u64 seg3_size : 16; + u64 segs : 2; + u64 rsvd_54_50 : 5; + u64 i1 : 1; + u64 i2 : 1; + u64 i3 : 1; + u64 ld_type : 2; + u64 subdc : 4; +}; + +/* NIX send memory subdescriptor structure */ +struct nix_sqe_mem_s { + u64 offset : 16; /* W0 */ + u64 rsvd_52_16 : 37; + u64 wmem : 1; + u64 dsz : 2; + u64 alg : 4; + u64 subdc : 4; + u64 addr; /* W1 */ +}; + +enum nix_cqerrint_e { + NIX_CQERRINT_DOOR_ERR = 0, + NIX_CQERRINT_WR_FULL = 1, + NIX_CQERRINT_CQE_FAULT = 2, +}; + +#define NIX_CQERRINT_BITS (BIT_ULL(NIX_CQERRINT_DOOR_ERR) | \ + BIT_ULL(NIX_CQERRINT_CQE_FAULT)) + +enum nix_rqint_e { + NIX_RQINT_DROP = 0, + NIX_RQINT_RED = 1, +}; + +#define NIX_RQINT_BITS (BIT_ULL(NIX_RQINT_DROP) | BIT_ULL(NIX_RQINT_RED)) + +enum nix_sqint_e { + NIX_SQINT_LMT_ERR = 0, + NIX_SQINT_MNQ_ERR = 1, + NIX_SQINT_SEND_ERR = 2, + NIX_SQINT_SQB_ALLOC_FAIL = 3, +}; + +#define NIX_SQINT_BITS (BIT_ULL(NIX_SQINT_LMT_ERR) | \ + BIT_ULL(NIX_SQINT_MNQ_ERR) | \ + BIT_ULL(NIX_SQINT_SEND_ERR) | \ + BIT_ULL(NIX_SQINT_SQB_ALLOC_FAIL)) + +#endif /* OTX2_STRUCT_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c new file mode 100644 index 000000000000..bef4c20fe314 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -0,0 +1,848 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTx2 RVU Ethernet driver + * + * Copyright (C) 2020 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/etherdevice.h> +#include <net/ip.h> +#include <net/tso.h> + +#include "otx2_reg.h" +#include "otx2_common.h" +#include "otx2_struct.h" +#include "otx2_txrx.h" + +#define CQE_ADDR(CQ, idx) ((CQ)->cqe_base + ((CQ)->cqe_size * (idx))) + +static struct nix_cqe_hdr_s *otx2_get_next_cqe(struct otx2_cq_queue *cq) +{ + struct nix_cqe_hdr_s *cqe_hdr; + + cqe_hdr = (struct nix_cqe_hdr_s *)CQE_ADDR(cq, cq->cq_head); + if (cqe_hdr->cqe_type == NIX_XQE_TYPE_INVALID) + return NULL; + + cq->cq_head++; + cq->cq_head &= (cq->cqe_cnt - 1); + + return cqe_hdr; +} + +static unsigned int frag_num(unsigned int i) +{ +#ifdef __BIG_ENDIAN + return (i & ~3) + 3 - (i & 3); +#else + return i; +#endif +} + +static dma_addr_t otx2_dma_map_skb_frag(struct otx2_nic *pfvf, + struct sk_buff *skb, int seg, int *len) +{ + const skb_frag_t *frag; + struct page *page; + int offset; + + /* First segment is always skb->data */ + if (!seg) { + page = virt_to_page(skb->data); + offset = offset_in_page(skb->data); + *len = skb_headlen(skb); + } else { + frag = &skb_shinfo(skb)->frags[seg - 1]; + page = skb_frag_page(frag); + offset = skb_frag_off(frag); + *len = skb_frag_size(frag); + } + return otx2_dma_map_page(pfvf, page, offset, *len, DMA_TO_DEVICE); +} + +static void otx2_dma_unmap_skb_frags(struct otx2_nic *pfvf, struct sg_list *sg) +{ + int seg; + + for (seg = 0; seg < sg->num_segs; seg++) { + otx2_dma_unmap_page(pfvf, sg->dma_addr[seg], + sg->size[seg], DMA_TO_DEVICE); + } + sg->num_segs = 0; +} + +static void otx2_snd_pkt_handler(struct otx2_nic *pfvf, + struct otx2_cq_queue *cq, + struct otx2_snd_queue *sq, + struct nix_cqe_tx_s *cqe, + int budget, int *tx_pkts, int *tx_bytes) +{ + struct nix_send_comp_s *snd_comp = &cqe->comp; + struct sk_buff *skb = NULL; + struct sg_list *sg; + + if (unlikely(snd_comp->status) && netif_msg_tx_err(pfvf)) + net_err_ratelimited("%s: TX%d: Error in send CQ status:%x\n", + pfvf->netdev->name, cq->cint_idx, + snd_comp->status); + + sg = &sq->sg[snd_comp->sqe_id]; + skb = (struct sk_buff *)sg->skb; + if (unlikely(!skb)) + return; + + *tx_bytes += skb->len; + (*tx_pkts)++; + otx2_dma_unmap_skb_frags(pfvf, sg); + napi_consume_skb(skb, budget); + sg->skb = (u64)NULL; +} + +static void otx2_skb_add_frag(struct otx2_nic *pfvf, struct sk_buff *skb, + u64 iova, int len) +{ + struct page *page; + void *va; + + va = phys_to_virt(otx2_iova_to_phys(pfvf->iommu_domain, iova)); + page = virt_to_page(va); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + va - page_address(page), len, pfvf->rbsize); + + otx2_dma_unmap_page(pfvf, iova - OTX2_HEAD_ROOM, + pfvf->rbsize, DMA_FROM_DEVICE); +} + +static void otx2_set_rxhash(struct otx2_nic *pfvf, + struct nix_cqe_rx_s *cqe, struct sk_buff *skb) +{ + enum pkt_hash_types hash_type = PKT_HASH_TYPE_NONE; + struct otx2_rss_info *rss; + u32 hash = 0; + + if (!(pfvf->netdev->features & NETIF_F_RXHASH)) + return; + + rss = &pfvf->hw.rss_info; + if (rss->flowkey_cfg) { + if (rss->flowkey_cfg & + ~(NIX_FLOW_KEY_TYPE_IPV4 | NIX_FLOW_KEY_TYPE_IPV6)) + hash_type = PKT_HASH_TYPE_L4; + else + hash_type = PKT_HASH_TYPE_L3; + hash = cqe->hdr.flow_tag; + } + skb_set_hash(skb, hash, hash_type); +} + +static bool otx2_check_rcv_errors(struct otx2_nic *pfvf, + struct nix_cqe_rx_s *cqe, int qidx) +{ + struct otx2_drv_stats *stats = &pfvf->hw.drv_stats; + struct nix_rx_parse_s *parse = &cqe->parse; + + if (netif_msg_rx_err(pfvf)) + netdev_err(pfvf->netdev, + "RQ%d: Error pkt with errlev:0x%x errcode:0x%x\n", + qidx, parse->errlev, parse->errcode); + + 
if (parse->errlev == NPC_ERRLVL_RE) { + switch (parse->errcode) { + case ERRCODE_FCS: + case ERRCODE_FCS_RCV: + atomic_inc(&stats->rx_fcs_errs); + break; + case ERRCODE_UNDERSIZE: + atomic_inc(&stats->rx_undersize_errs); + break; + case ERRCODE_OVERSIZE: + atomic_inc(&stats->rx_oversize_errs); + break; + case ERRCODE_OL2_LEN_MISMATCH: + atomic_inc(&stats->rx_len_errs); + break; + default: + atomic_inc(&stats->rx_other_errs); + break; + } + } else if (parse->errlev == NPC_ERRLVL_NIX) { + switch (parse->errcode) { + case ERRCODE_OL3_LEN: + case ERRCODE_OL4_LEN: + case ERRCODE_IL3_LEN: + case ERRCODE_IL4_LEN: + atomic_inc(&stats->rx_len_errs); + break; + case ERRCODE_OL4_CSUM: + case ERRCODE_IL4_CSUM: + atomic_inc(&stats->rx_csum_errs); + break; + default: + atomic_inc(&stats->rx_other_errs); + break; + } + } else { + atomic_inc(&stats->rx_other_errs); + /* For now ignore all the NPC parser errors and + * pass the packets to stack. + */ + return false; + } + + /* If RXALL is enabled pass on packets to stack. */ + if (cqe->sg.segs && (pfvf->netdev->features & NETIF_F_RXALL)) + return false; + + /* Free buffer back to pool */ + if (cqe->sg.segs) + otx2_aura_freeptr(pfvf, qidx, cqe->sg.seg_addr & ~0x07ULL); + return true; +} + +static void otx2_rcv_pkt_handler(struct otx2_nic *pfvf, + struct napi_struct *napi, + struct otx2_cq_queue *cq, + struct nix_cqe_rx_s *cqe) +{ + struct nix_rx_parse_s *parse = &cqe->parse; + struct sk_buff *skb = NULL; + + if (unlikely(parse->errlev || parse->errcode)) { + if (otx2_check_rcv_errors(pfvf, cqe, cq->cq_idx)) + return; + } + + skb = napi_get_frags(napi); + if (unlikely(!skb)) + return; + + otx2_skb_add_frag(pfvf, skb, cqe->sg.seg_addr, cqe->sg.seg_size); + cq->pool_ptrs++; + + otx2_set_rxhash(pfvf, cqe, skb); + + skb_record_rx_queue(skb, cq->cq_idx); + if (pfvf->netdev->features & NETIF_F_RXCSUM) + skb->ip_summed = CHECKSUM_UNNECESSARY; + + napi_gro_frags(napi); +} + +static int otx2_rx_napi_handler(struct otx2_nic *pfvf, + struct napi_struct *napi, + struct otx2_cq_queue *cq, int budget) +{ + struct nix_cqe_rx_s *cqe; + int processed_cqe = 0; + s64 bufptr; + + while (likely(processed_cqe < budget)) { + cqe = (struct nix_cqe_rx_s *)CQE_ADDR(cq, cq->cq_head); + if (cqe->hdr.cqe_type == NIX_XQE_TYPE_INVALID || + !cqe->sg.seg_addr) { + if (!processed_cqe) + return 0; + break; + } + cq->cq_head++; + cq->cq_head &= (cq->cqe_cnt - 1); + + otx2_rcv_pkt_handler(pfvf, napi, cq, cqe); + + cqe->hdr.cqe_type = NIX_XQE_TYPE_INVALID; + cqe->sg.seg_addr = 0x00; + processed_cqe++; + } + + /* Free CQEs to HW */ + otx2_write64(pfvf, NIX_LF_CQ_OP_DOOR, + ((u64)cq->cq_idx << 32) | processed_cqe); + + if (unlikely(!cq->pool_ptrs)) + return 0; + + /* Refill pool with new buffers */ + while (cq->pool_ptrs) { + bufptr = otx2_alloc_rbuf(pfvf, cq->rbpool, GFP_ATOMIC); + if (unlikely(bufptr <= 0)) { + struct refill_work *work; + struct delayed_work *dwork; + + work = &pfvf->refill_wrk[cq->cq_idx]; + dwork = &work->pool_refill_work; + /* Schedule a task if no other task is running */ + if (!cq->refill_task_sched) { + cq->refill_task_sched = true; + schedule_delayed_work(dwork, + msecs_to_jiffies(100)); + } + break; + } + otx2_aura_freeptr(pfvf, cq->cq_idx, bufptr + OTX2_HEAD_ROOM); + cq->pool_ptrs--; + } + + return processed_cqe; +} + +static int otx2_tx_napi_handler(struct otx2_nic *pfvf, + struct otx2_cq_queue *cq, int budget) +{ + int tx_pkts = 0, tx_bytes = 0; + struct nix_cqe_tx_s *cqe; + int processed_cqe = 0; + + while (likely(processed_cqe < budget)) { + cqe = (struct 
nix_cqe_tx_s *)otx2_get_next_cqe(cq); + if (unlikely(!cqe)) { + if (!processed_cqe) + return 0; + break; + } + otx2_snd_pkt_handler(pfvf, cq, &pfvf->qset.sq[cq->cint_idx], + cqe, budget, &tx_pkts, &tx_bytes); + + cqe->hdr.cqe_type = NIX_XQE_TYPE_INVALID; + processed_cqe++; + } + + /* Free CQEs to HW */ + otx2_write64(pfvf, NIX_LF_CQ_OP_DOOR, + ((u64)cq->cq_idx << 32) | processed_cqe); + + if (likely(tx_pkts)) { + struct netdev_queue *txq; + + txq = netdev_get_tx_queue(pfvf->netdev, cq->cint_idx); + netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); + /* Check if queue was stopped earlier due to ring full */ + smp_mb(); + if (netif_tx_queue_stopped(txq) && + netif_carrier_ok(pfvf->netdev)) + netif_tx_wake_queue(txq); + } + return 0; +} + +int otx2_napi_handler(struct napi_struct *napi, int budget) +{ + struct otx2_cq_poll *cq_poll; + int workdone = 0, cq_idx, i; + struct otx2_cq_queue *cq; + struct otx2_qset *qset; + struct otx2_nic *pfvf; + + cq_poll = container_of(napi, struct otx2_cq_poll, napi); + pfvf = (struct otx2_nic *)cq_poll->dev; + qset = &pfvf->qset; + + for (i = CQS_PER_CINT - 1; i >= 0; i--) { + cq_idx = cq_poll->cq_ids[i]; + if (unlikely(cq_idx == CINT_INVALID_CQ)) + continue; + cq = &qset->cq[cq_idx]; + if (cq->cq_type == CQ_RX) { + /* If the RQ refill WQ task is running, skip napi + * scheduler for this queue. + */ + if (cq->refill_task_sched) + continue; + workdone += otx2_rx_napi_handler(pfvf, napi, + cq, budget); + } else { + workdone += otx2_tx_napi_handler(pfvf, cq, budget); + } + } + + /* Clear the IRQ */ + otx2_write64(pfvf, NIX_LF_CINTX_INT(cq_poll->cint_idx), BIT_ULL(0)); + + if (workdone < budget && napi_complete_done(napi, workdone)) { + /* If interface is going down, don't re-enable IRQ */ + if (pfvf->flags & OTX2_FLAG_INTF_DOWN) + return workdone; + + /* Re-enable interrupts */ + otx2_write64(pfvf, NIX_LF_CINTX_ENA_W1S(cq_poll->cint_idx), + BIT_ULL(0)); + } + return workdone; +} + +static void otx2_sqe_flush(struct otx2_snd_queue *sq, int size) +{ + u64 status; + + /* Packet data stores should finish before SQE is flushed to HW */ + dma_wmb(); + + do { + memcpy(sq->lmt_addr, sq->sqe_base, size); + status = otx2_lmt_flush(sq->io_addr); + } while (status == 0); + + sq->head++; + sq->head &= (sq->sqe_cnt - 1); +} + +#define MAX_SEGS_PER_SG 3 +/* Add SQE scatter/gather subdescriptor structure */ +static bool otx2_sqe_add_sg(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, + struct sk_buff *skb, int num_segs, int *offset) +{ + struct nix_sqe_sg_s *sg = NULL; + u64 dma_addr, *iova = NULL; + u16 *sg_lens = NULL; + int seg, len; + + sq->sg[sq->head].num_segs = 0; + + for (seg = 0; seg < num_segs; seg++) { + if ((seg % MAX_SEGS_PER_SG) == 0) { + sg = (struct nix_sqe_sg_s *)(sq->sqe_base + *offset); + sg->ld_type = NIX_SEND_LDTYPE_LDD; + sg->subdc = NIX_SUBDC_SG; + sg->segs = 0; + sg_lens = (void *)sg; + iova = (void *)sg + sizeof(*sg); + /* Next subdc always starts at a 16byte boundary. + * So if sg->segs is whether 2 or 3, offset += 16bytes. 
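+			 * (The SG header plus one IOVA is 16 bytes; with two
+			 * or three IOVAs the subdescriptor is padded to
+			 * 32 bytes, hence the two offset increments below.)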
+ */ + if ((num_segs - seg) >= (MAX_SEGS_PER_SG - 1)) + *offset += sizeof(*sg) + (3 * sizeof(u64)); + else + *offset += sizeof(*sg) + sizeof(u64); + } + dma_addr = otx2_dma_map_skb_frag(pfvf, skb, seg, &len); + if (dma_mapping_error(pfvf->dev, dma_addr)) + return false; + + sg_lens[frag_num(seg % MAX_SEGS_PER_SG)] = len; + sg->segs++; + *iova++ = dma_addr; + + /* Save DMA mapping info for later unmapping */ + sq->sg[sq->head].dma_addr[seg] = dma_addr; + sq->sg[sq->head].size[seg] = len; + sq->sg[sq->head].num_segs++; + } + + sq->sg[sq->head].skb = (u64)skb; + return true; +} + +/* Add SQE extended header subdescriptor */ +static void otx2_sqe_add_ext(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, + struct sk_buff *skb, int *offset) +{ + struct nix_sqe_ext_s *ext; + + ext = (struct nix_sqe_ext_s *)(sq->sqe_base + *offset); + ext->subdc = NIX_SUBDC_EXT; + if (skb_shinfo(skb)->gso_size) { + ext->lso = 1; + ext->lso_sb = skb_transport_offset(skb) + tcp_hdrlen(skb); + ext->lso_mps = skb_shinfo(skb)->gso_size; + + /* Only TSOv4 and TSOv6 GSO offloads are supported */ + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { + ext->lso_format = pfvf->hw.lso_tsov4_idx; + + /* HW adds payload size to 'ip_hdr->tot_len' while + * sending TSO segment, hence set payload length + * in IP header of the packet to just header length. + */ + ip_hdr(skb)->tot_len = + htons(ext->lso_sb - skb_network_offset(skb)); + } else { + ext->lso_format = pfvf->hw.lso_tsov6_idx; + ipv6_hdr(skb)->payload_len = + htons(ext->lso_sb - skb_network_offset(skb)); + } + } + *offset += sizeof(*ext); +} + +/* Add SQE header subdescriptor structure */ +static void otx2_sqe_add_hdr(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, + struct nix_sqe_hdr_s *sqe_hdr, + struct sk_buff *skb, u16 qidx) +{ + int proto = 0; + + /* Check if SQE was framed before, if yes then no need to + * set these constants again and again. + */ + if (!sqe_hdr->total) { + /* Don't free Tx buffers to Aura */ + sqe_hdr->df = 1; + sqe_hdr->aura = sq->aura_id; + /* Post a CQE Tx after pkt transmission */ + sqe_hdr->pnc = 1; + sqe_hdr->sq = qidx; + } + sqe_hdr->total = skb->len; + /* Set SQE identifier which will be used later for freeing SKB */ + sqe_hdr->sqe_id = sq->head; + + /* Offload TCP/UDP checksum to HW */ + if (skb->ip_summed == CHECKSUM_PARTIAL) { + sqe_hdr->ol3ptr = skb_network_offset(skb); + sqe_hdr->ol4ptr = skb_transport_offset(skb); + /* get vlan protocol Ethertype */ + if (eth_type_vlan(skb->protocol)) + skb->protocol = vlan_get_protocol(skb); + + if (skb->protocol == htons(ETH_P_IP)) { + proto = ip_hdr(skb)->protocol; + /* In case of TSO, HW needs this to be explicitly set. + * So set this always, instead of adding a check. 
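+			 * (IP4_CKSUM requests the IPv4 header checksum as
+			 * well, which matters once HW rewrites tot_len for
+			 * each TSO segment.)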
+ */ + sqe_hdr->ol3type = NIX_SENDL3TYPE_IP4_CKSUM; + } else if (skb->protocol == htons(ETH_P_IPV6)) { + proto = ipv6_hdr(skb)->nexthdr; + } + + if (proto == IPPROTO_TCP) + sqe_hdr->ol4type = NIX_SENDL4TYPE_TCP_CKSUM; + else if (proto == IPPROTO_UDP) + sqe_hdr->ol4type = NIX_SENDL4TYPE_UDP_CKSUM; + } +} + +static int otx2_dma_map_tso_skb(struct otx2_nic *pfvf, + struct otx2_snd_queue *sq, + struct sk_buff *skb, int sqe, int hdr_len) +{ + int num_segs = skb_shinfo(skb)->nr_frags + 1; + struct sg_list *sg = &sq->sg[sqe]; + u64 dma_addr; + int seg, len; + + sg->num_segs = 0; + + /* Get payload length at skb->data */ + len = skb_headlen(skb) - hdr_len; + + for (seg = 0; seg < num_segs; seg++) { + /* Skip skb->data, if there is no payload */ + if (!seg && !len) + continue; + dma_addr = otx2_dma_map_skb_frag(pfvf, skb, seg, &len); + if (dma_mapping_error(pfvf->dev, dma_addr)) + goto unmap; + + /* Save DMA mapping info for later unmapping */ + sg->dma_addr[sg->num_segs] = dma_addr; + sg->size[sg->num_segs] = len; + sg->num_segs++; + } + return 0; +unmap: + otx2_dma_unmap_skb_frags(pfvf, sg); + return -EINVAL; +} + +static u64 otx2_tso_frag_dma_addr(struct otx2_snd_queue *sq, + struct sk_buff *skb, int seg, + u64 seg_addr, int hdr_len, int sqe) +{ + struct sg_list *sg = &sq->sg[sqe]; + const skb_frag_t *frag; + int offset; + + if (seg < 0) + return sg->dma_addr[0] + (seg_addr - (u64)skb->data); + + frag = &skb_shinfo(skb)->frags[seg]; + offset = seg_addr - (u64)skb_frag_address(frag); + if (skb_headlen(skb) - hdr_len) + seg++; + return sg->dma_addr[seg] + offset; +} + +static void otx2_sqe_tso_add_sg(struct otx2_snd_queue *sq, + struct sg_list *list, int *offset) +{ + struct nix_sqe_sg_s *sg = NULL; + u16 *sg_lens = NULL; + u64 *iova = NULL; + int seg; + + /* Add SG descriptors with buffer addresses */ + for (seg = 0; seg < list->num_segs; seg++) { + if ((seg % MAX_SEGS_PER_SG) == 0) { + sg = (struct nix_sqe_sg_s *)(sq->sqe_base + *offset); + sg->ld_type = NIX_SEND_LDTYPE_LDD; + sg->subdc = NIX_SUBDC_SG; + sg->segs = 0; + sg_lens = (void *)sg; + iova = (void *)sg + sizeof(*sg); + /* Next subdc always starts at a 16byte boundary. + * So if sg->segs is whether 2 or 3, offset += 16bytes. + */ + if ((list->num_segs - seg) >= (MAX_SEGS_PER_SG - 1)) + *offset += sizeof(*sg) + (3 * sizeof(u64)); + else + *offset += sizeof(*sg) + sizeof(u64); + } + sg_lens[frag_num(seg % MAX_SEGS_PER_SG)] = list->size[seg]; + *iova++ = list->dma_addr[seg]; + sg->segs++; + } +} + +static void otx2_sq_append_tso(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, + struct sk_buff *skb, u16 qidx) +{ + struct netdev_queue *txq = netdev_get_tx_queue(pfvf->netdev, qidx); + int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + int tcp_data, seg_len, pkt_len, offset; + struct nix_sqe_hdr_s *sqe_hdr; + int first_sqe = sq->head; + struct sg_list list; + struct tso_t tso; + + /* Map SKB's fragments to DMA. + * It's done here to avoid mapping for every TSO segment's packet. 
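+	 * Each segment built below reuses these mappings via
+	 * otx2_tso_frag_dma_addr().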
+ */ + if (otx2_dma_map_tso_skb(pfvf, sq, skb, first_sqe, hdr_len)) { + dev_kfree_skb_any(skb); + return; + } + + netdev_tx_sent_queue(txq, skb->len); + + tso_start(skb, &tso); + tcp_data = skb->len - hdr_len; + while (tcp_data > 0) { + char *hdr; + + seg_len = min_t(int, skb_shinfo(skb)->gso_size, tcp_data); + tcp_data -= seg_len; + + /* Set SQE's SEND_HDR */ + memset(sq->sqe_base, 0, sq->sqe_size); + sqe_hdr = (struct nix_sqe_hdr_s *)(sq->sqe_base); + otx2_sqe_add_hdr(pfvf, sq, sqe_hdr, skb, qidx); + offset = sizeof(*sqe_hdr); + + /* Add TSO segment's pkt header */ + hdr = sq->tso_hdrs->base + (sq->head * TSO_HEADER_SIZE); + tso_build_hdr(skb, hdr, &tso, seg_len, tcp_data == 0); + list.dma_addr[0] = + sq->tso_hdrs->iova + (sq->head * TSO_HEADER_SIZE); + list.size[0] = hdr_len; + list.num_segs = 1; + + /* Add TSO segment's payload data fragments */ + pkt_len = hdr_len; + while (seg_len > 0) { + int size; + + size = min_t(int, tso.size, seg_len); + + list.size[list.num_segs] = size; + list.dma_addr[list.num_segs] = + otx2_tso_frag_dma_addr(sq, skb, + tso.next_frag_idx - 1, + (u64)tso.data, hdr_len, + first_sqe); + list.num_segs++; + pkt_len += size; + seg_len -= size; + tso_build_data(skb, &tso, size); + } + sqe_hdr->total = pkt_len; + otx2_sqe_tso_add_sg(sq, &list, &offset); + + /* DMA mappings and skb needs to be freed only after last + * TSO segment is transmitted out. So set 'PNC' only for + * last segment. Also point last segment's sqe_id to first + * segment's SQE index where skb address and DMA mappings + * are saved. + */ + if (!tcp_data) { + sqe_hdr->pnc = 1; + sqe_hdr->sqe_id = first_sqe; + sq->sg[first_sqe].skb = (u64)skb; + } else { + sqe_hdr->pnc = 0; + } + + sqe_hdr->sizem1 = (offset / 16) - 1; + + /* Flush SQE to HW */ + otx2_sqe_flush(sq, offset); + } +} + +static bool is_hw_tso_supported(struct otx2_nic *pfvf, + struct sk_buff *skb) +{ + int payload_len, last_seg_size; + + if (!pfvf->hw.hw_tso) + return false; + + /* HW has an issue due to which when the payload of the last LSO + * segment is shorter than 16 bytes, some header fields may not + * be correctly modified, hence don't offload such TSO segments. + */ + if (!is_96xx_B0(pfvf->pdev)) + return true; + + payload_len = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); + last_seg_size = payload_len % skb_shinfo(skb)->gso_size; + if (last_seg_size && last_seg_size < 16) + return false; + + return true; +} + +static int otx2_get_sqe_count(struct otx2_nic *pfvf, struct sk_buff *skb) +{ + if (!skb_shinfo(skb)->gso_size) + return 1; + + /* HW TSO */ + if (is_hw_tso_supported(pfvf, skb)) + return 1; + + /* SW TSO */ + return skb_shinfo(skb)->gso_segs; +} + +bool otx2_sq_append_skb(struct net_device *netdev, struct otx2_snd_queue *sq, + struct sk_buff *skb, u16 qidx) +{ + struct netdev_queue *txq = netdev_get_tx_queue(netdev, qidx); + struct otx2_nic *pfvf = netdev_priv(netdev); + int offset, num_segs, free_sqe; + struct nix_sqe_hdr_s *sqe_hdr; + + /* Check if there is room for new SQE. + * 'Num of SQBs freed to SQ's pool - SQ's Aura count' + * will give free SQE count. + */ + free_sqe = (sq->num_sqbs - *sq->aura_fc_addr) * sq->sqe_per_sqb; + + if (free_sqe < sq->sqe_thresh || + free_sqe < otx2_get_sqe_count(pfvf, skb)) + return false; + + num_segs = skb_shinfo(skb)->nr_frags + 1; + + /* If SKB doesn't fit in a single SQE, linearize it. + * TODO: Consider adding JUMP descriptor instead. 
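 * For example, with OTX2_MAX_FRAGS_IN_SQE fixed at 9 (see otx2_txrx.h
 * below), an skb carrying its linear area plus 10 page fragments gives
 * num_segs = 11 and is flattened by __skb_linearize() into one
 * contiguous buffer, after which num_segs is recomputed as 1.  If the
 * copy needed for linearization fails, the skb is dropped rather than
 * queued.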
+ */ + if (unlikely(num_segs > OTX2_MAX_FRAGS_IN_SQE)) { + if (__skb_linearize(skb)) { + dev_kfree_skb_any(skb); + return true; + } + num_segs = skb_shinfo(skb)->nr_frags + 1; + } + + if (skb_shinfo(skb)->gso_size && !is_hw_tso_supported(pfvf, skb)) { + otx2_sq_append_tso(pfvf, sq, skb, qidx); + return true; + } + + /* Set SQE's SEND_HDR. + * Do not clear the first 64bit as it contains constant info. + */ + memset(sq->sqe_base + 8, 0, sq->sqe_size - 8); + sqe_hdr = (struct nix_sqe_hdr_s *)(sq->sqe_base); + otx2_sqe_add_hdr(pfvf, sq, sqe_hdr, skb, qidx); + offset = sizeof(*sqe_hdr); + + /* Add extended header if needed */ + otx2_sqe_add_ext(pfvf, sq, skb, &offset); + + /* Add SG subdesc with data frags */ + if (!otx2_sqe_add_sg(pfvf, sq, skb, num_segs, &offset)) { + otx2_dma_unmap_skb_frags(pfvf, &sq->sg[sq->head]); + return false; + } + + sqe_hdr->sizem1 = (offset / 16) - 1; + + netdev_tx_sent_queue(txq, skb->len); + + /* Flush SQE to HW */ + otx2_sqe_flush(sq, offset); + + return true; +} + +void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) +{ + struct nix_cqe_rx_s *cqe; + int processed_cqe = 0; + u64 iova, pa; + + while ((cqe = (struct nix_cqe_rx_s *)otx2_get_next_cqe(cq))) { + if (!cqe->sg.subdc) + continue; + iova = cqe->sg.seg_addr - OTX2_HEAD_ROOM; + pa = otx2_iova_to_phys(pfvf->iommu_domain, iova); + otx2_dma_unmap_page(pfvf, iova, pfvf->rbsize, DMA_FROM_DEVICE); + put_page(virt_to_page(phys_to_virt(pa))); + processed_cqe++; + } + + /* Free CQEs to HW */ + otx2_write64(pfvf, NIX_LF_CQ_OP_DOOR, + ((u64)cq->cq_idx << 32) | processed_cqe); +} + +void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) +{ + struct sk_buff *skb = NULL; + struct otx2_snd_queue *sq; + struct nix_cqe_tx_s *cqe; + int processed_cqe = 0; + struct sg_list *sg; + + sq = &pfvf->qset.sq[cq->cint_idx]; + + while ((cqe = (struct nix_cqe_tx_s *)otx2_get_next_cqe(cq))) { + sg = &sq->sg[cqe->comp.sqe_id]; + skb = (struct sk_buff *)sg->skb; + if (skb) { + otx2_dma_unmap_skb_frags(pfvf, sg); + dev_kfree_skb_any(skb); + sg->skb = (u64)NULL; + } + processed_cqe++; + } + + /* Free CQEs to HW */ + otx2_write64(pfvf, NIX_LF_CQ_OP_DOOR, + ((u64)cq->cq_idx << 32) | processed_cqe); +} + +int otx2_rxtx_enable(struct otx2_nic *pfvf, bool enable) +{ + struct msg_req *msg; + int err; + + otx2_mbox_lock(&pfvf->mbox); + if (enable) + msg = otx2_mbox_alloc_msg_nix_lf_start_rx(&pfvf->mbox); + else + msg = otx2_mbox_alloc_msg_nix_lf_stop_rx(&pfvf->mbox); + + if (!msg) { + otx2_mbox_unlock(&pfvf->mbox); + return -ENOMEM; + } + + err = otx2_sync_mbox_msg(&pfvf->mbox); + otx2_mbox_unlock(&pfvf->mbox); + return err; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h new file mode 100644 index 000000000000..4ab32d3adb78 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell OcteonTx2 RVU Ethernet driver + * + * Copyright (C) 2020 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef OTX2_TXRX_H +#define OTX2_TXRX_H + +#include <linux/etherdevice.h> +#include <linux/iommu.h> +#include <linux/if_vlan.h> + +#define LBK_CHAN_BASE 0x000 +#define SDP_CHAN_BASE 0x700 +#define CGX_CHAN_BASE 0x800 + +#define OTX2_DATA_ALIGN(X) ALIGN(X, OTX2_ALIGN) +#define OTX2_HEAD_ROOM OTX2_ALIGN + +#define OTX2_ETH_HLEN (VLAN_ETH_HLEN + VLAN_HLEN) +#define OTX2_MIN_MTU 64 +#define OTX2_MAX_MTU (9212 - OTX2_ETH_HLEN) + +#define OTX2_MAX_GSO_SEGS 255 +#define OTX2_MAX_FRAGS_IN_SQE 9 + +/* Rx buffer size should be in multiples of 128bytes */ +#define RCV_FRAG_LEN1(x) \ + ((OTX2_HEAD_ROOM + OTX2_DATA_ALIGN(x)) + \ + OTX2_DATA_ALIGN(sizeof(struct skb_shared_info))) + +/* Prefer 2048 byte buffers for better last level cache + * utilization or data distribution across regions. + */ +#define RCV_FRAG_LEN(x) \ + ((RCV_FRAG_LEN1(x) < 2048) ? 2048 : RCV_FRAG_LEN1(x)) + +#define DMA_BUFFER_LEN(x) \ + ((x) - OTX2_HEAD_ROOM - \ + OTX2_DATA_ALIGN(sizeof(struct skb_shared_info))) + +/* IRQ triggered when NIX_LF_CINTX_CNT[ECOUNT] + * is equal to this value. + */ +#define CQ_CQE_THRESH_DEFAULT 10 + +/* IRQ triggered when NIX_LF_CINTX_CNT[ECOUNT] + * is nonzero and this much time elapses after that. + */ +#define CQ_TIMER_THRESH_DEFAULT 1 /* 1 usec */ +#define CQ_TIMER_THRESH_MAX 25 /* 25 usec */ + +/* Min number of CQs (of the ones mapped to this CINT) + * with valid CQEs. + */ +#define CQ_QCOUNT_DEFAULT 1 + +struct queue_stats { + u64 bytes; + u64 pkts; +}; + +struct otx2_rcv_queue { + struct queue_stats stats; +}; + +struct sg_list { + u16 num_segs; + u64 skb; + u64 size[OTX2_MAX_FRAGS_IN_SQE]; + u64 dma_addr[OTX2_MAX_FRAGS_IN_SQE]; +}; + +struct otx2_snd_queue { + u8 aura_id; + u16 head; + u16 sqe_size; + u32 sqe_cnt; + u16 num_sqbs; + u16 sqe_thresh; + u8 sqe_per_sqb; + u64 io_addr; + u64 *aura_fc_addr; + u64 *lmt_addr; + void *sqe_base; + struct qmem *sqe; + struct qmem *tso_hdrs; + struct sg_list *sg; + struct queue_stats stats; + u16 sqb_count; + u64 *sqb_ptrs; +} ____cacheline_aligned_in_smp; + +enum cq_type { + CQ_RX, + CQ_TX, + CQS_PER_CINT = 2, /* RQ + SQ */ +}; + +struct otx2_cq_poll { + void *dev; +#define CINT_INVALID_CQ 255 + u8 cint_idx; + u8 cq_ids[CQS_PER_CINT]; + struct napi_struct napi; +}; + +struct otx2_pool { + struct qmem *stack; + struct qmem *fc_addr; + u8 rbpage_order; + u16 rbsize; + u32 page_offset; + u16 pageref; + struct page *page; +}; + +struct otx2_cq_queue { + u8 cq_idx; + u8 cq_type; + u8 cint_idx; /* CQ interrupt id */ + u8 refill_task_sched; + u16 cqe_size; + u16 pool_ptrs; + u32 cqe_cnt; + u32 cq_head; + void *cqe_base; + struct qmem *cqe; + struct otx2_pool *rbpool; +} ____cacheline_aligned_in_smp; + +struct otx2_qset { + u32 rqe_cnt; + u32 sqe_cnt; /* Keep these two at top */ +#define OTX2_MAX_CQ_CNT 64 + u16 cq_cnt; + u16 xqe_size; + struct otx2_pool *pool; + struct otx2_cq_poll *napi; + struct otx2_cq_queue *cq; + struct otx2_snd_queue *sq; + struct otx2_rcv_queue *rq; +}; + +/* Translate IOVA to physical address */ +static inline u64 otx2_iova_to_phys(void *iommu_domain, dma_addr_t dma_addr) +{ + /* Translation is installed only when IOMMU is present */ + if (likely(iommu_domain)) + return iommu_iova_to_phys(iommu_domain, dma_addr); + return dma_addr; +} + +int otx2_napi_handler(struct napi_struct *napi, int budget); +bool otx2_sq_append_skb(struct net_device *netdev, struct otx2_snd_queue *sq, + struct sk_buff *skb, u16 qidx); +#endif /* OTX2_TXRX_H */ diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c 
b/drivers/net/ethernet/marvell/pxa168_eth.c index 3fb7ee3d4d13..7a0d785b826c 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -742,7 +742,7 @@ txq_reclaim_end: return released; } -static void pxa168_eth_tx_timeout(struct net_device *dev) +static void pxa168_eth_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct pxa168_eth_private *pep = netdev_priv(dev); @@ -1344,15 +1344,6 @@ static int pxa168_smi_write(struct mii_bus *bus, int phy_addr, int regnum, return 0; } -static int pxa168_eth_do_ioctl(struct net_device *dev, struct ifreq *ifr, - int cmd) -{ - if (dev->phydev) - return phy_mii_ioctl(dev->phydev, ifr, cmd); - - return -EOPNOTSUPP; -} - #ifdef CONFIG_NET_POLL_CONTROLLER static void pxa168_eth_netpoll(struct net_device *dev) { @@ -1387,7 +1378,7 @@ static const struct net_device_ops pxa168_eth_netdev_ops = { .ndo_set_rx_mode = pxa168_eth_set_rx_mode, .ndo_set_mac_address = pxa168_eth_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = pxa168_eth_do_ioctl, + .ndo_do_ioctl = phy_do_ioctl, .ndo_change_mtu = pxa168_eth_change_mtu, .ndo_tx_timeout = pxa168_eth_tx_timeout, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index 095f6c71b4fa..97f270d30cce 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -2884,7 +2884,7 @@ static void skge_tx_clean(struct net_device *dev) skge->tx_ring.to_clean = e; } -static void skge_tx_timeout(struct net_device *dev) +static void skge_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct skge_port *skge = netdev_priv(dev); @@ -3932,7 +3932,7 @@ static int skge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) spin_lock_init(&hw->phy_lock); tasklet_init(&hw->phy_task, skge_extirq, (unsigned long) hw); - hw->regs = ioremap_nocache(pci_resource_start(pdev, 0), 0x4000); + hw->regs = ioremap(pci_resource_start(pdev, 0), 0x4000); if (!hw->regs) { dev_err(&pdev->dev, "cannot map device registers\n"); goto err_out_free_hw; diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 5f56ee83e3b1..ebfd0ceac884 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -2358,7 +2358,7 @@ static void sky2_qlink_intr(struct sky2_hw *hw) /* Transmit timeout is only called if we are running, carrier is up * and tx queue is full (stopped). 
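 * The new ndo_tx_timeout prototype also hands the handler the index of
 * the queue that stalled, so a driver no longer has to scan every ring.
 * A minimal sketch of how a handler can use it (illustrative only; the
 * foo_ names and priv->tx_ring layout are hypothetical):
 *
 *   static void foo_tx_timeout(struct net_device *dev, unsigned int txqueue)
 *   {
 *           struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue);
 *
 *           netdev_warn(dev, "TX timeout on queue %u\n", txqueue);
 *           // inspect only txq / priv->tx_ring[txqueue] here
 *   }
 *
 * The mlx4_en change further down does exactly this, logging only
 * priv->tx_ring[TX][txqueue] instead of iterating over all TX rings.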
*/ -static void sky2_tx_timeout(struct net_device *dev) +static void sky2_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct sky2_port *sky2 = netdev_priv(dev); struct sky2_hw *hw = sky2->hw; @@ -5022,7 +5022,7 @@ static int sky2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->pdev = pdev; sprintf(hw->irq_name, DRV_NAME "@pci:%s", pci_name(pdev)); - hw->regs = ioremap_nocache(pci_resource_start(pdev, 0), 0x4000); + hw->regs = ioremap(pci_resource_start(pdev, 0), 0x4000); if (!hw->regs) { dev_err(&pdev->dev, "cannot map device registers\n"); goto err_out_free_hw; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 527ad2aadcca..8c6cfd15481c 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2081,7 +2081,7 @@ static void mtk_dma_free(struct mtk_eth *eth) kfree(eth->scratch_head); } -static void mtk_tx_timeout(struct net_device *dev) +static void mtk_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mtk_mac *mac = netdev_priv(dev); struct mtk_eth *eth = mac->hw; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 7af75b63245f..43dcbd8214c6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1363,24 +1363,18 @@ static void mlx4_en_delete_rss_steer_rules(struct mlx4_en_priv *priv) } } -static void mlx4_en_tx_timeout(struct net_device *dev) +static void mlx4_en_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; - int i; + struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX][txqueue]; if (netif_msg_timer(priv)) en_warn(priv, "Tx timeout called on port:%d\n", priv->port); - for (i = 0; i < priv->tx_ring_num[TX]; i++) { - struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX][i]; - - if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, i))) - continue; - en_warn(priv, "TX timeout on queue: %d, QP: 0x%x, CQ: 0x%x, Cons: 0x%x, Prod: 0x%x\n", - i, tx_ring->qpn, tx_ring->sp_cqn, - tx_ring->cons, tx_ring->prod); - } + en_warn(priv, "TX timeout on queue: %d, QP: 0x%x, CQ: 0x%x, Cons: 0x%x, Prod: 0x%x\n", + txqueue, tx_ring->qpn, tx_ring->sp_cqn, + tx_ring->cons, tx_ring->prod); priv->port_stats.tx_timeout++; en_dbg(DRV, priv, "Scheduling watchdog\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index a6f390fdb971..d3e06cec8317 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -42,7 +42,7 @@ mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o # Core extra # mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \ - ecpf.o rdma.o + ecpf.o rdma.o eswitch_offloads_chains.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index 549f962cd86e..42198e64a7f4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -71,8 +71,8 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev, return cpu_handle; } -int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, - struct mlx5_frag_buf 
*buf, int node) +static int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, + struct mlx5_frag_buf *buf, int node) { dma_addr_t t; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 9c8427698238..220ef9f06f84 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -135,7 +135,7 @@ struct page_pool; #define MLX5E_LOG_INDIR_RQT_SIZE 0x7 #define MLX5E_INDIR_RQT_SIZE BIT(MLX5E_LOG_INDIR_RQT_SIZE) #define MLX5E_MIN_NUM_CHANNELS 0x1 -#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE >> 1) +#define MLX5E_MAX_NUM_CHANNELS MLX5E_INDIR_RQT_SIZE #define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 #define MLX5E_TX_XSK_POLL_BUDGET 64 @@ -892,6 +892,8 @@ struct mlx5e_profile { int (*update_rx)(struct mlx5e_priv *priv); void (*update_stats)(struct mlx5e_priv *priv); void (*update_carrier)(struct mlx5e_priv *priv); + unsigned int (*stats_grps_num)(struct mlx5e_priv *priv); + mlx5e_stats_grp_t *stats_grps; struct { mlx5e_fp_handle_rx_cqe handle_rx_cqe; mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe; @@ -964,7 +966,6 @@ struct sk_buff * mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt); -void mlx5e_update_stats(struct mlx5e_priv *priv); void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s); @@ -1175,11 +1176,11 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv); void mlx5e_detach_netdev(struct mlx5e_priv *priv); void mlx5e_destroy_netdev(struct mlx5e_priv *priv); void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv); -void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, +void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, struct mlx5e_rss_params *rss_params, struct mlx5e_params *params, - u16 max_channels, u16 mtu); + u16 mtu); void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index d48292ccda29..0416f7712109 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -21,6 +21,7 @@ struct mlx5e_tc_table { DECLARE_HASHTABLE(hairpin_tbl, 8); struct notifier_block netdevice_nb; + struct netdev_net_notifier netdevice_nn; }; struct mlx5e_flow_table { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index 475b6bd5d29b..62fc8a128a8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -35,7 +35,7 @@ int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq, */ dma_info->addr = xdp_umem_get_dma(umem, handle); - xsk_umem_discard_addr_rq(umem); + xsk_umem_release_addr_rq(umem); dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE, DMA_BIDIRECTIONAL); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 778dab1af8fc..f260dd96873b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -180,7 +180,7 @@ mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq, struct 
tx_sync_info { u64 rcd_sn; - s32 sync_len; + u32 sync_len; int nr_frags; skb_frag_t frags[MAX_SKB_FRAGS]; }; @@ -193,13 +193,14 @@ enum mlx5e_ktls_sync_retval { static enum mlx5e_ktls_sync_retval tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx, - u32 tcp_seq, struct tx_sync_info *info) + u32 tcp_seq, int datalen, struct tx_sync_info *info) { struct tls_offload_context_tx *tx_ctx = priv_tx->tx_ctx; enum mlx5e_ktls_sync_retval ret = MLX5E_KTLS_SYNC_DONE; struct tls_record_info *record; int remaining, i = 0; unsigned long flags; + bool ends_before; spin_lock_irqsave(&tx_ctx->lock, flags); record = tls_get_record(tx_ctx, tcp_seq, &info->rcd_sn); @@ -209,9 +210,21 @@ tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx, goto out; } - if (unlikely(tcp_seq < tls_record_start_seq(record))) { - ret = tls_record_is_start_marker(record) ? - MLX5E_KTLS_SYNC_SKIP_NO_DATA : MLX5E_KTLS_SYNC_FAIL; + /* There are the following cases: + * 1. packet ends before start marker: bypass offload. + * 2. packet starts before start marker and ends after it: drop, + * not supported, breaks contract with kernel. + * 3. packet ends before tls record info starts: drop, + * this packet was already acknowledged and its record info + * was released. + */ + ends_before = before(tcp_seq + datalen, tls_record_start_seq(record)); + + if (unlikely(tls_record_is_start_marker(record))) { + ret = ends_before ? MLX5E_KTLS_SYNC_SKIP_NO_DATA : MLX5E_KTLS_SYNC_FAIL; + goto out; + } else if (ends_before) { + ret = MLX5E_KTLS_SYNC_FAIL; goto out; } @@ -337,7 +350,7 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, u8 num_wqebbs; int i = 0; - ret = tx_sync_info_get(priv_tx, seq, &info); + ret = tx_sync_info_get(priv_tx, seq, datalen, &info); if (unlikely(ret != MLX5E_KTLS_SYNC_DONE)) { if (ret == MLX5E_KTLS_SYNC_SKIP_NO_DATA) { stats->tls_skip_no_sync_data++; @@ -351,14 +364,6 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, goto err_out; } - if (unlikely(info.sync_len < 0)) { - if (likely(datalen <= -info.sync_len)) - return MLX5E_KTLS_SYNC_DONE; - - stats->tls_drop_bypass_req++; - goto err_out; - } - stats->tls_ooo++; tx_post_resync_params(sq, priv_tx, info.rcd_sn); @@ -378,8 +383,6 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, if (unlikely(contig_wqebbs_room < num_wqebbs)) mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); - tx_post_resync_params(sq, priv_tx, info.rcd_sn); - for (; i < info.nr_frags; i++) { unsigned int orig_fsz, frag_offset = 0, n = 0; skb_frag_t *f = &info.frags[i]; @@ -455,12 +458,18 @@ struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev, enum mlx5e_ktls_sync_retval ret = mlx5e_ktls_tx_handle_ooo(priv_tx, sq, datalen, seq); - if (likely(ret == MLX5E_KTLS_SYNC_DONE)) + switch (ret) { + case MLX5E_KTLS_SYNC_DONE: *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi); - else if (ret == MLX5E_KTLS_SYNC_FAIL) + break; + case MLX5E_KTLS_SYNC_SKIP_NO_DATA: + if (likely(!skb->decrypted)) + goto out; + WARN_ON_ONCE(1); + /* fall-through */ + default: /* MLX5E_KTLS_SYNC_FAIL */ goto err_out; - else /* ret == MLX5E_KTLS_SYNC_SKIP_NO_DATA */ - goto out; + } } priv_tx->expected_seq = seq + datalen; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index c6776f308d5e..d674cb679895 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -218,13 +218,9 @@ static const 
struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS]; int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) { - int i, num_stats = 0; - switch (sset) { case ETH_SS_STATS: - for (i = 0; i < mlx5e_num_stats_grps; i++) - num_stats += mlx5e_stats_grps[i].get_num_stats(priv); - return num_stats; + return mlx5e_stats_total_num(priv); case ETH_SS_PRIV_FLAGS: return MLX5E_NUM_PFLAGS; case ETH_SS_TEST: @@ -242,14 +238,6 @@ static int mlx5e_get_sset_count(struct net_device *dev, int sset) return mlx5e_ethtool_get_sset_count(priv, sset); } -static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data) -{ - int i, idx = 0; - - for (i = 0; i < mlx5e_num_stats_grps; i++) - idx = mlx5e_stats_grps[i].fill_strings(priv, data, idx); -} - void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data) { int i; @@ -268,7 +256,7 @@ void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data) break; case ETH_SS_STATS: - mlx5e_fill_stats_strings(priv, data); + mlx5e_stats_fill_strings(priv, data); break; } } @@ -283,14 +271,13 @@ static void mlx5e_get_strings(struct net_device *dev, u32 stringset, u8 *data) void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, struct ethtool_stats *stats, u64 *data) { - int i, idx = 0; + int idx = 0; mutex_lock(&priv->state_lock); - mlx5e_update_stats(priv); + mlx5e_stats_update(priv); mutex_unlock(&priv->state_lock); - for (i = 0; i < mlx5e_num_stats_grps; i++) - idx = mlx5e_stats_grps[i].fill_stats(priv, data, idx); + mlx5e_stats_fill(priv, data, idx); } static void mlx5e_get_ethtool_stats(struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index acd946f2ddbe..3bc2ac3d53fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -58,6 +58,7 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv, struct ethtool_rx_flow_spec *fs, int num_tuples) { + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5e_ethtool_table *eth_ft; struct mlx5_flow_namespace *ns; struct mlx5_flow_table *ft; @@ -102,9 +103,11 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv, table_size = min_t(u32, BIT(MLX5_CAP_FLOWTABLE(priv->mdev, flow_table_properties_nic_receive.log_max_ft_size)), MLX5E_ETHTOOL_NUM_ENTRIES); - ft = mlx5_create_auto_grouped_flow_table(ns, prio, - table_size, - MLX5E_ETHTOOL_NUM_GROUPS, 0, 0); + + ft_attr.prio = prio; + ft_attr.max_fte = table_size; + ft_attr.autogroup.max_num_groups = MLX5E_ETHTOOL_NUM_GROUPS; + ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); if (IS_ERR(ft)) return (void *)ft; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 4997b8a51994..454d3459bd8b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -159,23 +159,14 @@ static void mlx5e_update_carrier_work(struct work_struct *work) mutex_unlock(&priv->state_lock); } -void mlx5e_update_stats(struct mlx5e_priv *priv) -{ - int i; - - for (i = mlx5e_num_stats_grps - 1; i >= 0; i--) - if (mlx5e_stats_grps[i].update_stats) - mlx5e_stats_grps[i].update_stats(priv); -} - void mlx5e_update_ndo_stats(struct mlx5e_priv *priv) { int i; - for (i = mlx5e_num_stats_grps - 1; i >= 0; i--) - if (mlx5e_stats_grps[i].update_stats_mask & + for (i = mlx5e_nic_stats_grps_num(priv) - 1; i >= 0; i--) 
+ if (mlx5e_nic_stats_grps[i]->update_stats_mask & MLX5E_NDO_UPDATE_STATS) - mlx5e_stats_grps[i].update_stats(priv); + mlx5e_nic_stats_grps[i]->update_stats(priv); } static void mlx5e_update_stats_work(struct work_struct *work) @@ -4325,7 +4316,7 @@ unlock: rtnl_unlock(); } -static void mlx5e_tx_timeout(struct net_device *dev) +static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -4739,17 +4730,19 @@ void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, tirc_default_config[tt].rx_hash_fields; } -void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, +void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, struct mlx5e_rss_params *rss_params, struct mlx5e_params *params, - u16 max_channels, u16 mtu) + u16 mtu) { + struct mlx5_core_dev *mdev = priv->mdev; u8 rx_cq_period_mode; params->sw_mtu = mtu; params->hard_mtu = MLX5E_ETH_HARD_MTU; - params->num_channels = max_channels; + params->num_channels = min_t(unsigned int, MLX5E_MAX_NUM_CHANNELS / 2, + priv->max_nch); params->num_tc = 1; /* SQ */ @@ -4876,6 +4869,8 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM; netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; } if (mlx5e_tunnel_proto_supported(mdev, IPPROTO_GRE)) { @@ -4986,8 +4981,8 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, if (err) return err; - mlx5e_build_nic_params(mdev, &priv->xsk, rss, &priv->channels.params, - priv->max_nch, netdev->mtu); + mlx5e_build_nic_params(priv, &priv->xsk, rss, &priv->channels.params, + netdev->mtu); mlx5e_timestamp_init(priv); @@ -5149,6 +5144,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) static void mlx5e_nic_disable(struct mlx5e_priv *priv) { + struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; #ifdef CONFIG_MLX5_CORE_EN_DCB @@ -5169,7 +5165,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) mlx5e_monitor_counter_cleanup(priv); mlx5e_disable_async_events(priv); - mlx5_lag_remove(mdev); + mlx5_lag_remove(mdev, netdev); } int mlx5e_update_nic_rx(struct mlx5e_priv *priv) @@ -5193,6 +5189,8 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, .max_tc = MLX5E_MAX_NUM_TC, .rq_groups = MLX5E_NUM_RQ_GROUPS(XSK), + .stats_grps = mlx5e_nic_stats_grps, + .stats_grps_num = mlx5e_nic_stats_grps_num, }; /* mlx5e generic netdev management API (move to en_common.c) */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index f175cb24bb67..7b48ccacebe2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -41,6 +41,7 @@ #include <net/ipv6_stubs.h> #include "eswitch.h" +#include "eswitch_offloads_chains.h" #include "en.h" #include "en_rep.h" #include "en_tc.h" @@ -116,24 +117,71 @@ static const struct counter_desc vport_rep_stats_desc[] = { #define NUM_VPORT_REP_SW_COUNTERS ARRAY_SIZE(sw_rep_stats_desc) #define NUM_VPORT_REP_HW_COUNTERS ARRAY_SIZE(vport_rep_stats_desc) -static void mlx5e_rep_get_strings(struct net_device *dev, - u32 stringset, uint8_t *data) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(sw_rep) { - int i, j; + return NUM_VPORT_REP_SW_COUNTERS; +} - switch (stringset) { - case ETH_SS_STATS: - for (i = 0; i < 
NUM_VPORT_REP_SW_COUNTERS; i++) - strcpy(data + (i * ETH_GSTRING_LEN), - sw_rep_stats_desc[i].format); - for (j = 0; j < NUM_VPORT_REP_HW_COUNTERS; j++, i++) - strcpy(data + (i * ETH_GSTRING_LEN), - vport_rep_stats_desc[j].format); - break; - } +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(sw_rep) +{ + int i; + + for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + sw_rep_stats_desc[i].format); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(sw_rep) +{ + int i; + + for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, + sw_rep_stats_desc, i); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw_rep) +{ + struct mlx5e_sw_stats *s = &priv->stats.sw; + struct rtnl_link_stats64 stats64 = {}; + + memset(s, 0, sizeof(*s)); + mlx5e_fold_sw_stats64(priv, &stats64); + + s->rx_packets = stats64.rx_packets; + s->rx_bytes = stats64.rx_bytes; + s->tx_packets = stats64.tx_packets; + s->tx_bytes = stats64.tx_bytes; + s->tx_queue_dropped = stats64.tx_dropped; +} + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(vport_rep) +{ + return NUM_VPORT_REP_HW_COUNTERS; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vport_rep) +{ + int i; + + for (i = 0; i < NUM_VPORT_REP_HW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, vport_rep_stats_desc[i].format); + return idx; } -static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vport_rep) +{ + int i; + + for (i = 0; i < NUM_VPORT_REP_HW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.vf_vport, + vport_rep_stats_desc, i); + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport_rep) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -156,64 +204,33 @@ static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) vport_stats->tx_bytes = vf_stats.rx_bytes; } -static void mlx5e_uplink_rep_update_hw_counters(struct mlx5e_priv *priv) -{ - struct mlx5e_pport_stats *pstats = &priv->stats.pport; - struct rtnl_link_stats64 *vport_stats; - - mlx5e_grp_802_3_update_stats(priv); - - vport_stats = &priv->stats.vf_vport; - - vport_stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok); - vport_stats->rx_bytes = PPORT_802_3_GET(pstats, a_octets_received_ok); - vport_stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok); - vport_stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok); -} - -static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv) +static void mlx5e_rep_get_strings(struct net_device *dev, + u32 stringset, uint8_t *data) { - struct mlx5e_sw_stats *s = &priv->stats.sw; - struct rtnl_link_stats64 stats64 = {}; - - memset(s, 0, sizeof(*s)); - mlx5e_fold_sw_stats64(priv, &stats64); + struct mlx5e_priv *priv = netdev_priv(dev); - s->rx_packets = stats64.rx_packets; - s->rx_bytes = stats64.rx_bytes; - s->tx_packets = stats64.tx_packets; - s->tx_bytes = stats64.tx_bytes; - s->tx_queue_dropped = stats64.tx_dropped; + switch (stringset) { + case ETH_SS_STATS: + mlx5e_stats_fill_strings(priv, data); + break; + } } static void mlx5e_rep_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct mlx5e_priv *priv = netdev_priv(dev); - int i, j; - - if (!data) - return; - - mutex_lock(&priv->state_lock); - mlx5e_rep_update_sw_counters(priv); - priv->profile->update_stats(priv); - mutex_unlock(&priv->state_lock); - for (i = 0; i < 
NUM_VPORT_REP_SW_COUNTERS; i++) - data[i] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, - sw_rep_stats_desc, i); - - for (j = 0; j < NUM_VPORT_REP_HW_COUNTERS; j++, i++) - data[i] = MLX5E_READ_CTR64_CPU(&priv->stats.vf_vport, - vport_rep_stats_desc, j); + mlx5e_ethtool_get_ethtool_stats(priv, stats, data); } static int mlx5e_rep_get_sset_count(struct net_device *dev, int sset) { + struct mlx5e_priv *priv = netdev_priv(dev); + switch (sset) { case ETH_SS_STATS: - return NUM_VPORT_REP_SW_COUNTERS + NUM_VPORT_REP_HW_COUNTERS; + return mlx5e_stats_total_num(priv); default: return -EOPNOTSUPP; } @@ -1247,8 +1264,7 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { - struct flow_cls_offload *f = type_data; - struct flow_cls_offload cls_flower; + struct flow_cls_offload tmp, *f = type_data; struct mlx5e_priv *priv = cb_priv; struct mlx5_eswitch *esw; unsigned long flags; @@ -1261,16 +1277,30 @@ static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data, switch (type) { case TC_SETUP_CLSFLOWER: - if (!mlx5_eswitch_prios_supported(esw) || f->common.chain_index) + memcpy(&tmp, f, sizeof(*f)); + + if (!mlx5_esw_chains_prios_supported(esw) || + tmp.common.chain_index) return -EOPNOTSUPP; /* Re-use tc offload path by moving the ft flow to the * reserved ft chain. + * + * FT offload can use prio range [0, INT_MAX], so we normalize + * it to range [1, mlx5_esw_chains_get_prio_range(esw)] + * as with tc, where prio 0 isn't supported. + * + * We only support chain 0 of FT offload. */ - memcpy(&cls_flower, f, sizeof(*f)); - cls_flower.common.chain_index = FDB_FT_CHAIN; - err = mlx5e_rep_setup_tc_cls_flower(priv, &cls_flower, flags); - memcpy(&f->stats, &cls_flower.stats, sizeof(f->stats)); + if (tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw)) + return -EOPNOTSUPP; + if (tmp.common.chain_index != 0) + return -EOPNOTSUPP; + + tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw); + tmp.common.prio++; + err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags); + memcpy(&f->stats, &tmp.stats, sizeof(f->stats)); return err; default: return -EOPNOTSUPP; @@ -1660,10 +1690,65 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) mlx5e_close_drop_rq(&priv->drop_rq); } +static int mlx5e_init_ul_rep_rx(struct mlx5e_priv *priv) +{ + int err = mlx5e_init_rep_rx(priv); + + if (err) + return err; + + mlx5e_create_q_counters(priv); + return 0; +} + +static void mlx5e_cleanup_ul_rep_rx(struct mlx5e_priv *priv) +{ + mlx5e_destroy_q_counters(priv); + mlx5e_cleanup_rep_rx(priv); +} + +static int mlx5e_init_uplink_rep_tx(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5_rep_uplink_priv *uplink_priv; + struct net_device *netdev; + struct mlx5e_priv *priv; + int err; + + netdev = rpriv->netdev; + priv = netdev_priv(netdev); + uplink_priv = &rpriv->uplink_priv; + + mutex_init(&uplink_priv->unready_flows_lock); + INIT_LIST_HEAD(&uplink_priv->unready_flows); + + /* init shared tc flow table */ + err = mlx5e_tc_esw_init(&uplink_priv->tc_ht); + if (err) + return err; + + mlx5_init_port_tun_entropy(&uplink_priv->tun_entropy, priv->mdev); + + /* init indirect block notifications */ + INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list); + uplink_priv->netdevice_nb.notifier_call = mlx5e_nic_rep_netdevice_event; + err = register_netdevice_notifier_dev_net(rpriv->netdev, + &uplink_priv->netdevice_nb, + &uplink_priv->netdevice_nn); + if (err) { + mlx5_core_err(priv->mdev, "Failed to register netdev 
notifier\n"); + goto tc_esw_cleanup; + } + + return 0; + +tc_esw_cleanup: + mlx5e_tc_esw_cleanup(&uplink_priv->tc_ht); + return err; +} + static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) { struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_rep_uplink_priv *uplink_priv; int err; err = mlx5e_create_tises(priv); @@ -1673,52 +1758,41 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) } if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { - uplink_priv = &rpriv->uplink_priv; - - mutex_init(&uplink_priv->unready_flows_lock); - INIT_LIST_HEAD(&uplink_priv->unready_flows); - - /* init shared tc flow table */ - err = mlx5e_tc_esw_init(&uplink_priv->tc_ht); + err = mlx5e_init_uplink_rep_tx(rpriv); if (err) goto destroy_tises; - - mlx5_init_port_tun_entropy(&uplink_priv->tun_entropy, priv->mdev); - - /* init indirect block notifications */ - INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list); - uplink_priv->netdevice_nb.notifier_call = mlx5e_nic_rep_netdevice_event; - err = register_netdevice_notifier(&uplink_priv->netdevice_nb); - if (err) { - mlx5_core_err(priv->mdev, "Failed to register netdev notifier\n"); - goto tc_esw_cleanup; - } } return 0; -tc_esw_cleanup: - mlx5e_tc_esw_cleanup(&uplink_priv->tc_ht); destroy_tises: mlx5e_destroy_tises(priv); return err; } +static void mlx5e_cleanup_uplink_rep_tx(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; + + /* clean indirect TC block notifications */ + unregister_netdevice_notifier_dev_net(rpriv->netdev, + &uplink_priv->netdevice_nb, + &uplink_priv->netdevice_nn); + mlx5e_rep_indr_clean_block_privs(rpriv); + + /* delete shared tc flow table */ + mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht); + mutex_destroy(&rpriv->uplink_priv.unready_flows_lock); +} + static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) { struct mlx5e_rep_priv *rpriv = priv->ppriv; mlx5e_destroy_tises(priv); - if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { - /* clean indirect TC block notifications */ - unregister_netdevice_notifier(&rpriv->uplink_priv.netdevice_nb); - mlx5e_rep_indr_clean_block_privs(rpriv); - - /* delete shared tc flow table */ - mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht); - mutex_destroy(&rpriv->uplink_priv.unready_flows_lock); - } + if (rpriv->rep->vport == MLX5_VPORT_UPLINK) + mlx5e_cleanup_uplink_rep_tx(rpriv); } static void mlx5e_rep_enable(struct mlx5e_priv *priv) @@ -1787,6 +1861,7 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) { + struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -1795,7 +1870,44 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) #endif mlx5_notifier_unregister(mdev, &priv->events_nb); cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work); - mlx5_lag_remove(mdev); + mlx5_lag_remove(mdev, netdev); +} + +static MLX5E_DEFINE_STATS_GRP(sw_rep, 0); +static MLX5E_DEFINE_STATS_GRP(vport_rep, MLX5E_NDO_UPDATE_STATS); + +/* The stats groups order is opposite to the update_stats() order calls */ +static mlx5e_stats_grp_t mlx5e_rep_stats_grps[] = { + &MLX5E_STATS_GRP(sw_rep), + &MLX5E_STATS_GRP(vport_rep), +}; + +static unsigned int mlx5e_rep_stats_grps_num(struct mlx5e_priv *priv) +{ + return ARRAY_SIZE(mlx5e_rep_stats_grps); +} + +/* The stats groups order is opposite to the update_stats() order calls */ +static mlx5e_stats_grp_t mlx5e_ul_rep_stats_grps[] = { + &MLX5E_STATS_GRP(sw), + 
&MLX5E_STATS_GRP(qcnt), + &MLX5E_STATS_GRP(vnic_env), + &MLX5E_STATS_GRP(vport), + &MLX5E_STATS_GRP(802_3), + &MLX5E_STATS_GRP(2863), + &MLX5E_STATS_GRP(2819), + &MLX5E_STATS_GRP(phy), + &MLX5E_STATS_GRP(eth_ext), + &MLX5E_STATS_GRP(pcie), + &MLX5E_STATS_GRP(per_prio), + &MLX5E_STATS_GRP(pme), + &MLX5E_STATS_GRP(channels), + &MLX5E_STATS_GRP(per_port_buff_congest), +}; + +static unsigned int mlx5e_ul_rep_stats_grps_num(struct mlx5e_priv *priv) +{ + return ARRAY_SIZE(mlx5e_ul_rep_stats_grps); } static const struct mlx5e_profile mlx5e_rep_profile = { @@ -1807,29 +1919,33 @@ static const struct mlx5e_profile mlx5e_rep_profile = { .cleanup_tx = mlx5e_cleanup_rep_tx, .enable = mlx5e_rep_enable, .update_rx = mlx5e_update_rep_rx, - .update_stats = mlx5e_rep_update_hw_counters, + .update_stats = mlx5e_update_ndo_stats, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, .max_tc = 1, .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), + .stats_grps = mlx5e_rep_stats_grps, + .stats_grps_num = mlx5e_rep_stats_grps_num, }; static const struct mlx5e_profile mlx5e_uplink_rep_profile = { .init = mlx5e_init_rep, .cleanup = mlx5e_cleanup_rep, - .init_rx = mlx5e_init_rep_rx, - .cleanup_rx = mlx5e_cleanup_rep_rx, + .init_rx = mlx5e_init_ul_rep_rx, + .cleanup_rx = mlx5e_cleanup_ul_rep_rx, .init_tx = mlx5e_init_rep_tx, .cleanup_tx = mlx5e_cleanup_rep_tx, .enable = mlx5e_uplink_rep_enable, .disable = mlx5e_uplink_rep_disable, .update_rx = mlx5e_update_rep_rx, - .update_stats = mlx5e_uplink_rep_update_hw_counters, + .update_stats = mlx5e_update_ndo_stats, .update_carrier = mlx5e_update_carrier, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, .max_tc = MLX5E_MAX_NUM_TC, .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), + .stats_grps = mlx5e_ul_rep_stats_grps, + .stats_grps_num = mlx5e_ul_rep_stats_grps_num, }; static bool diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 31f83c8adcc9..3f756d51435f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -73,6 +73,7 @@ struct mlx5_rep_uplink_priv { */ struct list_head tc_indr_block_priv_list; struct notifier_block netdevice_nb; + struct netdev_net_notifier netdevice_nn; struct mlx5_tun_entropy tun_entropy; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 9f09253f9f46..30b216d9284c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -35,6 +35,58 @@ #include "en_accel/ipsec.h" #include "en_accel/tls.h" +static unsigned int stats_grps_num(struct mlx5e_priv *priv) +{ + return !priv->profile->stats_grps_num ? 
0 : + priv->profile->stats_grps_num(priv); +} + +unsigned int mlx5e_stats_total_num(struct mlx5e_priv *priv) +{ + mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps; + const unsigned int num_stats_grps = stats_grps_num(priv); + unsigned int total = 0; + int i; + + for (i = 0; i < num_stats_grps; i++) + total += stats_grps[i]->get_num_stats(priv); + + return total; +} + +void mlx5e_stats_update(struct mlx5e_priv *priv) +{ + mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps; + const unsigned int num_stats_grps = stats_grps_num(priv); + int i; + + for (i = num_stats_grps - 1; i >= 0; i--) + if (stats_grps[i]->update_stats) + stats_grps[i]->update_stats(priv); +} + +void mlx5e_stats_fill(struct mlx5e_priv *priv, u64 *data, int idx) +{ + mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps; + const unsigned int num_stats_grps = stats_grps_num(priv); + int i; + + for (i = 0; i < num_stats_grps; i++) + idx = stats_grps[i]->fill_stats(priv, data, idx); +} + +void mlx5e_stats_fill_strings(struct mlx5e_priv *priv, u8 *data) +{ + mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps; + const unsigned int num_stats_grps = stats_grps_num(priv); + int i, idx = 0; + + for (i = 0; i < num_stats_grps; i++) + idx = stats_grps[i]->fill_strings(priv, data, idx); +} + +/* Concrete NIC Stats */ + static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) }, @@ -146,12 +198,12 @@ static const struct counter_desc sw_stats_desc[] = { #define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc) -static int mlx5e_grp_sw_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(sw) { return NUM_SW_COUNTERS; } -static int mlx5e_grp_sw_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(sw) { int i; @@ -160,7 +212,7 @@ static int mlx5e_grp_sw_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx) return idx; } -static int mlx5e_grp_sw_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(sw) { int i; @@ -169,7 +221,7 @@ static int mlx5e_grp_sw_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) return idx; } -static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) { struct mlx5e_sw_stats *s = &priv->stats.sw; int i; @@ -297,6 +349,9 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_tls_drop_bypass_req += sq_stats->tls_drop_bypass_req; #endif s->tx_cqes += sq_stats->cqes; + + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */ + barrier(); } } } @@ -312,7 +367,7 @@ static const struct counter_desc drop_rq_stats_desc[] = { #define NUM_Q_COUNTERS ARRAY_SIZE(q_stats_desc) #define NUM_DROP_RQ_COUNTERS ARRAY_SIZE(drop_rq_stats_desc) -static int mlx5e_grp_q_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qcnt) { int num_stats = 0; @@ -325,7 +380,7 @@ static int mlx5e_grp_q_get_num_stats(struct mlx5e_priv *priv) return num_stats; } -static int mlx5e_grp_q_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(qcnt) { int i; @@ -340,7 +395,7 @@ static int mlx5e_grp_q_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx) return idx; } -static int mlx5e_grp_q_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qcnt) { int i; @@ -353,7 +408,7 @@ static int 
mlx5e_grp_q_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) return idx; } -static void mlx5e_grp_q_update_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qcnt) { struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt; u32 out[MLX5_ST_SZ_DW(query_q_counter_out)]; @@ -388,14 +443,13 @@ static const struct counter_desc vnic_env_stats_dev_oob_desc[] = { (MLX5_CAP_GEN(dev, vnic_env_int_rq_oob) ? \ ARRAY_SIZE(vnic_env_stats_dev_oob_desc) : 0) -static int mlx5e_grp_vnic_env_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(vnic_env) { return NUM_VNIC_ENV_STEER_COUNTERS(priv->mdev) + NUM_VNIC_ENV_DEV_OOB_COUNTERS(priv->mdev); } -static int mlx5e_grp_vnic_env_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vnic_env) { int i; @@ -409,8 +463,7 @@ static int mlx5e_grp_vnic_env_fill_strings(struct mlx5e_priv *priv, u8 *data, return idx; } -static int mlx5e_grp_vnic_env_fill_stats(struct mlx5e_priv *priv, u64 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vnic_env) { int i; @@ -424,7 +477,7 @@ static int mlx5e_grp_vnic_env_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } -static void mlx5e_grp_vnic_env_update_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vnic_env) { u32 *out = (u32 *)priv->stats.vnic.query_vnic_env_out; int outlen = MLX5_ST_SZ_BYTES(query_vnic_env_out); @@ -487,13 +540,12 @@ static const struct counter_desc vport_stats_desc[] = { #define NUM_VPORT_COUNTERS ARRAY_SIZE(vport_stats_desc) -static int mlx5e_grp_vport_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(vport) { return NUM_VPORT_COUNTERS; } -static int mlx5e_grp_vport_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vport) { int i; @@ -502,8 +554,7 @@ static int mlx5e_grp_vport_fill_strings(struct mlx5e_priv *priv, u8 *data, return idx; } -static int mlx5e_grp_vport_fill_stats(struct mlx5e_priv *priv, u64 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vport) { int i; @@ -513,7 +564,7 @@ static int mlx5e_grp_vport_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } -static void mlx5e_grp_vport_update_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport) { int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); u32 *out = (u32 *)priv->stats.vport.query_vport_out; @@ -552,13 +603,12 @@ static const struct counter_desc pport_802_3_stats_desc[] = { #define NUM_PPORT_802_3_COUNTERS ARRAY_SIZE(pport_802_3_stats_desc) -static int mlx5e_grp_802_3_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(802_3) { return NUM_PPORT_802_3_COUNTERS; } -static int mlx5e_grp_802_3_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(802_3) { int i; @@ -567,8 +617,7 @@ static int mlx5e_grp_802_3_fill_strings(struct mlx5e_priv *priv, u8 *data, return idx; } -static int mlx5e_grp_802_3_fill_stats(struct mlx5e_priv *priv, u64 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(802_3) { int i; @@ -581,7 +630,7 @@ static int mlx5e_grp_802_3_fill_stats(struct mlx5e_priv *priv, u64 *data, #define MLX5_BASIC_PPCNT_SUPPORTED(mdev) \ (MLX5_CAP_GEN(mdev, pcam_reg) ? 
MLX5_CAP_PCAM_REG(mdev, ppcnt) : 1) -void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(802_3) { struct mlx5e_pport_stats *pstats = &priv->stats.pport; struct mlx5_core_dev *mdev = priv->mdev; @@ -609,13 +658,12 @@ static const struct counter_desc pport_2863_stats_desc[] = { #define NUM_PPORT_2863_COUNTERS ARRAY_SIZE(pport_2863_stats_desc) -static int mlx5e_grp_2863_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(2863) { return NUM_PPORT_2863_COUNTERS; } -static int mlx5e_grp_2863_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(2863) { int i; @@ -624,8 +672,7 @@ static int mlx5e_grp_2863_fill_strings(struct mlx5e_priv *priv, u8 *data, return idx; } -static int mlx5e_grp_2863_fill_stats(struct mlx5e_priv *priv, u64 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(2863) { int i; @@ -635,7 +682,7 @@ static int mlx5e_grp_2863_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } -static void mlx5e_grp_2863_update_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(2863) { struct mlx5e_pport_stats *pstats = &priv->stats.pport; struct mlx5_core_dev *mdev = priv->mdev; @@ -670,13 +717,12 @@ static const struct counter_desc pport_2819_stats_desc[] = { #define NUM_PPORT_2819_COUNTERS ARRAY_SIZE(pport_2819_stats_desc) -static int mlx5e_grp_2819_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(2819) { return NUM_PPORT_2819_COUNTERS; } -static int mlx5e_grp_2819_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(2819) { int i; @@ -685,8 +731,7 @@ static int mlx5e_grp_2819_fill_strings(struct mlx5e_priv *priv, u8 *data, return idx; } -static int mlx5e_grp_2819_fill_stats(struct mlx5e_priv *priv, u64 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(2819) { int i; @@ -696,7 +741,7 @@ static int mlx5e_grp_2819_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } -static void mlx5e_grp_2819_update_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(2819) { struct mlx5e_pport_stats *pstats = &priv->stats.pport; struct mlx5_core_dev *mdev = priv->mdev; @@ -734,7 +779,7 @@ pport_phy_statistical_err_lanes_stats_desc[] = { #define NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS \ ARRAY_SIZE(pport_phy_statistical_err_lanes_stats_desc) -static int mlx5e_grp_phy_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(phy) { struct mlx5_core_dev *mdev = priv->mdev; int num_stats; @@ -751,8 +796,7 @@ static int mlx5e_grp_phy_get_num_stats(struct mlx5e_priv *priv) return num_stats; } -static int mlx5e_grp_phy_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(phy) { struct mlx5_core_dev *mdev = priv->mdev; int i; @@ -774,7 +818,7 @@ static int mlx5e_grp_phy_fill_strings(struct mlx5e_priv *priv, u8 *data, return idx; } -static int mlx5e_grp_phy_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(phy) { struct mlx5_core_dev *mdev = priv->mdev; int i; @@ -800,7 +844,7 @@ static int mlx5e_grp_phy_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) return idx; } -static void mlx5e_grp_phy_update_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(phy) { struct mlx5e_pport_stats *pstats = &priv->stats.pport; struct mlx5_core_dev *mdev = 
priv->mdev; @@ -830,7 +874,7 @@ static const struct counter_desc pport_eth_ext_stats_desc[] = { #define NUM_PPORT_ETH_EXT_COUNTERS ARRAY_SIZE(pport_eth_ext_stats_desc) -static int mlx5e_grp_eth_ext_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(eth_ext) { if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) return NUM_PPORT_ETH_EXT_COUNTERS; @@ -838,8 +882,7 @@ static int mlx5e_grp_eth_ext_get_num_stats(struct mlx5e_priv *priv) return 0; } -static int mlx5e_grp_eth_ext_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(eth_ext) { int i; @@ -850,8 +893,7 @@ static int mlx5e_grp_eth_ext_fill_strings(struct mlx5e_priv *priv, u8 *data, return idx; } -static int mlx5e_grp_eth_ext_fill_stats(struct mlx5e_priv *priv, u64 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(eth_ext) { int i; @@ -863,7 +905,7 @@ static int mlx5e_grp_eth_ext_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } -static void mlx5e_grp_eth_ext_update_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(eth_ext) { struct mlx5e_pport_stats *pstats = &priv->stats.pport; struct mlx5_core_dev *mdev = priv->mdev; @@ -904,7 +946,7 @@ static const struct counter_desc pcie_perf_stall_stats_desc[] = { #define NUM_PCIE_PERF_COUNTERS64 ARRAY_SIZE(pcie_perf_stats_desc64) #define NUM_PCIE_PERF_STALL_COUNTERS ARRAY_SIZE(pcie_perf_stall_stats_desc) -static int mlx5e_grp_pcie_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(pcie) { int num_stats = 0; @@ -920,8 +962,7 @@ static int mlx5e_grp_pcie_get_num_stats(struct mlx5e_priv *priv) return num_stats; } -static int mlx5e_grp_pcie_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(pcie) { int i; @@ -942,8 +983,7 @@ static int mlx5e_grp_pcie_fill_strings(struct mlx5e_priv *priv, u8 *data, return idx; } -static int mlx5e_grp_pcie_fill_stats(struct mlx5e_priv *priv, u64 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(pcie) { int i; @@ -967,7 +1007,7 @@ static int mlx5e_grp_pcie_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } -static void mlx5e_grp_pcie_update_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(pcie) { struct mlx5e_pcie_stats *pcie_stats = &priv->stats.pcie; struct mlx5_core_dev *mdev = priv->mdev; @@ -1015,8 +1055,7 @@ static int mlx5e_grp_per_tc_prio_get_num_stats(struct mlx5e_priv *priv) return NUM_PPORT_PER_TC_PRIO_COUNTERS * NUM_PPORT_PRIO; } -static int mlx5e_grp_per_port_buffer_congest_fill_strings(struct mlx5e_priv *priv, - u8 *data, int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(per_port_buff_congest) { struct mlx5_core_dev *mdev = priv->mdev; int i, prio; @@ -1036,8 +1075,7 @@ static int mlx5e_grp_per_port_buffer_congest_fill_strings(struct mlx5e_priv *pri return idx; } -static int mlx5e_grp_per_port_buffer_congest_fill_stats(struct mlx5e_priv *priv, - u64 *data, int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(per_port_buff_congest) { struct mlx5e_pport_stats *pport = &priv->stats.pport; struct mlx5_core_dev *mdev = priv->mdev; @@ -1112,13 +1150,13 @@ static void mlx5e_grp_per_tc_congest_prio_update_stats(struct mlx5e_priv *priv) } } -static int mlx5e_grp_per_port_buffer_congest_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(per_port_buff_congest) { return mlx5e_grp_per_tc_prio_get_num_stats(priv) + 
mlx5e_grp_per_tc_congest_prio_get_num_stats(priv); } -static void mlx5e_grp_per_port_buffer_congest_update_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(per_port_buff_congest) { mlx5e_grp_per_tc_prio_update_stats(priv); mlx5e_grp_per_tc_congest_prio_update_stats(priv); @@ -1130,6 +1168,7 @@ static void mlx5e_grp_per_port_buffer_congest_update_stats(struct mlx5e_priv *pr static const struct counter_desc pport_per_prio_traffic_stats_desc[] = { { "rx_prio%d_bytes", PPORT_PER_PRIO_OFF(rx_octets) }, { "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) }, + { "rx_prio%d_discards", PPORT_PER_PRIO_OFF(rx_discards) }, { "tx_prio%d_bytes", PPORT_PER_PRIO_OFF(tx_octets) }, { "tx_prio%d_packets", PPORT_PER_PRIO_OFF(tx_frames) }, }; @@ -1292,29 +1331,27 @@ static int mlx5e_grp_per_prio_pfc_fill_stats(struct mlx5e_priv *priv, return idx; } -static int mlx5e_grp_per_prio_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(per_prio) { return mlx5e_grp_per_prio_traffic_get_num_stats() + mlx5e_grp_per_prio_pfc_get_num_stats(priv); } -static int mlx5e_grp_per_prio_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(per_prio) { idx = mlx5e_grp_per_prio_traffic_fill_strings(priv, data, idx); idx = mlx5e_grp_per_prio_pfc_fill_strings(priv, data, idx); return idx; } -static int mlx5e_grp_per_prio_fill_stats(struct mlx5e_priv *priv, u64 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(per_prio) { idx = mlx5e_grp_per_prio_traffic_fill_stats(priv, data, idx); idx = mlx5e_grp_per_prio_pfc_fill_stats(priv, data, idx); return idx; } -static void mlx5e_grp_per_prio_update_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(per_prio) { struct mlx5e_pport_stats *pstats = &priv->stats.pport; struct mlx5_core_dev *mdev = priv->mdev; @@ -1349,13 +1386,12 @@ static const struct counter_desc mlx5e_pme_error_desc[] = { #define NUM_PME_STATUS_STATS ARRAY_SIZE(mlx5e_pme_status_desc) #define NUM_PME_ERR_STATS ARRAY_SIZE(mlx5e_pme_error_desc) -static int mlx5e_grp_pme_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(pme) { return NUM_PME_STATUS_STATS + NUM_PME_ERR_STATS; } -static int mlx5e_grp_pme_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(pme) { int i; @@ -1368,8 +1404,7 @@ static int mlx5e_grp_pme_fill_strings(struct mlx5e_priv *priv, u8 *data, return idx; } -static int mlx5e_grp_pme_fill_stats(struct mlx5e_priv *priv, u64 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(pme) { struct mlx5_pme_stats pme_stats; int i; @@ -1387,45 +1422,46 @@ static int mlx5e_grp_pme_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } -static int mlx5e_grp_ipsec_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(pme) { return; } + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec) { return mlx5e_ipsec_get_count(priv); } -static int mlx5e_grp_ipsec_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ipsec) { return idx + mlx5e_ipsec_get_strings(priv, data + idx * ETH_GSTRING_LEN); } -static int mlx5e_grp_ipsec_fill_stats(struct mlx5e_priv *priv, u64 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec) { return idx + mlx5e_ipsec_get_stats(priv, data + idx); } -static void mlx5e_grp_ipsec_update_stats(struct mlx5e_priv *priv) +static 
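Note: the per-prio descriptors shown here ("rx_prio%d_bytes", the newly added "rx_prio%d_discards", and so on) are printf templates that fill_strings expands once per priority into fixed ETH_GSTRING_LEN slots. A small sketch of that expansion, assuming an 8-priority port and a shortened template list rather than the driver's full table:

#include <stdio.h>

#define GSTRING_LEN 32   /* stand-in for ETH_GSTRING_LEN  */
#define NUM_PRIO    8    /* stand-in for NUM_PPORT_PRIO   */

static const char *per_prio_templates[] = {
        "rx_prio%d_bytes", "rx_prio%d_packets", "rx_prio%d_discards",
        "tx_prio%d_bytes", "tx_prio%d_packets",
};

/* Expand every template for every priority; return the advanced cursor. */
static int fill_per_prio_strings(char *data, int idx)
{
        for (int prio = 0; prio < NUM_PRIO; prio++)
                for (unsigned int i = 0;
                     i < sizeof(per_prio_templates) / sizeof(per_prio_templates[0]); i++)
                        snprintf(data + (idx++) * GSTRING_LEN, GSTRING_LEN,
                                 per_prio_templates[i], prio);
        return idx;
}

int main(void)
{
        static char names[NUM_PRIO * 5][GSTRING_LEN];
        int n = fill_per_prio_strings(&names[0][0], 0);

        printf("%d names, e.g. \"%s\"\n", n, names[2]); /* rx_prio0_discards */
        return 0;
}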
MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ipsec) { mlx5e_ipsec_update_stats(priv); } -static int mlx5e_grp_tls_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(tls) { return mlx5e_tls_get_count(priv); } -static int mlx5e_grp_tls_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(tls) { return idx + mlx5e_tls_get_strings(priv, data + idx * ETH_GSTRING_LEN); } -static int mlx5e_grp_tls_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(tls) { return idx + mlx5e_tls_get_stats(priv, data + idx); } +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(tls) { return; } + static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, @@ -1559,7 +1595,7 @@ static const struct counter_desc ch_stats_desc[] = { #define NUM_XSKSQ_STATS ARRAY_SIZE(xsksq_stats_desc) #define NUM_CH_STATS ARRAY_SIZE(ch_stats_desc) -static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv) +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(channels) { int max_nch = priv->max_nch; @@ -1572,8 +1608,7 @@ static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv) (NUM_XSKSQ_STATS * max_nch * priv->xsk.ever_used); } -static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(channels) { bool is_xsk = priv->xsk.ever_used; int max_nch = priv->max_nch; @@ -1615,8 +1650,7 @@ static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, return idx; } -static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, - int idx) +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(channels) { bool is_xsk = priv->xsk.ever_used; int max_nch = priv->max_nch; @@ -1664,104 +1698,46 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(channels) { return; } + +MLX5E_DEFINE_STATS_GRP(sw, 0); +MLX5E_DEFINE_STATS_GRP(qcnt, MLX5E_NDO_UPDATE_STATS); +MLX5E_DEFINE_STATS_GRP(vnic_env, 0); +MLX5E_DEFINE_STATS_GRP(vport, MLX5E_NDO_UPDATE_STATS); +MLX5E_DEFINE_STATS_GRP(802_3, MLX5E_NDO_UPDATE_STATS); +MLX5E_DEFINE_STATS_GRP(2863, 0); +MLX5E_DEFINE_STATS_GRP(2819, 0); +MLX5E_DEFINE_STATS_GRP(phy, 0); +MLX5E_DEFINE_STATS_GRP(pcie, 0); +MLX5E_DEFINE_STATS_GRP(per_prio, 0); +MLX5E_DEFINE_STATS_GRP(pme, 0); +MLX5E_DEFINE_STATS_GRP(channels, 0); +MLX5E_DEFINE_STATS_GRP(per_port_buff_congest, 0); +MLX5E_DEFINE_STATS_GRP(eth_ext, 0); +static MLX5E_DEFINE_STATS_GRP(ipsec, 0); +static MLX5E_DEFINE_STATS_GRP(tls, 0); + /* The stats groups order is opposite to the update_stats() order calls */ -const struct mlx5e_stats_grp mlx5e_stats_grps[] = { - { - .get_num_stats = mlx5e_grp_sw_get_num_stats, - .fill_strings = mlx5e_grp_sw_fill_strings, - .fill_stats = mlx5e_grp_sw_fill_stats, - .update_stats = mlx5e_grp_sw_update_stats, - }, - { - .get_num_stats = mlx5e_grp_q_get_num_stats, - .fill_strings = mlx5e_grp_q_fill_strings, - .fill_stats = mlx5e_grp_q_fill_stats, - .update_stats_mask = MLX5E_NDO_UPDATE_STATS, - .update_stats = mlx5e_grp_q_update_stats, - }, - { - .get_num_stats = mlx5e_grp_vnic_env_get_num_stats, - .fill_strings = mlx5e_grp_vnic_env_fill_strings, - .fill_stats = mlx5e_grp_vnic_env_fill_stats, - .update_stats = mlx5e_grp_vnic_env_update_stats, - }, - { - .get_num_stats = mlx5e_grp_vport_get_num_stats, - .fill_strings = 
mlx5e_grp_vport_fill_strings, - .fill_stats = mlx5e_grp_vport_fill_stats, - .update_stats_mask = MLX5E_NDO_UPDATE_STATS, - .update_stats = mlx5e_grp_vport_update_stats, - }, - { - .get_num_stats = mlx5e_grp_802_3_get_num_stats, - .fill_strings = mlx5e_grp_802_3_fill_strings, - .fill_stats = mlx5e_grp_802_3_fill_stats, - .update_stats_mask = MLX5E_NDO_UPDATE_STATS, - .update_stats = mlx5e_grp_802_3_update_stats, - }, - { - .get_num_stats = mlx5e_grp_2863_get_num_stats, - .fill_strings = mlx5e_grp_2863_fill_strings, - .fill_stats = mlx5e_grp_2863_fill_stats, - .update_stats = mlx5e_grp_2863_update_stats, - }, - { - .get_num_stats = mlx5e_grp_2819_get_num_stats, - .fill_strings = mlx5e_grp_2819_fill_strings, - .fill_stats = mlx5e_grp_2819_fill_stats, - .update_stats = mlx5e_grp_2819_update_stats, - }, - { - .get_num_stats = mlx5e_grp_phy_get_num_stats, - .fill_strings = mlx5e_grp_phy_fill_strings, - .fill_stats = mlx5e_grp_phy_fill_stats, - .update_stats = mlx5e_grp_phy_update_stats, - }, - { - .get_num_stats = mlx5e_grp_eth_ext_get_num_stats, - .fill_strings = mlx5e_grp_eth_ext_fill_strings, - .fill_stats = mlx5e_grp_eth_ext_fill_stats, - .update_stats = mlx5e_grp_eth_ext_update_stats, - }, - { - .get_num_stats = mlx5e_grp_pcie_get_num_stats, - .fill_strings = mlx5e_grp_pcie_fill_strings, - .fill_stats = mlx5e_grp_pcie_fill_stats, - .update_stats = mlx5e_grp_pcie_update_stats, - }, - { - .get_num_stats = mlx5e_grp_per_prio_get_num_stats, - .fill_strings = mlx5e_grp_per_prio_fill_strings, - .fill_stats = mlx5e_grp_per_prio_fill_stats, - .update_stats = mlx5e_grp_per_prio_update_stats, - }, - { - .get_num_stats = mlx5e_grp_pme_get_num_stats, - .fill_strings = mlx5e_grp_pme_fill_strings, - .fill_stats = mlx5e_grp_pme_fill_stats, - }, - { - .get_num_stats = mlx5e_grp_ipsec_get_num_stats, - .fill_strings = mlx5e_grp_ipsec_fill_strings, - .fill_stats = mlx5e_grp_ipsec_fill_stats, - .update_stats = mlx5e_grp_ipsec_update_stats, - }, - { - .get_num_stats = mlx5e_grp_tls_get_num_stats, - .fill_strings = mlx5e_grp_tls_fill_strings, - .fill_stats = mlx5e_grp_tls_fill_stats, - }, - { - .get_num_stats = mlx5e_grp_channels_get_num_stats, - .fill_strings = mlx5e_grp_channels_fill_strings, - .fill_stats = mlx5e_grp_channels_fill_stats, - }, - { - .get_num_stats = mlx5e_grp_per_port_buffer_congest_get_num_stats, - .fill_strings = mlx5e_grp_per_port_buffer_congest_fill_strings, - .fill_stats = mlx5e_grp_per_port_buffer_congest_fill_stats, - .update_stats = mlx5e_grp_per_port_buffer_congest_update_stats, - }, +mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = { + &MLX5E_STATS_GRP(sw), + &MLX5E_STATS_GRP(qcnt), + &MLX5E_STATS_GRP(vnic_env), + &MLX5E_STATS_GRP(vport), + &MLX5E_STATS_GRP(802_3), + &MLX5E_STATS_GRP(2863), + &MLX5E_STATS_GRP(2819), + &MLX5E_STATS_GRP(phy), + &MLX5E_STATS_GRP(eth_ext), + &MLX5E_STATS_GRP(pcie), + &MLX5E_STATS_GRP(per_prio), + &MLX5E_STATS_GRP(pme), + &MLX5E_STATS_GRP(ipsec), + &MLX5E_STATS_GRP(tls), + &MLX5E_STATS_GRP(channels), + &MLX5E_STATS_GRP(per_port_buff_congest), }; -const int mlx5e_num_stats_grps = ARRAY_SIZE(mlx5e_stats_grps); +unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv) +{ + return ARRAY_SIZE(mlx5e_nic_stats_grps); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 869f3502f631..092b39ffa32a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -29,6 +29,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 
IN THE * SOFTWARE. */ + #ifndef __MLX5_EN_STATS_H__ #define __MLX5_EN_STATS_H__ @@ -55,6 +56,56 @@ struct counter_desc { size_t offset; /* Byte offset */ }; +enum { + MLX5E_NDO_UPDATE_STATS = BIT(0x1), +}; + +struct mlx5e_priv; +struct mlx5e_stats_grp { + u16 update_stats_mask; + int (*get_num_stats)(struct mlx5e_priv *priv); + int (*fill_strings)(struct mlx5e_priv *priv, u8 *data, int idx); + int (*fill_stats)(struct mlx5e_priv *priv, u64 *data, int idx); + void (*update_stats)(struct mlx5e_priv *priv); +}; + +typedef const struct mlx5e_stats_grp *const mlx5e_stats_grp_t; + +#define MLX5E_STATS_GRP_OP(grp, name) mlx5e_stats_grp_ ## grp ## _ ## name + +#define MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(grp) \ + int MLX5E_STATS_GRP_OP(grp, num_stats)(struct mlx5e_priv *priv) + +#define MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(grp) \ + void MLX5E_STATS_GRP_OP(grp, update_stats)(struct mlx5e_priv *priv) + +#define MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(grp) \ + int MLX5E_STATS_GRP_OP(grp, fill_strings)(struct mlx5e_priv *priv, u8 *data, int idx) + +#define MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(grp) \ + int MLX5E_STATS_GRP_OP(grp, fill_stats)(struct mlx5e_priv *priv, u64 *data, int idx) + +#define MLX5E_STATS_GRP(grp) mlx5e_stats_grp_ ## grp + +#define MLX5E_DECLARE_STATS_GRP(grp) \ + const struct mlx5e_stats_grp MLX5E_STATS_GRP(grp) + +#define MLX5E_DEFINE_STATS_GRP(grp, mask) \ +MLX5E_DECLARE_STATS_GRP(grp) = { \ + .get_num_stats = MLX5E_STATS_GRP_OP(grp, num_stats), \ + .fill_stats = MLX5E_STATS_GRP_OP(grp, fill_stats), \ + .fill_strings = MLX5E_STATS_GRP_OP(grp, fill_strings), \ + .update_stats = MLX5E_STATS_GRP_OP(grp, update_stats), \ + .update_stats_mask = mask, \ +} + +unsigned int mlx5e_stats_total_num(struct mlx5e_priv *priv); +void mlx5e_stats_update(struct mlx5e_priv *priv); +void mlx5e_stats_fill(struct mlx5e_priv *priv, u64 *data, int idx); +void mlx5e_stats_fill_strings(struct mlx5e_priv *priv, u8 *data); + +/* Concrete NIC Stats */ + struct mlx5e_sw_stats { u64 rx_packets; u64 rx_bytes; @@ -322,22 +373,22 @@ struct mlx5e_stats { struct mlx5e_pcie_stats pcie; }; -enum { - MLX5E_NDO_UPDATE_STATS = BIT(0x1), -}; - -struct mlx5e_priv; -struct mlx5e_stats_grp { - u16 update_stats_mask; - int (*get_num_stats)(struct mlx5e_priv *priv); - int (*fill_strings)(struct mlx5e_priv *priv, u8 *data, int idx); - int (*fill_stats)(struct mlx5e_priv *priv, u64 *data, int idx); - void (*update_stats)(struct mlx5e_priv *priv); -}; - -extern const struct mlx5e_stats_grp mlx5e_stats_grps[]; -extern const int mlx5e_num_stats_grps; +extern mlx5e_stats_grp_t mlx5e_nic_stats_grps[]; +unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv); -void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv); +extern MLX5E_DECLARE_STATS_GRP(sw); +extern MLX5E_DECLARE_STATS_GRP(qcnt); +extern MLX5E_DECLARE_STATS_GRP(vnic_env); +extern MLX5E_DECLARE_STATS_GRP(vport); +extern MLX5E_DECLARE_STATS_GRP(802_3); +extern MLX5E_DECLARE_STATS_GRP(2863); +extern MLX5E_DECLARE_STATS_GRP(2819); +extern MLX5E_DECLARE_STATS_GRP(phy); +extern MLX5E_DECLARE_STATS_GRP(eth_ext); +extern MLX5E_DECLARE_STATS_GRP(pcie); +extern MLX5E_DECLARE_STATS_GRP(per_prio); +extern MLX5E_DECLARE_STATS_GRP(pme); +extern MLX5E_DECLARE_STATS_GRP(channels); +extern MLX5E_DECLARE_STATS_GRP(per_port_buff_congest); #endif /* __MLX5_EN_STATS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 024e1cddfd0e..74091f72c9a8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ 
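Note: to see how the MLX5E_DECLARE_STATS_GRP_OP_* and MLX5E_DEFINE_STATS_GRP macros above fit together: token pasting derives the per-group function names, the DEFINE macro wires them into a const ops struct, and the driver keeps an array of pointers to those structs that a generic walker can iterate. The userspace model below compresses the idea to two ops and drops the priv argument; names are shortened and nothing here is the real mlx5 API. It also shows why groups with nothing to refresh (pme, tls, channels above) now provide an empty update_stats: the DEFINE macro fills every field unconditionally.

#include <stdio.h>

struct stats_grp {
        int  (*num_stats)(void);
        void (*update_stats)(void);
};

/* Token-pasting helpers, mirroring MLX5E_STATS_GRP_OP() and friends. */
#define GRP_OP(grp, op)            stats_grp_##grp##_##op
#define DECLARE_NUM_STATS(grp)     int  GRP_OP(grp, num_stats)(void)
#define DECLARE_UPDATE_STATS(grp)  void GRP_OP(grp, update_stats)(void)
#define STATS_GRP(grp)             stats_grp_##grp
#define DEFINE_STATS_GRP(grp)                              \
        const struct stats_grp STATS_GRP(grp) = {          \
                .num_stats    = GRP_OP(grp, num_stats),    \
                .update_stats = GRP_OP(grp, update_stats), \
        }

/* Two toy groups; a group with nothing to refresh still provides a no-op
 * update_stats so the DEFINE macro can fill every field unconditionally. */
static DECLARE_NUM_STATS(sw)     { return 3; }
static DECLARE_UPDATE_STATS(sw)  { /* refresh counters here */ }
static DECLARE_NUM_STATS(pme)    { return 2; }
static DECLARE_UPDATE_STATS(pme) { return; }

static DEFINE_STATS_GRP(sw);
static DEFINE_STATS_GRP(pme);

static const struct stats_grp *nic_stats_grps[] = {
        &STATS_GRP(sw),
        &STATS_GRP(pme),
};

int main(void)
{
        int total = 0;

        for (unsigned int i = 0;
             i < sizeof(nic_stats_grps) / sizeof(nic_stats_grps[0]); i++) {
                nic_stats_grps[i]->update_stats();
                total += nic_stats_grps[i]->num_stats();
        }
        printf("total ethtool stats: %d\n", total);
        return 0;
}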
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -51,6 +51,7 @@ #include "en_rep.h" #include "en_tc.h" #include "eswitch.h" +#include "eswitch_offloads_chains.h" #include "fs_core.h" #include "en/port.h" #include "en/tc_tun.h" @@ -960,7 +961,8 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, mutex_lock(&priv->fs.tc.t_lock); if (IS_ERR_OR_NULL(priv->fs.tc.t)) { - int tc_grp_size, tc_tbl_size; + struct mlx5_flow_table_attr ft_attr = {}; + int tc_grp_size, tc_tbl_size, tc_num_grps; u32 max_flow_counter; max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | @@ -970,13 +972,15 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS, BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size))); + tc_num_grps = MLX5E_TC_TABLE_NUM_GROUPS; + ft_attr.prio = MLX5E_TC_PRIO; + ft_attr.max_fte = tc_tbl_size; + ft_attr.level = MLX5E_TC_FT_LEVEL; + ft_attr.autogroup.max_num_groups = tc_num_grps; priv->fs.tc.t = mlx5_create_auto_grouped_flow_table(priv->fs.ns, - MLX5E_TC_PRIO, - tc_tbl_size, - MLX5E_TC_TABLE_NUM_GROUPS, - MLX5E_TC_FT_LEVEL, 0); + &ft_attr); if (IS_ERR(priv->fs.tc.t)) { mutex_unlock(&priv->fs.tc.t_lock); NL_SET_ERR_MSG_MOD(extack, @@ -1080,7 +1084,7 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; slow_attr->split_count = 0; - slow_attr->dest_chain = FDB_TC_SLOW_PATH_CHAIN; + slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); if (!IS_ERR(rule)) @@ -1097,7 +1101,7 @@ mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; slow_attr->split_count = 0; - slow_attr->dest_chain = FDB_TC_SLOW_PATH_CHAIN; + slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); flow_flag_clear(flow, SLOW); } @@ -1157,19 +1161,18 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - u32 max_chain = mlx5_eswitch_get_chain_range(esw); struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; - u16 max_prio = mlx5_eswitch_get_prio_range(esw); struct net_device *out_dev, *encap_dev = NULL; struct mlx5_fc *counter = NULL; struct mlx5e_rep_priv *rpriv; struct mlx5e_priv *out_priv; bool encap_valid = true; + u32 max_prio, max_chain; int err = 0; int out_index; - if (!mlx5_eswitch_prios_supported(esw) && attr->prio != 1) { + if (!mlx5_esw_chains_prios_supported(esw) && attr->prio != 1) { NL_SET_ERR_MSG(extack, "E-switch priorities unsupported, upgrade FW"); return -EOPNOTSUPP; } @@ -1179,11 +1182,13 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, * FDB_FT_CHAIN which is outside tc range. * See mlx5e_rep_setup_ft_cb(). 
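Note: the en_tc.c hunk above is part of a wider conversion of mlx5_create_auto_grouped_flow_table() from five positional parameters to a single mlx5_flow_table_attr argument; later callers (the chains code further down) can then set new fields such as autogroup.num_reserved_entries without touching existing call sites. A generic before/after sketch of that refactoring pattern; the table type and field names below are invented, not the mlx5 API.

#include <stdio.h>

/* "After" style: one attributes struct, zero-initialized by the caller,
 * so new optional fields default to 0 and old call sites keep compiling. */
struct table_attr {
        int prio;
        int max_entries;
        int level;
        struct {
                int max_num_groups;
                int num_reserved_entries;   /* added later without churning callers */
        } autogroup;
};

static void create_table(const struct table_attr *attr)
{
        printf("prio=%d entries=%d level=%d groups=%d reserved=%d\n",
               attr->prio, attr->max_entries, attr->level,
               attr->autogroup.max_num_groups,
               attr->autogroup.num_reserved_entries);
}

int main(void)
{
        struct table_attr ft_attr = {};   /* mirrors 'ft_attr = {}' in the patch */

        ft_attr.prio = 1;
        ft_attr.max_entries = 1024;
        ft_attr.level = 0;
        ft_attr.autogroup.max_num_groups = 4;
        create_table(&ft_attr);
        return 0;
}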
*/ + max_chain = mlx5_esw_chains_get_chain_range(esw); if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) { NL_SET_ERR_MSG(extack, "Requested chain is out of supported range"); return -EOPNOTSUPP; } + max_prio = mlx5_esw_chains_get_prio_range(esw); if (attr->prio > max_prio) { NL_SET_ERR_MSG(extack, "Requested priority is out of supported range"); return -EOPNOTSUPP; @@ -1805,6 +1810,40 @@ static void *get_match_headers_value(u32 flags, outer_headers); } +static int mlx5e_flower_parse_meta(struct net_device *filter_dev, + struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct net_device *ingress_dev; + struct flow_match_meta match; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) + return 0; + + flow_rule_match_meta(rule, &match); + if (match.mask->ingress_ifindex != 0xFFFFFFFF) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask"); + return -EINVAL; + } + + ingress_dev = __dev_get_by_index(dev_net(filter_dev), + match.key->ingress_ifindex); + if (!ingress_dev) { + NL_SET_ERR_MSG_MOD(extack, + "Can't find the ingress port to match on"); + return -EINVAL; + } + + if (ingress_dev != filter_dev) { + NL_SET_ERR_MSG_MOD(extack, + "Can't match on the ingress filter port"); + return -EINVAL; + } + + return 0; +} + static int __parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct flow_cls_offload *f, @@ -1825,6 +1864,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, u16 addr_type = 0; u8 ip_proto = 0; u8 *match_level; + int err; match_level = outer_match_level; @@ -1868,6 +1908,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, spec); } + err = mlx5e_flower_parse_meta(filter_dev, f); + if (err) + return err; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { struct flow_match_basic match; @@ -2842,6 +2886,10 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, flow_action_for_each(i, act, flow_action) { switch (act->id) { + case FLOW_ACTION_ACCEPT: + action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + break; case FLOW_ACTION_DROP: action |= MLX5_FLOW_CONTEXT_ACTION_DROP; if (MLX5_CAP_FLOWTABLE(priv->mdev, @@ -3462,7 +3510,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, break; case FLOW_ACTION_GOTO: { u32 dest_chain = act->chain_index; - u32 max_chain = mlx5_eswitch_get_chain_range(esw); + u32 max_chain = mlx5_esw_chains_get_chain_range(esw); if (ft_flow) { NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported"); @@ -4036,6 +4084,13 @@ static int apply_police_params(struct mlx5e_priv *priv, u32 rate, u32 rate_mbps; int err; + vport_num = rpriv->rep->vport; + if (vport_num >= MLX5_VPORT_ECPF) { + NL_SET_ERR_MSG_MOD(extack, + "Ingress rate limit is supported only for Eswitch ports connected to VFs"); + return -EOPNOTSUPP; + } + esw = priv->mdev->priv.eswitch; /* rate is given in bytes/sec. * First convert to bits/sec and then round to the nearest mbit/secs. @@ -4044,8 +4099,6 @@ static int apply_police_params(struct mlx5e_priv *priv, u32 rate, * 1 mbit/sec. */ rate_mbps = rate ? 
max_t(u32, (rate * 8 + 500000) / 1000000, 1) : 0; - vport_num = rpriv->rep->vport; - err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps); if (err) NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware"); @@ -4198,7 +4251,10 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) return err; tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; - if (register_netdevice_notifier(&tc->netdevice_nb)) { + err = register_netdevice_notifier_dev_net(priv->netdev, + &tc->netdevice_nb, + &tc->netdevice_nn); + if (err) { tc->netdevice_nb.notifier_call = NULL; mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n"); } @@ -4220,7 +4276,9 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) struct mlx5e_tc_table *tc = &priv->fs.tc; if (tc->netdevice_nb.notifier_call) - unregister_netdevice_notifier(&tc->netdevice_nb); + unregister_netdevice_notifier_dev_net(priv->netdev, + &tc->netdevice_nb, + &tc->netdevice_nn); mutex_destroy(&tc->mod_hdr.lock); mutex_destroy(&tc->hairpin_tbl_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 580c71cb9dfa..cccea3a8eddd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -156,7 +156,8 @@ static int mlx5_eq_comp_int(struct notifier_block *nb, cq->comp(cq, eqe); mlx5_cq_put(cq); } else { - mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn); + dev_dbg_ratelimited(eq->dev->device, + "Completion event for bogus CQ 0x%x\n", cqn); } ++eq->cons_index; @@ -563,6 +564,39 @@ static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4]) gather_user_async_events(dev, mask); } +static int +setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq, + struct mlx5_eq_param *param, const char *name) +{ + int err; + + eq->irq_nb.notifier_call = mlx5_eq_async_int; + + err = create_async_eq(dev, &eq->core, param); + if (err) { + mlx5_core_warn(dev, "failed to create %s EQ %d\n", name, err); + return err; + } + err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable %s EQ %d\n", name, err); + destroy_async_eq(dev, &eq->core); + } + return err; +} + +static void cleanup_async_eq(struct mlx5_core_dev *dev, + struct mlx5_eq_async *eq, const char *name) +{ + int err; + + mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); + err = destroy_async_eq(dev, &eq->core); + if (err) + mlx5_core_err(dev, "failed to destroy %s eq, err(%d)\n", + name, err); +} + static int create_async_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; @@ -572,77 +606,45 @@ static int create_async_eqs(struct mlx5_core_dev *dev) MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR); mlx5_eq_notifier_register(dev, &table->cq_err_nb); - table->cmd_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { .irq_index = 0, .nent = MLX5_NUM_CMD_EQE, + .mask[0] = 1ull << MLX5_EVENT_TYPE_CMD, }; - - param.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD; - err = create_async_eq(dev, &table->cmd_eq.core, ¶m); - if (err) { - mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); - goto err0; - } - err = mlx5_eq_enable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); - if (err) { - mlx5_core_warn(dev, "failed to enable cmd EQ %d\n", err); + err = setup_async_eq(dev, &table->cmd_eq, ¶m, "cmd"); + if (err) goto err1; - } + mlx5_cmd_use_events(dev); - table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { .irq_index 
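Note: the apply_police_params() hunk above converts a byte/sec rate into a whole number of Mbit/s, rounding to the nearest megabit and clamping any non-zero request to at least 1 so it never degrades into "unlimited". A worked example of that arithmetic, using the same rounding expression (widened to 64 bits in this sketch):

#include <stdio.h>
#include <stdint.h>

/* rate in bytes/sec -> nearest Mbit/s, minimum 1 for any non-zero rate */
static uint32_t rate_to_mbps(uint64_t rate)
{
        uint64_t mbps;

        if (!rate)
                return 0;
        mbps = (rate * 8 + 500000) / 1000000;
        return mbps > 1 ? (uint32_t)mbps : 1;
}

int main(void)
{
        /* 125000 B/s is exactly 1 Mbit/s; 10000 B/s is 0.08 Mbit/s, clamped */
        printf("%u %u %u\n",
               rate_to_mbps(0),        /* 0: rate limit removed      */
               rate_to_mbps(125000),   /* 1                          */
               rate_to_mbps(10000));   /* 1 (clamped up from 0)      */
        return 0;
}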
= 0, .nent = MLX5_NUM_ASYNC_EQE, }; gather_async_events_mask(dev, param.mask); - err = create_async_eq(dev, &table->async_eq.core, ¶m); - if (err) { - mlx5_core_warn(dev, "failed to create async EQ %d\n", err); + err = setup_async_eq(dev, &table->async_eq, ¶m, "async"); + if (err) goto err2; - } - err = mlx5_eq_enable(dev, &table->async_eq.core, - &table->async_eq.irq_nb); - if (err) { - mlx5_core_warn(dev, "failed to enable async EQ %d\n", err); - goto err3; - } - table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { .irq_index = 0, .nent = /* TODO: sriov max_vf + */ 1, + .mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST, }; - param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST; - err = create_async_eq(dev, &table->pages_eq.core, ¶m); - if (err) { - mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); - goto err4; - } - err = mlx5_eq_enable(dev, &table->pages_eq.core, - &table->pages_eq.irq_nb); - if (err) { - mlx5_core_warn(dev, "failed to enable pages EQ %d\n", err); - goto err5; - } + err = setup_async_eq(dev, &table->pages_eq, ¶m, "pages"); + if (err) + goto err3; - return err; + return 0; -err5: - destroy_async_eq(dev, &table->pages_eq.core); -err4: - mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb); err3: - destroy_async_eq(dev, &table->async_eq.core); + cleanup_async_eq(dev, &table->async_eq, "async"); err2: mlx5_cmd_use_polling(dev); - mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); + cleanup_async_eq(dev, &table->cmd_eq, "cmd"); err1: - destroy_async_eq(dev, &table->cmd_eq.core); -err0: mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); return err; } @@ -650,28 +652,11 @@ err0: static void destroy_async_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - int err; - - mlx5_eq_disable(dev, &table->pages_eq.core, &table->pages_eq.irq_nb); - err = destroy_async_eq(dev, &table->pages_eq.core); - if (err) - mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", - err); - - mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb); - err = destroy_async_eq(dev, &table->async_eq.core); - if (err) - mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n", - err); + cleanup_async_eq(dev, &table->pages_eq, "pages"); + cleanup_async_eq(dev, &table->async_eq, "async"); mlx5_cmd_use_polling(dev); - - mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); - err = destroy_async_eq(dev, &table->cmd_eq.core); - if (err) - mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", - err); - + cleanup_async_eq(dev, &table->cmd_eq, "cmd"); mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 2c965ad0d744..5acf60b1bbfe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -277,6 +277,7 @@ enum { static int esw_create_legacy_vepa_table(struct mlx5_eswitch *esw) { + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb; @@ -289,8 +290,10 @@ static int esw_create_legacy_vepa_table(struct mlx5_eswitch *esw) } /* num FTE 2, num FG 2 */ - fdb = mlx5_create_auto_grouped_flow_table(root_ns, LEGACY_VEPA_PRIO, - 2, 2, 0, 0); + ft_attr.prio = LEGACY_VEPA_PRIO; + ft_attr.max_fte = 2; + ft_attr.autogroup.max_num_groups = 2; + fdb = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); if 
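Note: the eq.c change folds three nearly identical create+enable and disable+destroy sequences into setup_async_eq()/cleanup_async_eq(), which is why the error unwind in create_async_eqs() shrinks from five labels to three. The sketch below models the same consolidation in plain C: setup either fully succeeds or leaves nothing behind, so the caller only unwinds the steps that completed. The resource type and names are invented.

#include <stdio.h>

/* Toy "EQ": create/enable/disable/destroy, each of which may fail. */
struct eq { const char *name; int created, enabled; };

static int create_eq(struct eq *eq)   { eq->created = 1; return 0; }
static int enable_eq(struct eq *eq)   { eq->enabled = 1; return 0; }
static void disable_eq(struct eq *eq) { eq->enabled = 0; }
static void destroy_eq(struct eq *eq) { eq->created = 0; }

/* Consolidated helper: on failure nothing stays half-initialized. */
static int setup_eq(struct eq *eq, const char *name)
{
        int err;

        eq->name = name;
        err = create_eq(eq);
        if (err) {
                fprintf(stderr, "failed to create %s EQ %d\n", name, err);
                return err;
        }
        err = enable_eq(eq);
        if (err) {
                fprintf(stderr, "failed to enable %s EQ %d\n", name, err);
                destroy_eq(eq);
        }
        return err;
}

static void cleanup_eq(struct eq *eq)
{
        disable_eq(eq);
        destroy_eq(eq);
}

int main(void)
{
        struct eq cmd = { 0 }, async = { 0 }, pages = { 0 };
        int err;

        if ((err = setup_eq(&cmd, "cmd")))
                return err;
        if ((err = setup_eq(&async, "async")))
                goto err_cmd;
        if ((err = setup_eq(&pages, "pages")))
                goto err_async;
        puts("all EQs up");

        cleanup_eq(&pages);     /* teardown reuses the same helpers */
err_async:
        cleanup_eq(&async);
err_cmd:
        cleanup_eq(&cmd);
        return err;
}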
(IS_ERR(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create VEPA FDB err %d\n", err); @@ -1928,8 +1931,10 @@ static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw) struct mlx5_vport *vport; int i; - mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) { memset(&vport->info, 0, sizeof(vport->info)); + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; + } } /* Public E-Switch API */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index ffcff3ba3701..4472710ccc9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -157,7 +157,7 @@ enum offloads_fdb_flags { ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED = BIT(0), }; -extern const unsigned int ESW_POOLS[4]; +struct mlx5_esw_chains_priv; struct mlx5_eswitch_fdb { union { @@ -182,14 +182,7 @@ struct mlx5_eswitch_fdb { struct mlx5_flow_handle *miss_rule_multi; int vlan_push_pop_refcount; - struct { - struct mlx5_flow_table *fdb; - u32 num_rules; - } fdb_prio[FDB_NUM_CHAINS][FDB_TC_MAX_PRIO + 1][FDB_TC_LEVELS_PER_PRIO]; - /* Protects fdb_prio table */ - struct mutex fdb_prio_lock; - - int fdb_left[ARRAY_SIZE(ESW_POOLS)]; + struct mlx5_esw_chains_priv *esw_chains_priv; } offloads; }; u32 flags; @@ -355,15 +348,6 @@ mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule, struct mlx5_esw_flow_attr *attr); -bool -mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw); - -u16 -mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw); - -u32 -mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw); - struct mlx5_flow_handle * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, struct mlx5_flow_destination *dest); @@ -388,6 +372,11 @@ enum { MLX5_ESW_DEST_ENCAP_VALID = BIT(1), }; +enum { + MLX5_ESW_ATTR_FLAG_VLAN_HANDLED = BIT(0), + MLX5_ESW_ATTR_FLAG_SLOW_PATH = BIT(1), +}; + struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *in_rep; struct mlx5_core_dev *in_mdev; @@ -401,7 +390,6 @@ struct mlx5_esw_flow_attr { u16 vlan_vid[MLX5_FS_VLAN_DEPTH]; u8 vlan_prio[MLX5_FS_VLAN_DEPTH]; u8 total_vlan; - bool vlan_handled; struct { u32 flags; struct mlx5_eswitch_rep *rep; @@ -416,6 +404,7 @@ struct mlx5_esw_flow_attr { u32 chain; u16 prio; u32 dest_chain; + u32 flags; struct mlx5e_tc_flow_parse_attr *parse_attr; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 243a5440867e..979f13bdc203 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -37,6 +37,7 @@ #include <linux/mlx5/fs.h> #include "mlx5_core.h" #include "eswitch.h" +#include "eswitch_offloads_chains.h" #include "rdma.h" #include "en.h" #include "fs_core.h" @@ -47,10 +48,6 @@ * one for multicast. 
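Note: in eswitch.h above, the dedicated bool vlan_handled in mlx5_esw_flow_attr is replaced by a u32 flags word carrying MLX5_ESW_ATTR_FLAG_VLAN_HANDLED and MLX5_ESW_ATTR_FLAG_SLOW_PATH, so further per-rule attributes can be added without growing the struct. A minimal sketch of that bool-to-bitmask conversion, with invented flag names:

#include <stdio.h>
#include <stdint.h>

enum {
        ATTR_FLAG_VLAN_HANDLED = 1u << 0,
        ATTR_FLAG_SLOW_PATH    = 1u << 1,
};

struct flow_attr {
        uint32_t flags;   /* replaces 'bool vlan_handled' and any future bools */
};

int main(void)
{
        struct flow_attr attr = { 0 };

        attr.flags |= ATTR_FLAG_VLAN_HANDLED;    /* was: vlan_handled = true  */
        attr.flags &= ~ATTR_FLAG_VLAN_HANDLED;   /* was: vlan_handled = false */
        attr.flags |= ATTR_FLAG_SLOW_PATH;       /* new attribute, no new field */

        if (attr.flags & ATTR_FLAG_SLOW_PATH)
                puts("slow path rule");
        if (!(attr.flags & ATTR_FLAG_VLAN_HANDLED))
                puts("vlan not handled");
        return 0;
}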
*/ #define MLX5_ESW_MISS_FLOWS (2) - -#define fdb_prio_table(esw, chain, prio, level) \ - (esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)] - #define UPLINK_REP_INDEX 0 static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, @@ -62,32 +59,6 @@ static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, return &esw->offloads.vport_reps[idx]; } -static struct mlx5_flow_table * -esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level); -static void -esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level); - -bool mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw) -{ - return (!!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)); -} - -u32 mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw) -{ - if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED) - return FDB_TC_MAX_CHAIN; - - return 0; -} - -u16 mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw) -{ - if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED) - return FDB_TC_MAX_PRIO; - - return 1; -} - static bool esw_check_ingress_prio_tag_enabled(const struct mlx5_eswitch *esw, const struct mlx5_vport *vport) @@ -175,10 +146,17 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, } if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { - if (attr->dest_chain) { - struct mlx5_flow_table *ft; + struct mlx5_flow_table *ft; - ft = esw_get_prio_table(esw, attr->dest_chain, 1, 0); + if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) { + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = mlx5_esw_chains_get_tc_end_ft(esw); + i++; + } else if (attr->dest_chain) { + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + ft = mlx5_esw_chains_get_table(esw, attr->dest_chain, + 1, 0); if (IS_ERR(ft)) { rule = ERR_CAST(ft); goto err_create_goto_table; @@ -223,7 +201,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) flow_act.modify_hdr = attr->modify_hdr; - fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!split); + fdb = mlx5_esw_chains_get_table(esw, attr->chain, attr->prio, + !!split); if (IS_ERR(fdb)) { rule = ERR_CAST(fdb); goto err_esw_get; @@ -242,10 +221,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, return rule; err_add_rule: - esw_put_prio_table(esw, attr->chain, attr->prio, !!split); + mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, !!split); err_esw_get: - if (attr->dest_chain) - esw_put_prio_table(esw, attr->dest_chain, 1, 0); + if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) && attr->dest_chain) + mlx5_esw_chains_put_table(esw, attr->dest_chain, 1, 0); err_create_goto_table: return rule; } @@ -262,13 +241,13 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule; int i; - fast_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 0); + fast_fdb = mlx5_esw_chains_get_table(esw, attr->chain, attr->prio, 0); if (IS_ERR(fast_fdb)) { rule = ERR_CAST(fast_fdb); goto err_get_fast; } - fwd_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 1); + fwd_fdb = mlx5_esw_chains_get_table(esw, attr->chain, attr->prio, 1); if (IS_ERR(fwd_fdb)) { rule = ERR_CAST(fwd_fdb); goto err_get_fwd; @@ -296,6 +275,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, if (attr->outer_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; rule = 
mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i); if (IS_ERR(rule)) @@ -305,9 +285,9 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, return rule; add_err: - esw_put_prio_table(esw, attr->chain, attr->prio, 1); + mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 1); err_get_fwd: - esw_put_prio_table(esw, attr->chain, attr->prio, 0); + mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0); err_get_fast: return rule; } @@ -332,12 +312,13 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, atomic64_dec(&esw->offloads.num_flows); if (fwd_rule) { - esw_put_prio_table(esw, attr->chain, attr->prio, 1); - esw_put_prio_table(esw, attr->chain, attr->prio, 0); + mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 1); + mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, 0); } else { - esw_put_prio_table(esw, attr->chain, attr->prio, !!split); + mlx5_esw_chains_put_table(esw, attr->chain, attr->prio, + !!split); if (attr->dest_chain) - esw_put_prio_table(esw, attr->dest_chain, 1, 0); + mlx5_esw_chains_put_table(esw, attr->dest_chain, 1, 0); } } @@ -451,7 +432,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, if (err) goto unlock; - attr->vlan_handled = false; + attr->flags &= ~MLX5_ESW_ATTR_FLAG_VLAN_HANDLED; vport = esw_vlan_action_get_vport(attr, push, pop); @@ -459,7 +440,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, /* tracks VF --> wire rules without vlan push action */ if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) { vport->vlan_refcount++; - attr->vlan_handled = true; + attr->flags |= MLX5_ESW_ATTR_FLAG_VLAN_HANDLED; } goto unlock; @@ -490,7 +471,7 @@ skip_set_push: } out: if (!err) - attr->vlan_handled = true; + attr->flags |= MLX5_ESW_ATTR_FLAG_VLAN_HANDLED; unlock: mutex_unlock(&esw->state_lock); return err; @@ -508,7 +489,7 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) return 0; - if (!attr->vlan_handled) + if (!(attr->flags & MLX5_ESW_ATTR_FLAG_VLAN_HANDLED)) return 0; push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); @@ -582,8 +563,8 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, u16 vport, dest.vport.num = vport; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, - &flow_act, &dest, 1); + flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); if (IS_ERR(flow_rule)) esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule)); out: @@ -824,8 +805,8 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) dest.vport.num = esw->manager_vport; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, - &flow_act, &dest, 1); + flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); esw_warn(esw->dev, "FDB: Failed to add unicast miss flow rule err %d\n", err); @@ -839,8 +820,8 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16); dmac_v[0] = 0x01; - flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, - &flow_act, &dest, 1); + flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); esw_warn(esw->dev, "FDB: Failed to add multicast 
miss flow rule err %d\n", err); @@ -855,174 +836,6 @@ out: return err; } -#define ESW_OFFLOADS_NUM_GROUPS 4 - -/* Firmware currently has 4 pool of 4 sizes that it supports (ESW_POOLS), - * and a virtual memory region of 16M (ESW_SIZE), this region is duplicated - * for each flow table pool. We can allocate up to 16M of each pool, - * and we keep track of how much we used via put/get_sz_to_pool. - * Firmware doesn't report any of this for now. - * ESW_POOL is expected to be sorted from large to small - */ -#define ESW_SIZE (16 * 1024 * 1024) -const unsigned int ESW_POOLS[4] = { 4 * 1024 * 1024, 1 * 1024 * 1024, - 64 * 1024, 4 * 1024 }; - -static int -get_sz_from_pool(struct mlx5_eswitch *esw) -{ - int sz = 0, i; - - for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) { - if (esw->fdb_table.offloads.fdb_left[i]) { - --esw->fdb_table.offloads.fdb_left[i]; - sz = ESW_POOLS[i]; - break; - } - } - - return sz; -} - -static void -put_sz_to_pool(struct mlx5_eswitch *esw, int sz) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) { - if (sz >= ESW_POOLS[i]) { - ++esw->fdb_table.offloads.fdb_left[i]; - break; - } - } -} - -static struct mlx5_flow_table * -create_next_size_table(struct mlx5_eswitch *esw, - struct mlx5_flow_namespace *ns, - u16 table_prio, - int level, - u32 flags) -{ - struct mlx5_flow_table *fdb; - int sz; - - sz = get_sz_from_pool(esw); - if (!sz) - return ERR_PTR(-ENOSPC); - - fdb = mlx5_create_auto_grouped_flow_table(ns, - table_prio, - sz, - ESW_OFFLOADS_NUM_GROUPS, - level, - flags); - if (IS_ERR(fdb)) { - esw_warn(esw->dev, "Failed to create FDB Table err %d (table prio: %d, level: %d, size: %d)\n", - (int)PTR_ERR(fdb), table_prio, level, sz); - put_sz_to_pool(esw, sz); - } - - return fdb; -} - -static struct mlx5_flow_table * -esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level) -{ - struct mlx5_core_dev *dev = esw->dev; - struct mlx5_flow_table *fdb = NULL; - struct mlx5_flow_namespace *ns; - int table_prio, l = 0; - u32 flags = 0; - - if (chain == FDB_TC_SLOW_PATH_CHAIN) - return esw->fdb_table.offloads.slow_fdb; - - mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock); - - fdb = fdb_prio_table(esw, chain, prio, level).fdb; - if (fdb) { - /* take ref on earlier levels as well */ - while (level >= 0) - fdb_prio_table(esw, chain, prio, level--).num_rules++; - mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); - return fdb; - } - - ns = mlx5_get_fdb_sub_ns(dev, chain); - if (!ns) { - esw_warn(dev, "Failed to get FDB sub namespace\n"); - mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); - return ERR_PTR(-EOPNOTSUPP); - } - - if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) - flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | - MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); - - table_prio = prio - 1; - - /* create earlier levels for correct fs_core lookup when - * connecting tables - */ - for (l = 0; l <= level; l++) { - if (fdb_prio_table(esw, chain, prio, l).fdb) { - fdb_prio_table(esw, chain, prio, l).num_rules++; - continue; - } - - fdb = create_next_size_table(esw, ns, table_prio, l, flags); - if (IS_ERR(fdb)) { - l--; - goto err_create_fdb; - } - - fdb_prio_table(esw, chain, prio, l).fdb = fdb; - fdb_prio_table(esw, chain, prio, l).num_rules = 1; - } - - mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); - return fdb; - -err_create_fdb: - mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); - if (l >= 0) - esw_put_prio_table(esw, chain, prio, l); - - return fdb; -} - -static void -esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, 
int level) -{ - int l; - - if (chain == FDB_TC_SLOW_PATH_CHAIN) - return; - - mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock); - - for (l = level; l >= 0; l--) { - if (--(fdb_prio_table(esw, chain, prio, l).num_rules) > 0) - continue; - - put_sz_to_pool(esw, fdb_prio_table(esw, chain, prio, l).fdb->max_fte); - mlx5_destroy_flow_table(fdb_prio_table(esw, chain, prio, l).fdb); - fdb_prio_table(esw, chain, prio, l).fdb = NULL; - } - - mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); -} - -static void esw_destroy_offloads_fast_fdb_tables(struct mlx5_eswitch *esw) -{ - /* If lazy creation isn't supported, deref the fast path tables */ - if (!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)) { - esw_put_prio_table(esw, 0, 1, 1); - esw_put_prio_table(esw, 0, 1, 0); - } -} - #define MAX_PF_SQ 256 #define MAX_SQ_NVPORTS 32 @@ -1055,16 +868,16 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_core_dev *dev = esw->dev; - u32 *flow_group_in, max_flow_counter; struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb = NULL; - int table_size, ix, err = 0, i; + u32 flags = 0, *flow_group_in; + int table_size, ix, err = 0; struct mlx5_flow_group *g; - u32 flags = 0, fdb_max; void *match_criteria; u8 *dmac; esw_debug(esw->dev, "Create offloads FDB Tables\n"); + flow_group_in = kvzalloc(inlen, GFP_KERNEL); if (!flow_group_in) return -ENOMEM; @@ -1083,19 +896,6 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) goto ns_err; } - max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | - MLX5_CAP_GEN(dev, max_flow_counter_15_0); - fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); - - esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d), groups(%d), max flow table size(%d))\n", - MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size), - max_flow_counter, ESW_OFFLOADS_NUM_GROUPS, - fdb_max); - - for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) - esw->fdb_table.offloads.fdb_left[i] = - ESW_POOLS[i] <= fdb_max ? 
ESW_SIZE / ESW_POOLS[i] : 0; - table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + MLX5_ESW_MISS_FLOWS + esw->total_vports; @@ -1118,16 +918,10 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) } esw->fdb_table.offloads.slow_fdb = fdb; - /* If lazy creation isn't supported, open the fast path tables now */ - if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, multi_fdb_encap) && - esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { - esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; - esw_warn(dev, "Lazy creation of flow tables isn't supported, ignoring priorities\n"); - esw_get_prio_table(esw, 0, 1, 0); - esw_get_prio_table(esw, 0, 1, 1); - } else { - esw_debug(dev, "Lazy creation of flow tables supported, deferring table opening\n"); - esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; + err = mlx5_esw_chains_create(esw); + if (err) { + esw_warn(dev, "Failed to create fdb chains err(%d)\n", err); + goto fdb_chains_err; } /* create send-to-vport group */ @@ -1218,7 +1012,8 @@ miss_err: peer_miss_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); send_vport_err: - esw_destroy_offloads_fast_fdb_tables(esw); + mlx5_esw_chains_destroy(esw); +fdb_chains_err: mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); slow_fdb_err: /* Holds true only as long as DMFS is the default */ @@ -1240,8 +1035,8 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); + mlx5_esw_chains_destroy(esw); mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); - esw_destroy_offloads_fast_fdb_tables(esw); /* Holds true only as long as DMFS is the default */ mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns, MLX5_FLOW_STEERING_MODE_DMFS); @@ -1377,7 +1172,7 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, return -EINVAL; } - mlx5_eswitch_disable(esw, false); + mlx5_eswitch_disable(esw, true); mlx5_eswitch_update_num_of_vfs(esw, esw->dev->priv.sriov.num_vfs); err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS); if (err) { @@ -2111,7 +1906,6 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw) total_vports = num_vfs + MLX5_SPECIAL_VPORTS(esw->dev); memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); - mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); err = esw_create_uplink_offloads_acl_tables(esw); if (err) @@ -2220,7 +2014,8 @@ int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type int esw_offloads_enable(struct mlx5_eswitch *esw) { - int err; + struct mlx5_vport *vport; + int err, i; if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) && MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, decap)) @@ -2237,6 +2032,10 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) if (err) goto err_vport_metadata; + /* Representor will control the vport link state */ + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN; + err = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE); if (err) goto err_vports; @@ -2266,7 +2065,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, { int err, err1; - mlx5_eswitch_disable(esw, false); + mlx5_eswitch_disable(esw, true); err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c new file mode 100644 index 000000000000..c5a446e295aa --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c @@ -0,0 +1,758 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2020 Mellanox Technologies. + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/fs.h> + +#include "eswitch_offloads_chains.h" +#include "mlx5_core.h" +#include "fs_core.h" +#include "eswitch.h" +#include "en.h" + +#define esw_chains_priv(esw) ((esw)->fdb_table.offloads.esw_chains_priv) +#define esw_chains_lock(esw) (esw_chains_priv(esw)->lock) +#define esw_chains_ht(esw) (esw_chains_priv(esw)->chains_ht) +#define esw_prios_ht(esw) (esw_chains_priv(esw)->prios_ht) +#define fdb_pool_left(esw) (esw_chains_priv(esw)->fdb_left) +#define tc_slow_fdb(esw) ((esw)->fdb_table.offloads.slow_fdb) +#define tc_end_fdb(esw) (esw_chains_priv(esw)->tc_end_fdb) +#define fdb_ignore_flow_level_supported(esw) \ + (MLX5_CAP_ESW_FLOWTABLE_FDB((esw)->dev, ignore_flow_level)) + +#define ESW_OFFLOADS_NUM_GROUPS 4 + +/* Firmware currently has 4 pool of 4 sizes that it supports (ESW_POOLS), + * and a virtual memory region of 16M (ESW_SIZE), this region is duplicated + * for each flow table pool. We can allocate up to 16M of each pool, + * and we keep track of how much we used via get_next_avail_sz_from_pool. + * Firmware doesn't report any of this for now. + * ESW_POOL is expected to be sorted from large to small and match firmware + * pools. + */ +#define ESW_SIZE (16 * 1024 * 1024) +static const unsigned int ESW_POOLS[] = { 4 * 1024 * 1024, + 1 * 1024 * 1024, + 64 * 1024, + 4 * 1024, }; + +struct mlx5_esw_chains_priv { + struct rhashtable chains_ht; + struct rhashtable prios_ht; + /* Protects above chains_ht and prios_ht */ + struct mutex lock; + + struct mlx5_flow_table *tc_end_fdb; + + int fdb_left[ARRAY_SIZE(ESW_POOLS)]; +}; + +struct fdb_chain { + struct rhash_head node; + + u32 chain; + + int ref; + + struct mlx5_eswitch *esw; + struct list_head prios_list; +}; + +struct fdb_prio_key { + u32 chain; + u32 prio; + u32 level; +}; + +struct fdb_prio { + struct rhash_head node; + struct list_head list; + + struct fdb_prio_key key; + + int ref; + + struct fdb_chain *fdb_chain; + struct mlx5_flow_table *fdb; + struct mlx5_flow_table *next_fdb; + struct mlx5_flow_group *miss_group; + struct mlx5_flow_handle *miss_rule; +}; + +static const struct rhashtable_params chain_params = { + .head_offset = offsetof(struct fdb_chain, node), + .key_offset = offsetof(struct fdb_chain, chain), + .key_len = sizeof_field(struct fdb_chain, chain), + .automatic_shrinking = true, +}; + +static const struct rhashtable_params prio_params = { + .head_offset = offsetof(struct fdb_prio, node), + .key_offset = offsetof(struct fdb_prio, key), + .key_len = sizeof_field(struct fdb_prio, key), + .automatic_shrinking = true, +}; + +bool mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw) +{ + return esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; +} + +u32 mlx5_esw_chains_get_chain_range(struct mlx5_eswitch *esw) +{ + if (!mlx5_esw_chains_prios_supported(esw)) + return 1; + + if (fdb_ignore_flow_level_supported(esw)) + return UINT_MAX - 1; + + return FDB_TC_MAX_CHAIN; +} + +u32 mlx5_esw_chains_get_ft_chain(struct mlx5_eswitch *esw) +{ + return mlx5_esw_chains_get_chain_range(esw) + 1; +} + +u32 mlx5_esw_chains_get_prio_range(struct mlx5_eswitch *esw) +{ + if (!mlx5_esw_chains_prios_supported(esw)) + 
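Note: the new chains file keys its two rhashtables directly off struct members: fdb_chain is looked up by its chain field and fdb_prio by the embedded (chain, prio, level) key, with key_offset/key_len taken from offsetof and sizeof_field so the key layout cannot drift from the struct definition. The sketch below models that composite-key lookup in userspace, with a linear scan standing in for rhashtable; it relies on the same property the kernel code does, namely that three u32 members leave no padding to confuse memcmp.

#include <stdio.h>
#include <stddef.h>
#include <string.h>
#include <stdint.h>

struct prio_key {
        uint32_t chain;
        uint32_t prio;
        uint32_t level;
};

struct prio_entry {
        struct prio_key key;   /* embedded key, like fdb_prio::key */
        int ref;
};

/* Table "params": where the key lives inside the object and how long it is,
 * mirroring rhashtable_params.key_offset / key_len. */
static const size_t key_offset = offsetof(struct prio_entry, key);
static const size_t key_len    = sizeof(((struct prio_entry *)0)->key);

static int key_matches(const struct prio_entry *e, const struct prio_key *k)
{
        return !memcmp((const char *)e + key_offset, k, key_len);
}

int main(void)
{
        struct prio_entry entries[] = {
                { .key = { .chain = 0, .prio = 1, .level = 0 }, .ref = 1 },
                { .key = { .chain = 3, .prio = 5, .level = 1 }, .ref = 2 },
        };
        struct prio_key want = { .chain = 3, .prio = 5, .level = 1 };

        for (unsigned int i = 0; i < 2; i++)
                if (key_matches(&entries[i], &want))
                        printf("found entry, ref=%d\n", entries[i].ref);
        return 0;
}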
return 1; + + if (fdb_ignore_flow_level_supported(esw)) + return UINT_MAX; + + return FDB_TC_MAX_PRIO; +} + +static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw) +{ + if (fdb_ignore_flow_level_supported(esw)) + return UINT_MAX; + + return FDB_TC_LEVELS_PER_PRIO; +} + +#define POOL_NEXT_SIZE 0 +static int +mlx5_esw_chains_get_avail_sz_from_pool(struct mlx5_eswitch *esw, + int desired_size) +{ + int i, found_i = -1; + + for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--) { + if (fdb_pool_left(esw)[i] && ESW_POOLS[i] > desired_size) { + found_i = i; + if (desired_size != POOL_NEXT_SIZE) + break; + } + } + + if (found_i != -1) { + --fdb_pool_left(esw)[found_i]; + return ESW_POOLS[found_i]; + } + + return 0; +} + +static void +mlx5_esw_chains_put_sz_to_pool(struct mlx5_eswitch *esw, int sz) +{ + int i; + + for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--) { + if (sz == ESW_POOLS[i]) { + ++fdb_pool_left(esw)[i]; + return; + } + } + + WARN_ONCE(1, "Couldn't find size %d in fdb size pool", sz); +} + +static void +mlx5_esw_chains_init_sz_pool(struct mlx5_eswitch *esw) +{ + u32 fdb_max; + int i; + + fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, log_max_ft_size); + + for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--) + fdb_pool_left(esw)[i] = + ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0; +} + +static struct mlx5_flow_table * +mlx5_esw_chains_create_fdb_table(struct mlx5_eswitch *esw, + u32 chain, u32 prio, u32 level) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *fdb; + int sz; + + if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) + ft_attr.flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + + sz = mlx5_esw_chains_get_avail_sz_from_pool(esw, POOL_NEXT_SIZE); + if (!sz) + return ERR_PTR(-ENOSPC); + ft_attr.max_fte = sz; + + /* We use tc_slow_fdb(esw) as the table's next_ft till + * ignore_flow_level is allowed on FT creation and not just for FTEs. + * Instead caller should add an explicit miss rule if needed. + */ + ft_attr.next_ft = tc_slow_fdb(esw); + + /* The root table(chain 0, prio 1, level 0) is required to be + * connected to the previous prio (FDB_BYPASS_PATH if exists). + * We always create it, as a managed table, in order to align with + * fs_core logic. + */ + if (!fdb_ignore_flow_level_supported(esw) || + (chain == 0 && prio == 1 && level == 0)) { + ft_attr.level = level; + ft_attr.prio = prio - 1; + ns = mlx5_get_fdb_sub_ns(esw->dev, chain); + } else { + ft_attr.flags |= MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.prio = FDB_TC_OFFLOAD; + /* Firmware doesn't allow us to create another level 0 table, + * so we create all unmanaged tables as level 1. + * + * To connect them, we use explicit miss rules with + * ignore_flow_level. Caller is responsible to create + * these rules (if needed). 
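Note: mlx5_esw_chains_get_avail_sz_from_pool() and its put/init counterparts above manage a per-size budget of flow tables: ESW_POOLS is sorted large to small, a POOL_NEXT_SIZE (0) request takes the largest size that still has budget, a sized request takes the smallest pool size strictly larger than it, and freeing returns the exact size to its bucket. A compact userspace model of that allocator; the budgets below follow the ESW_SIZE / pool-size formula from the init function and are otherwise illustrative.

#include <stdio.h>

#define POOL_NEXT_SIZE 0

/* Sorted large -> small, mirroring ESW_POOLS; budgets = 16M / size. */
static const int pool_sizes[] = { 4 * 1024 * 1024, 1024 * 1024, 64 * 1024, 4 * 1024 };
static int pool_left[]        = { 4, 16, 256, 4096 };

#define NPOOLS ((int)(sizeof(pool_sizes) / sizeof(pool_sizes[0])))

/* POOL_NEXT_SIZE: largest size with budget left.
 * Otherwise: smallest pool size strictly larger than the request. */
static int get_sz_from_pool(int desired)
{
        int i, found = -1;

        for (i = NPOOLS - 1; i >= 0; i--) {
                if (pool_left[i] && pool_sizes[i] > desired) {
                        found = i;
                        if (desired != POOL_NEXT_SIZE)
                                break;
                }
        }
        if (found < 0)
                return 0;
        --pool_left[found];
        return pool_sizes[found];
}

static void put_sz_to_pool(int sz)
{
        for (int i = NPOOLS - 1; i >= 0; i--) {
                if (sz == pool_sizes[i]) {
                        ++pool_left[i];
                        return;
                }
        }
}

int main(void)
{
        int a = get_sz_from_pool(POOL_NEXT_SIZE); /* 4M: biggest with budget  */
        int b = get_sz_from_pool(100 * 1024);     /* 1M: smallest size > 100K */

        printf("%d %d\n", a, b);
        put_sz_to_pool(a);
        put_sz_to_pool(b);
        return 0;
}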
+ */ + ft_attr.level = 1; + ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB); + } + + ft_attr.autogroup.num_reserved_entries = 2; + ft_attr.autogroup.max_num_groups = ESW_OFFLOADS_NUM_GROUPS; + fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(fdb)) { + esw_warn(esw->dev, + "Failed to create FDB table err %d (chain: %d, prio: %d, level: %d, size: %d)\n", + (int)PTR_ERR(fdb), chain, prio, level, sz); + mlx5_esw_chains_put_sz_to_pool(esw, sz); + return fdb; + } + + return fdb; +} + +static void +mlx5_esw_chains_destroy_fdb_table(struct mlx5_eswitch *esw, + struct mlx5_flow_table *fdb) +{ + mlx5_esw_chains_put_sz_to_pool(esw, fdb->max_fte); + mlx5_destroy_flow_table(fdb); +} + +static struct fdb_chain * +mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain) +{ + struct fdb_chain *fdb_chain = NULL; + int err; + + fdb_chain = kvzalloc(sizeof(*fdb_chain), GFP_KERNEL); + if (!fdb_chain) + return ERR_PTR(-ENOMEM); + + fdb_chain->esw = esw; + fdb_chain->chain = chain; + INIT_LIST_HEAD(&fdb_chain->prios_list); + + err = rhashtable_insert_fast(&esw_chains_ht(esw), &fdb_chain->node, + chain_params); + if (err) + goto err_insert; + + return fdb_chain; + +err_insert: + kvfree(fdb_chain); + return ERR_PTR(err); +} + +static void +mlx5_esw_chains_destroy_fdb_chain(struct fdb_chain *fdb_chain) +{ + struct mlx5_eswitch *esw = fdb_chain->esw; + + rhashtable_remove_fast(&esw_chains_ht(esw), &fdb_chain->node, + chain_params); + kvfree(fdb_chain); +} + +static struct fdb_chain * +mlx5_esw_chains_get_fdb_chain(struct mlx5_eswitch *esw, u32 chain) +{ + struct fdb_chain *fdb_chain; + + fdb_chain = rhashtable_lookup_fast(&esw_chains_ht(esw), &chain, + chain_params); + if (!fdb_chain) { + fdb_chain = mlx5_esw_chains_create_fdb_chain(esw, chain); + if (IS_ERR(fdb_chain)) + return fdb_chain; + } + + fdb_chain->ref++; + + return fdb_chain; +} + +static struct mlx5_flow_handle * +mlx5_esw_chains_add_miss_rule(struct mlx5_flow_table *fdb, + struct mlx5_flow_table *next_fdb) +{ + static const struct mlx5_flow_spec spec = {}; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act act = {}; + + act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND; + act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = next_fdb; + + return mlx5_add_flow_rules(fdb, &spec, &act, &dest, 1); +} + +static int +mlx5_esw_chains_update_prio_prevs(struct fdb_prio *fdb_prio, + struct mlx5_flow_table *next_fdb) +{ + struct mlx5_flow_handle *miss_rules[FDB_TC_LEVELS_PER_PRIO + 1] = {}; + struct fdb_chain *fdb_chain = fdb_prio->fdb_chain; + struct fdb_prio *pos; + int n = 0, err; + + if (fdb_prio->key.level) + return 0; + + /* Iterate in reverse order until reaching the level 0 rule of + * the previous priority, adding all the miss rules first, so we can + * revert them if any of them fails. + */ + pos = fdb_prio; + list_for_each_entry_continue_reverse(pos, + &fdb_chain->prios_list, + list) { + miss_rules[n] = mlx5_esw_chains_add_miss_rule(pos->fdb, + next_fdb); + if (IS_ERR(miss_rules[n])) { + err = PTR_ERR(miss_rules[n]); + goto err_prev_rule; + } + + n++; + if (!pos->key.level) + break; + } + + /* Success, delete old miss rules, and update the pointers. 
*/ + n = 0; + pos = fdb_prio; + list_for_each_entry_continue_reverse(pos, + &fdb_chain->prios_list, + list) { + mlx5_del_flow_rules(pos->miss_rule); + + pos->miss_rule = miss_rules[n]; + pos->next_fdb = next_fdb; + + n++; + if (!pos->key.level) + break; + } + + return 0; + +err_prev_rule: + while (--n >= 0) + mlx5_del_flow_rules(miss_rules[n]); + + return err; +} + +static void +mlx5_esw_chains_put_fdb_chain(struct fdb_chain *fdb_chain) +{ + if (--fdb_chain->ref == 0) + mlx5_esw_chains_destroy_fdb_chain(fdb_chain); +} + +static struct fdb_prio * +mlx5_esw_chains_create_fdb_prio(struct mlx5_eswitch *esw, + u32 chain, u32 prio, u32 level) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_handle *miss_rule = NULL; + struct mlx5_flow_group *miss_group; + struct fdb_prio *fdb_prio = NULL; + struct mlx5_flow_table *next_fdb; + struct fdb_chain *fdb_chain; + struct mlx5_flow_table *fdb; + struct list_head *pos; + u32 *flow_group_in; + int err; + + fdb_chain = mlx5_esw_chains_get_fdb_chain(esw, chain); + if (IS_ERR(fdb_chain)) + return ERR_CAST(fdb_chain); + + fdb_prio = kvzalloc(sizeof(*fdb_prio), GFP_KERNEL); + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!fdb_prio || !flow_group_in) { + err = -ENOMEM; + goto err_alloc; + } + + /* Chain's prio list is sorted by prio and level. + * And all levels of some prio point to the next prio's level 0. + * Example list (prio, level): + * (3,0)->(3,1)->(5,0)->(5,1)->(6,1)->(7,0) + * In hardware, we will we have the following pointers: + * (3,0) -> (5,0) -> (7,0) -> Slow path + * (3,1) -> (5,0) + * (5,1) -> (7,0) + * (6,1) -> (7,0) + */ + + /* Default miss for each chain: */ + next_fdb = (chain == mlx5_esw_chains_get_ft_chain(esw)) ? + tc_slow_fdb(esw) : + tc_end_fdb(esw); + list_for_each(pos, &fdb_chain->prios_list) { + struct fdb_prio *p = list_entry(pos, struct fdb_prio, list); + + /* exit on first pos that is larger */ + if (prio < p->key.prio || (prio == p->key.prio && + level < p->key.level)) { + /* Get next level 0 table */ + next_fdb = p->key.level == 0 ? 
p->fdb : p->next_fdb; + break; + } + } + + fdb = mlx5_esw_chains_create_fdb_table(esw, chain, prio, level); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + goto err_create; + } + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, + fdb->max_fte - 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + fdb->max_fte - 1); + miss_group = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(miss_group)) { + err = PTR_ERR(miss_group); + goto err_group; + } + + /* Add miss rule to next_fdb */ + miss_rule = mlx5_esw_chains_add_miss_rule(fdb, next_fdb); + if (IS_ERR(miss_rule)) { + err = PTR_ERR(miss_rule); + goto err_miss_rule; + } + + fdb_prio->miss_group = miss_group; + fdb_prio->miss_rule = miss_rule; + fdb_prio->next_fdb = next_fdb; + fdb_prio->fdb_chain = fdb_chain; + fdb_prio->key.chain = chain; + fdb_prio->key.prio = prio; + fdb_prio->key.level = level; + fdb_prio->fdb = fdb; + + err = rhashtable_insert_fast(&esw_prios_ht(esw), &fdb_prio->node, + prio_params); + if (err) + goto err_insert; + + list_add(&fdb_prio->list, pos->prev); + + /* Table is ready, connect it */ + err = mlx5_esw_chains_update_prio_prevs(fdb_prio, fdb); + if (err) + goto err_update; + + kvfree(flow_group_in); + return fdb_prio; + +err_update: + list_del(&fdb_prio->list); + rhashtable_remove_fast(&esw_prios_ht(esw), &fdb_prio->node, + prio_params); +err_insert: + mlx5_del_flow_rules(miss_rule); +err_miss_rule: + mlx5_destroy_flow_group(miss_group); +err_group: + mlx5_esw_chains_destroy_fdb_table(esw, fdb); +err_create: +err_alloc: + kvfree(fdb_prio); + kvfree(flow_group_in); + mlx5_esw_chains_put_fdb_chain(fdb_chain); + return ERR_PTR(err); +} + +static void +mlx5_esw_chains_destroy_fdb_prio(struct mlx5_eswitch *esw, + struct fdb_prio *fdb_prio) +{ + struct fdb_chain *fdb_chain = fdb_prio->fdb_chain; + + WARN_ON(mlx5_esw_chains_update_prio_prevs(fdb_prio, + fdb_prio->next_fdb)); + + list_del(&fdb_prio->list); + rhashtable_remove_fast(&esw_prios_ht(esw), &fdb_prio->node, + prio_params); + mlx5_del_flow_rules(fdb_prio->miss_rule); + mlx5_destroy_flow_group(fdb_prio->miss_group); + mlx5_esw_chains_destroy_fdb_table(esw, fdb_prio->fdb); + mlx5_esw_chains_put_fdb_chain(fdb_chain); + kvfree(fdb_prio); +} + +struct mlx5_flow_table * +mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, + u32 level) +{ + struct mlx5_flow_table *prev_fts; + struct fdb_prio *fdb_prio; + struct fdb_prio_key key; + int l = 0; + + if ((chain > mlx5_esw_chains_get_chain_range(esw) && + chain != mlx5_esw_chains_get_ft_chain(esw)) || + prio > mlx5_esw_chains_get_prio_range(esw) || + level > mlx5_esw_chains_get_level_range(esw)) + return ERR_PTR(-EOPNOTSUPP); + + /* create earlier levels for correct fs_core lookup when + * connecting tables. 
+ */ + for (l = 0; l < level; l++) { + prev_fts = mlx5_esw_chains_get_table(esw, chain, prio, l); + if (IS_ERR(prev_fts)) { + fdb_prio = ERR_CAST(prev_fts); + goto err_get_prevs; + } + } + + key.chain = chain; + key.prio = prio; + key.level = level; + + mutex_lock(&esw_chains_lock(esw)); + fdb_prio = rhashtable_lookup_fast(&esw_prios_ht(esw), &key, + prio_params); + if (!fdb_prio) { + fdb_prio = mlx5_esw_chains_create_fdb_prio(esw, chain, + prio, level); + if (IS_ERR(fdb_prio)) + goto err_create_prio; + } + + ++fdb_prio->ref; + mutex_unlock(&esw_chains_lock(esw)); + + return fdb_prio->fdb; + +err_create_prio: + mutex_unlock(&esw_chains_lock(esw)); +err_get_prevs: + while (--l >= 0) + mlx5_esw_chains_put_table(esw, chain, prio, l); + return ERR_CAST(fdb_prio); +} + +void +mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, + u32 level) +{ + struct fdb_prio *fdb_prio; + struct fdb_prio_key key; + + key.chain = chain; + key.prio = prio; + key.level = level; + + mutex_lock(&esw_chains_lock(esw)); + fdb_prio = rhashtable_lookup_fast(&esw_prios_ht(esw), &key, + prio_params); + if (!fdb_prio) + goto err_get_prio; + + if (--fdb_prio->ref == 0) + mlx5_esw_chains_destroy_fdb_prio(esw, fdb_prio); + mutex_unlock(&esw_chains_lock(esw)); + + while (level-- > 0) + mlx5_esw_chains_put_table(esw, chain, prio, level); + + return; + +err_get_prio: + mutex_unlock(&esw_chains_lock(esw)); + WARN_ONCE(1, + "Couldn't find table: (chain: %d prio: %d level: %d)", + chain, prio, level); +} + +struct mlx5_flow_table * +mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw) +{ + return tc_end_fdb(esw); +} + +static int +mlx5_esw_chains_init(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_chains_priv *chains_priv; + struct mlx5_core_dev *dev = esw->dev; + u32 max_flow_counter, fdb_max; + int err; + + chains_priv = kzalloc(sizeof(*chains_priv), GFP_KERNEL); + if (!chains_priv) + return -ENOMEM; + esw_chains_priv(esw) = chains_priv; + + max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | + MLX5_CAP_GEN(dev, max_flow_counter_15_0); + fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); + + esw_debug(dev, + "Init esw offloads chains, max counters(%d), groups(%d), max flow table size(%d)\n", + max_flow_counter, ESW_OFFLOADS_NUM_GROUPS, fdb_max); + + mlx5_esw_chains_init_sz_pool(esw); + + if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, multi_fdb_encap) && + esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { + esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; + esw_warn(dev, "Tc chains and priorities offload aren't supported, update firmware if needed\n"); + } else { + esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; + esw_info(dev, "Supported tc offload range - chains: %u, prios: %u\n", + mlx5_esw_chains_get_chain_range(esw), + mlx5_esw_chains_get_prio_range(esw)); + } + + err = rhashtable_init(&esw_chains_ht(esw), &chain_params); + if (err) + goto init_chains_ht_err; + + err = rhashtable_init(&esw_prios_ht(esw), &prio_params); + if (err) + goto init_prios_ht_err; + + mutex_init(&esw_chains_lock(esw)); + + return 0; + +init_prios_ht_err: + rhashtable_destroy(&esw_chains_ht(esw)); +init_chains_ht_err: + kfree(chains_priv); + return err; +} + +static void +mlx5_esw_chains_cleanup(struct mlx5_eswitch *esw) +{ + mutex_destroy(&esw_chains_lock(esw)); + rhashtable_destroy(&esw_prios_ht(esw)); + rhashtable_destroy(&esw_chains_ht(esw)); + + kfree(esw_chains_priv(esw)); +} + +static int +mlx5_esw_chains_open(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_table 
*ft; + int err; + + /* Create tc_end_fdb(esw), the ft chain that is always created */ + ft = mlx5_esw_chains_get_table(esw, mlx5_esw_chains_get_ft_chain(esw), + 1, 0); + if (IS_ERR(ft)) + return PTR_ERR(ft); + + tc_end_fdb(esw) = ft; + + /* Always open the root for fast path */ + ft = mlx5_esw_chains_get_table(esw, 0, 1, 0); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto level_0_err; + } + + /* Open level 1 for split rules now if prios aren't supported */ + if (!mlx5_esw_chains_prios_supported(esw)) { + ft = mlx5_esw_chains_get_table(esw, 0, 1, 1); + + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto level_1_err; + } + } + + return 0; + +level_1_err: + mlx5_esw_chains_put_table(esw, 0, 1, 0); +level_0_err: + mlx5_esw_chains_put_table(esw, mlx5_esw_chains_get_ft_chain(esw), 1, 0); + return err; +} + +static void +mlx5_esw_chains_close(struct mlx5_eswitch *esw) +{ + if (!mlx5_esw_chains_prios_supported(esw)) + mlx5_esw_chains_put_table(esw, 0, 1, 1); + mlx5_esw_chains_put_table(esw, 0, 1, 0); + mlx5_esw_chains_put_table(esw, mlx5_esw_chains_get_ft_chain(esw), 1, 0); +} + +int +mlx5_esw_chains_create(struct mlx5_eswitch *esw) +{ + int err; + + err = mlx5_esw_chains_init(esw); + if (err) + return err; + + err = mlx5_esw_chains_open(esw); + if (err) + goto err_open; + + return 0; + +err_open: + mlx5_esw_chains_cleanup(esw); + return err; +} + +void +mlx5_esw_chains_destroy(struct mlx5_eswitch *esw) +{ + mlx5_esw_chains_close(esw); + mlx5_esw_chains_cleanup(esw); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h new file mode 100644 index 000000000000..2e13097fe348 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies.
*/ + +#ifndef __ML5_ESW_CHAINS_H__ +#define __ML5_ESW_CHAINS_H__ + +bool +mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw); +u32 +mlx5_esw_chains_get_prio_range(struct mlx5_eswitch *esw); +u32 +mlx5_esw_chains_get_chain_range(struct mlx5_eswitch *esw); +u32 +mlx5_esw_chains_get_ft_chain(struct mlx5_eswitch *esw); + +struct mlx5_flow_table * +mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, + u32 level); +void +mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, + u32 level); + +struct mlx5_flow_table * +mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw); + +int mlx5_esw_chains_create(struct mlx5_eswitch *esw); +void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw); + +#endif /* __ML5_ESW_CHAINS_H__ */ + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index 366bda1bb1c3..dc08ed9339ab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -50,8 +50,8 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, struct mlx5_flow_act *flow_act) { static const struct mlx5_flow_spec spec = {}; + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_namespace *root_ns; - int prio, flags; int err; root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); @@ -63,10 +63,11 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, /* As this is the terminating action then the termination table is the * same prio as the slow path */ - prio = FDB_SLOW_PATH; - flags = MLX5_FLOW_TABLE_TERMINATION; - tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, prio, 1, 1, - 0, flags); + ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION; + ft_attr.prio = FDB_SLOW_PATH; + ft_attr.max_fte = 1; + ft_attr.autogroup.max_num_groups = 1; + tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); if (IS_ERR(tt->termtbl)) { esw_warn(dev, "Failed to create termination table\n"); return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 3c816e81f8d9..b25465d9e030 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -432,6 +432,9 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(set_fte_in, in, table_type, ft->type); MLX5_SET(set_fte_in, in, table_id, ft->id); MLX5_SET(set_fte_in, in, flow_index, fte->index); + MLX5_SET(set_fte_in, in, ignore_flow_level, + !!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL)); + if (ft->vport) { MLX5_SET(set_fte_in, in, vport_number, ft->vport); MLX5_SET(set_fte_in, in, other_vport, 1); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 8c5df6c7d7b6..c7a16ae05fa8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -579,7 +579,9 @@ static void del_sw_flow_group(struct fs_node *node) rhashtable_destroy(&fg->ftes_hash); ida_destroy(&fg->fte_allocator); - if (ft->autogroup.active && fg->max_ftes == ft->autogroup.group_size) + if (ft->autogroup.active && + fg->max_ftes == ft->autogroup.group_size && + fg->start_index < ft->autogroup.max_fte) ft->autogroup.num_groups--; err = rhltable_remove(&ft->fgs_hash, &fg->hash, @@ -1006,7 +1008,8 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa u16 
vport) { struct mlx5_flow_root_namespace *root = find_root(&ns->node); - struct mlx5_flow_table *next_ft = NULL; + bool unmanaged = ft_attr->flags & MLX5_FLOW_TABLE_UNMANAGED; + struct mlx5_flow_table *next_ft; struct fs_prio *fs_prio = NULL; struct mlx5_flow_table *ft; int log_table_sz; @@ -1023,14 +1026,21 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa err = -EINVAL; goto unlock_root; } - if (ft_attr->level >= fs_prio->num_levels) { - err = -ENOSPC; - goto unlock_root; + if (!unmanaged) { + /* The level is related to the + * priority level range. + */ + if (ft_attr->level >= fs_prio->num_levels) { + err = -ENOSPC; + goto unlock_root; + } + + ft_attr->level += fs_prio->start_level; } + /* The level is related to the * priority level range. */ - ft_attr->level += fs_prio->start_level; ft = alloc_flow_table(ft_attr->level, vport, ft_attr->max_fte ? roundup_pow_of_two(ft_attr->max_fte) : 0, @@ -1043,19 +1053,27 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table); log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0; - next_ft = find_next_chained_ft(fs_prio); + next_ft = unmanaged ? ft_attr->next_ft : + find_next_chained_ft(fs_prio); ft->def_miss_action = ns->def_miss_action; err = root->cmds->create_flow_table(root, ft, log_table_sz, next_ft); if (err) goto free_ft; - err = connect_flow_table(root->dev, ft, fs_prio); - if (err) - goto destroy_ft; + if (!unmanaged) { + err = connect_flow_table(root->dev, ft, fs_prio); + if (err) + goto destroy_ft; + } + ft->node.active = true; down_write_ref_node(&fs_prio->node, false); - tree_add_node(&ft->node, &fs_prio->node); - list_add_flow_table(ft, fs_prio); + if (!unmanaged) { + tree_add_node(&ft->node, &fs_prio->node); + list_add_flow_table(ft, fs_prio); + } else { + ft->node.root = fs_prio->node.root; + } fs_prio->num_ft++; up_write_ref_node(&fs_prio->node, false); mutex_unlock(&root->chain_lock); @@ -1103,31 +1121,27 @@ EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table); struct mlx5_flow_table* mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, - int prio, - int num_flow_table_entries, - int max_num_groups, - u32 level, - u32 flags) + struct mlx5_flow_table_attr *ft_attr) { - struct mlx5_flow_table_attr ft_attr = {}; + int num_reserved_entries = ft_attr->autogroup.num_reserved_entries; + int autogroups_max_fte = ft_attr->max_fte - num_reserved_entries; + int max_num_groups = ft_attr->autogroup.max_num_groups; struct mlx5_flow_table *ft; - if (max_num_groups > num_flow_table_entries) + if (max_num_groups > autogroups_max_fte) + return ERR_PTR(-EINVAL); + if (num_reserved_entries > ft_attr->max_fte) return ERR_PTR(-EINVAL); - ft_attr.max_fte = num_flow_table_entries; - ft_attr.prio = prio; - ft_attr.level = level; - ft_attr.flags = flags; - - ft = mlx5_create_flow_table(ns, &ft_attr); + ft = mlx5_create_flow_table(ns, ft_attr); if (IS_ERR(ft)) return ft; ft->autogroup.active = true; ft->autogroup.required_groups = max_num_groups; + ft->autogroup.max_fte = autogroups_max_fte; /* We save place for flow groups in addition to max types */ - ft->autogroup.group_size = ft->max_fte / (max_num_groups + 1); + ft->autogroup.group_size = autogroups_max_fte / (max_num_groups + 1); return ft; } @@ -1149,7 +1163,7 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, struct mlx5_flow_group *fg; int err; - if (ft->autogroup.active) + if (ft->autogroup.active && start_index < ft->autogroup.max_fte) 
return ERR_PTR(-EPERM); down_write_ref_node(&ft->node, false); @@ -1322,9 +1336,10 @@ static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft const struct mlx5_flow_spec *spec) { struct list_head *prev = &ft->node.children; - struct mlx5_flow_group *fg; + u32 max_fte = ft->autogroup.max_fte; unsigned int candidate_index = 0; unsigned int group_size = 0; + struct mlx5_flow_group *fg; if (!ft->autogroup.active) return ERR_PTR(-ENOENT); @@ -1332,7 +1347,7 @@ static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft if (ft->autogroup.num_groups < ft->autogroup.required_groups) group_size = ft->autogroup.group_size; - /* ft->max_fte == ft->autogroup.max_types */ + /* max_fte == ft->autogroup.max_types */ if (group_size == 0) group_size = 1; @@ -1345,7 +1360,7 @@ static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft prev = &fg->node.list; } - if (candidate_index + group_size > ft->max_fte) + if (candidate_index + group_size > max_fte) return ERR_PTR(-ENOSPC); fg = alloc_insert_flow_group(ft, @@ -1529,18 +1544,30 @@ static bool counter_is_valid(u32 action) } static bool dest_is_valid(struct mlx5_flow_destination *dest, - u32 action, + struct mlx5_flow_act *flow_act, struct mlx5_flow_table *ft) { + bool ignore_level = flow_act->flags & FLOW_ACT_IGNORE_FLOW_LEVEL; + u32 action = flow_act->action; + if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)) return counter_is_valid(action); if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) return true; + if (ignore_level) { + if (ft->type != FS_FT_FDB) + return false; + + if (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && + dest->ft->type != FS_FT_FDB) + return false; + } + if (!dest || ((dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) && - (dest->ft->level <= ft->level))) + (dest->ft->level <= ft->level && !ignore_level))) return false; return true; } @@ -1770,7 +1797,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, return ERR_PTR(-EINVAL); for (i = 0; i < dest_num; i++) { - if (!dest_is_valid(&dest[i], flow_act->action, ft)) + if (!dest_is_valid(&dest[i], flow_act, ft)) return ERR_PTR(-EINVAL); } nested_down_read_ref_node(&ft->node, FS_LOCK_GRANDPARENT); @@ -2033,7 +2060,8 @@ int mlx5_destroy_flow_table(struct mlx5_flow_table *ft) int err = 0; mutex_lock(&root->chain_lock); - err = disconnect_flow_table(ft); + if (!(ft->flags & MLX5_FLOW_TABLE_UNMANAGED)) + err = disconnect_flow_table(ft); if (err) { mutex_unlock(&root->chain_lock); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index c2621b911563..be5f5e32c1e8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -164,6 +164,7 @@ struct mlx5_flow_table { unsigned int required_groups; unsigned int group_size; unsigned int num_groups; + unsigned int max_fte; } autogroup; /* Protect fwd_rules */ struct mutex lock; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index a19790dee7b2..d89ff1d09119 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -131,11 +131,11 @@ static int mlx5_get_pcam_reg(struct mlx5_core_dev *dev) MLX5_PCAM_REGS_5000_TO_507F); } -static int mlx5_get_mcam_reg(struct mlx5_core_dev *dev) +static int mlx5_get_mcam_access_reg_group(struct mlx5_core_dev *dev, + enum mlx5_mcam_reg_groups group) { - return mlx5_query_mcam_reg(dev, 
dev->caps.mcam, - MLX5_MCAM_FEATURE_ENHANCED_FEATURES, - MLX5_MCAM_REGS_FIRST_128); + return mlx5_query_mcam_reg(dev, dev->caps.mcam[group], + MLX5_MCAM_FEATURE_ENHANCED_FEATURES, group); } static int mlx5_get_qcam_reg(struct mlx5_core_dev *dev) @@ -221,8 +221,11 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, pcam_reg)) mlx5_get_pcam_reg(dev); - if (MLX5_CAP_GEN(dev, mcam_reg)) - mlx5_get_mcam_reg(dev); + if (MLX5_CAP_GEN(dev, mcam_reg)) { + mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_FIRST_128); + mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_0x9080_0x90FF); + mlx5_get_mcam_access_reg_group(dev, MLX5_MCAM_REGS_0x9100_0x917F); + } if (MLX5_CAP_GEN(dev, qcam_reg)) mlx5_get_qcam_reg(dev); @@ -245,6 +248,13 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN_64(dev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) { + err = mlx5_core_get_caps(dev, MLX5_CAP_VDPA_EMULATION); + if (err) + return err; + } + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 3ed8ab2d703d..56078b23f1a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -87,8 +87,8 @@ int mlx5i_init(struct mlx5_core_dev *mdev, mlx5e_set_netdev_mtu_boundaries(priv); netdev->mtu = netdev->max_mtu; - mlx5e_build_nic_params(mdev, NULL, &priv->rss_params, &priv->channels.params, - priv->max_nch, netdev->mtu); + mlx5e_build_nic_params(priv, NULL, &priv->rss_params, &priv->channels.params, + netdev->mtu); mlx5i_build_nic_params(mdev, &priv->channels.params); mlx5e_timestamp_init(priv); @@ -419,6 +419,28 @@ static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) mlx5e_destroy_q_counters(priv); } +/* The stats groups order is opposite to the update_stats() order calls */ +static mlx5e_stats_grp_t mlx5i_stats_grps[] = { + &MLX5E_STATS_GRP(sw), + &MLX5E_STATS_GRP(qcnt), + &MLX5E_STATS_GRP(vnic_env), + &MLX5E_STATS_GRP(vport), + &MLX5E_STATS_GRP(802_3), + &MLX5E_STATS_GRP(2863), + &MLX5E_STATS_GRP(2819), + &MLX5E_STATS_GRP(phy), + &MLX5E_STATS_GRP(pcie), + &MLX5E_STATS_GRP(per_prio), + &MLX5E_STATS_GRP(pme), + &MLX5E_STATS_GRP(channels), + &MLX5E_STATS_GRP(per_port_buff_congest), +}; + +static unsigned int mlx5i_stats_grps_num(struct mlx5e_priv *priv) +{ + return ARRAY_SIZE(mlx5i_stats_grps); +} + static const struct mlx5e_profile mlx5i_nic_profile = { .init = mlx5i_init, .cleanup = mlx5i_cleanup, @@ -435,6 +457,8 @@ static const struct mlx5e_profile mlx5i_nic_profile = { .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */ .max_tc = MLX5I_MAX_NUM_TC, .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), + .stats_grps = mlx5i_stats_grps, + .stats_grps_num = mlx5i_stats_grps_num, }; /* mlx5i netdev NDos */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index fc0d9583475d..b91eabc09fbc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -586,7 +586,8 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) if (!ldev->nb.notifier_call) { ldev->nb.notifier_call = mlx5_lag_netdev_event; - if (register_netdevice_notifier(&ldev->nb)) { + if (register_netdevice_notifier_dev_net(netdev, &ldev->nb, + &ldev->nn)) { ldev->nb.notifier_call = NULL; mlx5_core_err(dev, "Failed to register LAG netdev notifier\n"); } @@ -599,7 +600,7 @@ void mlx5_lag_add(struct mlx5_core_dev 
*dev, struct net_device *netdev) } /* Must be called with intf_mutex held */ -void mlx5_lag_remove(struct mlx5_core_dev *dev) +void mlx5_lag_remove(struct mlx5_core_dev *dev, struct net_device *netdev) { struct mlx5_lag *ldev; int i; @@ -619,7 +620,8 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev) if (i == MLX5_MAX_PORTS) { if (ldev->nb.notifier_call) - unregister_netdevice_notifier(&ldev->nb); + unregister_netdevice_notifier_dev_net(netdev, &ldev->nb, + &ldev->nn); mlx5_lag_mp_cleanup(ldev); cancel_delayed_work_sync(&ldev->bond_work); mlx5_lag_dev_free(ldev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag.h index f1068aac6406..316ab09e2664 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.h @@ -44,6 +44,7 @@ struct mlx5_lag { struct workqueue_struct *wq; struct delayed_work bond_work; struct notifier_block nb; + struct netdev_net_notifier nn; struct lag_mp lag_mp; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index b70afa310ad2..416676c35b1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -200,8 +200,6 @@ static void mlx5_lag_fib_update(struct work_struct *work) rtnl_lock(); switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: mlx5_lag_fib_route_event(ldev, fib_work->event, fib_work->fen_info.fi); @@ -259,8 +257,6 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, switch (event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: fen_info = container_of(info, struct fib_entry_notifier_info, info); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index cf7b8da0f010..f554cfddcf4e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1563,6 +1563,7 @@ static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x101d) }, /* ConnectX-6 Dx */ { PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF}, /* ConnectX Family mlx5Gen Virtual Function */ { PCI_VDEVICE(MELLANOX, 0x101f) }, /* ConnectX-6 LX */ + { PCI_VDEVICE(MELLANOX, 0x1021) }, /* ConnectX-7 */ { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */ { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */ { PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index da67b28d6e23..fcce9e0fc82c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -157,7 +157,7 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, u8 feature_group, u8 access_reg_group); void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev); -void mlx5_lag_remove(struct mlx5_core_dev *dev); +void mlx5_lag_remove(struct mlx5_core_dev *dev, struct net_device *netdev); int mlx5_irq_table_init(struct mlx5_core_dev *dev); void mlx5_irq_table_cleanup(struct 
mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 004c56c2fc0c..6dec2a550a10 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -677,9 +677,12 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, goto out_invalid_arg; } if (action->dest_tbl.tbl->level <= matcher->tbl->level) { + mlx5_core_warn_once(dmn->mdev, + "Connecting table to a lower/same level destination table\n"); mlx5dr_dbg(dmn, - "Destination table level should be higher than source table\n"); - goto out_invalid_arg; + "Connecting table at level %d to a destination table at level %d\n", + matcher->tbl->level, + action->dest_tbl.tbl->level); } attr.final_icm_addr = rx_rule ? action->dest_tbl.tbl->rx.s_anchor->chunk->icm_addr : @@ -690,9 +693,9 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, /* get the relevant addresses */ if (!action->dest_tbl.fw_tbl.rx_icm_addr) { - ret = mlx5dr_cmd_query_flow_table(action->dest_tbl.fw_tbl.mdev, - action->dest_tbl.fw_tbl.ft->type, - action->dest_tbl.fw_tbl.ft->id, + ret = mlx5dr_cmd_query_flow_table(dmn->mdev, + action->dest_tbl.fw_tbl.type, + action->dest_tbl.fw_tbl.id, &output); if (!ret) { action->dest_tbl.fw_tbl.tx_icm_addr = @@ -982,8 +985,106 @@ dec_ref: } struct mlx5dr_action * -mlx5dr_create_action_dest_flow_fw_table(struct mlx5_flow_table *ft, - struct mlx5_core_dev *mdev) +mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, + struct mlx5dr_action_dest *dests, + u32 num_of_dests) +{ + struct mlx5dr_cmd_flow_destination_hw_info *hw_dests; + struct mlx5dr_action **ref_actions; + struct mlx5dr_action *action; + bool reformat_req = false; + u32 num_of_ref = 0; + int ret; + int i; + + if (dmn->type != MLX5DR_DOMAIN_TYPE_FDB) { + mlx5dr_err(dmn, "Multiple destination support is for FDB only\n"); + return NULL; + } + + hw_dests = kzalloc(sizeof(*hw_dests) * num_of_dests, GFP_KERNEL); + if (!hw_dests) + return NULL; + + ref_actions = kzalloc(sizeof(*ref_actions) * num_of_dests * 2, GFP_KERNEL); + if (!ref_actions) + goto free_hw_dests; + + for (i = 0; i < num_of_dests; i++) { + struct mlx5dr_action *reformat_action = dests[i].reformat; + struct mlx5dr_action *dest_action = dests[i].dest; + + ref_actions[num_of_ref++] = dest_action; + + switch (dest_action->action_type) { + case DR_ACTION_TYP_VPORT: + hw_dests[i].vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; + hw_dests[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + hw_dests[i].vport.num = dest_action->vport.caps->num; + hw_dests[i].vport.vhca_id = dest_action->vport.caps->vhca_gvmi; + if (reformat_action) { + reformat_req = true; + hw_dests[i].vport.reformat_id = + reformat_action->reformat.reformat_id; + ref_actions[num_of_ref++] = reformat_action; + hw_dests[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; + } + break; + + case DR_ACTION_TYP_FT: + hw_dests[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + if (dest_action->dest_tbl.is_fw_tbl) + hw_dests[i].ft_id = dest_action->dest_tbl.fw_tbl.id; + else + hw_dests[i].ft_id = dest_action->dest_tbl.tbl->table_id; + break; + + default: + mlx5dr_dbg(dmn, "Invalid multiple destinations action\n"); + goto free_ref_actions; + } + } + + action = dr_action_create_generic(DR_ACTION_TYP_FT); + if (!action) + goto free_ref_actions; + + ret = mlx5dr_fw_create_md_tbl(dmn, + hw_dests, + num_of_dests, + reformat_req, + &action->dest_tbl.fw_tbl.id, + 
&action->dest_tbl.fw_tbl.group_id); + if (ret) + goto free_action; + + refcount_inc(&dmn->refcount); + + for (i = 0; i < num_of_ref; i++) + refcount_inc(&ref_actions[i]->refcount); + + action->dest_tbl.is_fw_tbl = true; + action->dest_tbl.fw_tbl.dmn = dmn; + action->dest_tbl.fw_tbl.type = FS_FT_FDB; + action->dest_tbl.fw_tbl.ref_actions = ref_actions; + action->dest_tbl.fw_tbl.num_of_ref_actions = num_of_ref; + + kfree(hw_dests); + + return action; + +free_action: + kfree(action); +free_ref_actions: + kfree(ref_actions); +free_hw_dests: + kfree(hw_dests); + return NULL; +} + +struct mlx5dr_action * +mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *dmn, + struct mlx5_flow_table *ft) { struct mlx5dr_action *action; @@ -992,8 +1093,11 @@ mlx5dr_create_action_dest_flow_fw_table(struct mlx5_flow_table *ft, return NULL; action->dest_tbl.is_fw_tbl = 1; - action->dest_tbl.fw_tbl.ft = ft; - action->dest_tbl.fw_tbl.mdev = mdev; + action->dest_tbl.fw_tbl.type = ft->type; + action->dest_tbl.fw_tbl.id = ft->id; + action->dest_tbl.fw_tbl.dmn = dmn; + + refcount_inc(&dmn->refcount); return action; } @@ -1213,58 +1317,85 @@ not_found: } static int -dr_action_modify_sw_to_hw(struct mlx5dr_domain *dmn, - __be64 *sw_action, - __be64 *hw_action, - const struct dr_action_modify_field_conv **ret_hw_info) +dr_action_modify_sw_to_hw_add(struct mlx5dr_domain *dmn, + __be64 *sw_action, + __be64 *hw_action, + const struct dr_action_modify_field_conv **ret_hw_info) { const struct dr_action_modify_field_conv *hw_action_info; - u8 offset, length, max_length, action; + u8 max_length; u16 sw_field; - u8 hw_opcode; u32 data; /* Get SW modify action data */ - action = MLX5_GET(set_action_in, sw_action, action_type); - length = MLX5_GET(set_action_in, sw_action, length); - offset = MLX5_GET(set_action_in, sw_action, offset); sw_field = MLX5_GET(set_action_in, sw_action, field); data = MLX5_GET(set_action_in, sw_action, data); /* Convert SW data to HW modify action format */ hw_action_info = dr_action_modify_get_hw_info(sw_field); if (!hw_action_info) { - mlx5dr_dbg(dmn, "Modify action invalid field given\n"); + mlx5dr_dbg(dmn, "Modify add action invalid field given\n"); return -EINVAL; } max_length = hw_action_info->end - hw_action_info->start + 1; - switch (action) { - case MLX5_ACTION_TYPE_SET: - hw_opcode = MLX5DR_ACTION_MDFY_HW_OP_SET; - /* PRM defines that length zero specific length of 32bits */ - if (!length) - length = 32; + MLX5_SET(dr_action_hw_set, hw_action, + opcode, MLX5DR_ACTION_MDFY_HW_OP_ADD); - if (length + offset > max_length) { - mlx5dr_dbg(dmn, "Modify action length + offset exceeds limit\n"); - return -EINVAL; - } - break; + MLX5_SET(dr_action_hw_set, hw_action, destination_field_code, + hw_action_info->hw_field); - case MLX5_ACTION_TYPE_ADD: - hw_opcode = MLX5DR_ACTION_MDFY_HW_OP_ADD; - offset = 0; - length = max_length; - break; + MLX5_SET(dr_action_hw_set, hw_action, destination_left_shifter, + hw_action_info->start); - default: - mlx5dr_info(dmn, "Unsupported action_type for modify action\n"); - return -EOPNOTSUPP; + /* PRM defines that length zero specific length of 32bits */ + MLX5_SET(dr_action_hw_set, hw_action, destination_length, + max_length == 32 ? 
0 : max_length); + + MLX5_SET(dr_action_hw_set, hw_action, inline_data, data); + + *ret_hw_info = hw_action_info; + + return 0; +} + +static int +dr_action_modify_sw_to_hw_set(struct mlx5dr_domain *dmn, + __be64 *sw_action, + __be64 *hw_action, + const struct dr_action_modify_field_conv **ret_hw_info) +{ + const struct dr_action_modify_field_conv *hw_action_info; + u8 offset, length, max_length; + u16 sw_field; + u32 data; + + /* Get SW modify action data */ + length = MLX5_GET(set_action_in, sw_action, length); + offset = MLX5_GET(set_action_in, sw_action, offset); + sw_field = MLX5_GET(set_action_in, sw_action, field); + data = MLX5_GET(set_action_in, sw_action, data); + + /* Convert SW data to HW modify action format */ + hw_action_info = dr_action_modify_get_hw_info(sw_field); + if (!hw_action_info) { + mlx5dr_dbg(dmn, "Modify set action invalid field given\n"); + return -EINVAL; + } + + /* PRM defines that length zero specific length of 32bits */ + length = length ? length : 32; + + max_length = hw_action_info->end - hw_action_info->start + 1; + + if (length + offset > max_length) { + mlx5dr_dbg(dmn, "Modify action length + offset exceeds limit\n"); + return -EINVAL; } - MLX5_SET(dr_action_hw_set, hw_action, opcode, hw_opcode); + MLX5_SET(dr_action_hw_set, hw_action, + opcode, MLX5DR_ACTION_MDFY_HW_OP_SET); MLX5_SET(dr_action_hw_set, hw_action, destination_field_code, hw_action_info->hw_field); @@ -1283,48 +1414,236 @@ dr_action_modify_sw_to_hw(struct mlx5dr_domain *dmn, } static int -dr_action_modify_check_field_limitation(struct mlx5dr_domain *dmn, - const __be64 *sw_action) +dr_action_modify_sw_to_hw_copy(struct mlx5dr_domain *dmn, + __be64 *sw_action, + __be64 *hw_action, + const struct dr_action_modify_field_conv **ret_dst_hw_info, + const struct dr_action_modify_field_conv **ret_src_hw_info) +{ + u8 src_offset, dst_offset, src_max_length, dst_max_length, length; + const struct dr_action_modify_field_conv *hw_dst_action_info; + const struct dr_action_modify_field_conv *hw_src_action_info; + u16 src_field, dst_field; + + /* Get SW modify action data */ + src_field = MLX5_GET(copy_action_in, sw_action, src_field); + dst_field = MLX5_GET(copy_action_in, sw_action, dst_field); + src_offset = MLX5_GET(copy_action_in, sw_action, src_offset); + dst_offset = MLX5_GET(copy_action_in, sw_action, dst_offset); + length = MLX5_GET(copy_action_in, sw_action, length); + + /* Convert SW data to HW modify action format */ + hw_src_action_info = dr_action_modify_get_hw_info(src_field); + hw_dst_action_info = dr_action_modify_get_hw_info(dst_field); + if (!hw_src_action_info || !hw_dst_action_info) { + mlx5dr_dbg(dmn, "Modify copy action invalid field given\n"); + return -EINVAL; + } + + /* PRM defines that length zero specific length of 32bits */ + length = length ? length : 32; + + src_max_length = hw_src_action_info->end - + hw_src_action_info->start + 1; + dst_max_length = hw_dst_action_info->end - + hw_dst_action_info->start + 1; + + if (length + src_offset > src_max_length || + length + dst_offset > dst_max_length) { + mlx5dr_dbg(dmn, "Modify action length + offset exceeds limit\n"); + return -EINVAL; + } + + MLX5_SET(dr_action_hw_copy, hw_action, + opcode, MLX5DR_ACTION_MDFY_HW_OP_COPY); + + MLX5_SET(dr_action_hw_copy, hw_action, destination_field_code, + hw_dst_action_info->hw_field); + + MLX5_SET(dr_action_hw_copy, hw_action, destination_left_shifter, + hw_dst_action_info->start + dst_offset); + + MLX5_SET(dr_action_hw_copy, hw_action, destination_length, + length == 32 ? 
0 : length); + + MLX5_SET(dr_action_hw_copy, hw_action, source_field_code, + hw_src_action_info->hw_field); + + MLX5_SET(dr_action_hw_copy, hw_action, source_left_shifter, + hw_src_action_info->start + dst_offset); + + *ret_dst_hw_info = hw_dst_action_info; + *ret_src_hw_info = hw_src_action_info; + + return 0; +} + +static int +dr_action_modify_sw_to_hw(struct mlx5dr_domain *dmn, + __be64 *sw_action, + __be64 *hw_action, + const struct dr_action_modify_field_conv **ret_dst_hw_info, + const struct dr_action_modify_field_conv **ret_src_hw_info) { - u16 sw_field; u8 action; + int ret; - sw_field = MLX5_GET(set_action_in, sw_action, field); + *hw_action = 0; + *ret_src_hw_info = NULL; + + /* Get SW modify action type */ action = MLX5_GET(set_action_in, sw_action, action_type); - /* Check if SW field is supported in current domain (RX/TX) */ - if (action == MLX5_ACTION_TYPE_SET) { - if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_A) { + switch (action) { + case MLX5_ACTION_TYPE_SET: + ret = dr_action_modify_sw_to_hw_set(dmn, sw_action, + hw_action, + ret_dst_hw_info); + break; + + case MLX5_ACTION_TYPE_ADD: + ret = dr_action_modify_sw_to_hw_add(dmn, sw_action, + hw_action, + ret_dst_hw_info); + break; + + case MLX5_ACTION_TYPE_COPY: + ret = dr_action_modify_sw_to_hw_copy(dmn, sw_action, + hw_action, + ret_dst_hw_info, + ret_src_hw_info); + break; + + default: + mlx5dr_info(dmn, "Unsupported action_type for modify action\n"); + ret = -EOPNOTSUPP; + } + + return ret; +} + +static int +dr_action_modify_check_set_field_limitation(struct mlx5dr_action *action, + const __be64 *sw_action) +{ + u16 sw_field = MLX5_GET(set_action_in, sw_action, field); + struct mlx5dr_domain *dmn = action->rewrite.dmn; + + if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_A) { + action->rewrite.allow_rx = 0; + if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_TX) { + mlx5dr_dbg(dmn, "Unsupported field %d for RX/FDB set action\n", + sw_field); + return -EINVAL; + } + } else if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_B) { + action->rewrite.allow_tx = 0; + if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_RX) { + mlx5dr_dbg(dmn, "Unsupported field %d for TX/FDB set action\n", + sw_field); + return -EINVAL; + } + } + + if (!action->rewrite.allow_rx && !action->rewrite.allow_tx) { + mlx5dr_dbg(dmn, "Modify SET actions not supported on both RX and TX\n"); + return -EINVAL; + } + + return 0; +} + +static int +dr_action_modify_check_add_field_limitation(struct mlx5dr_action *action, + const __be64 *sw_action) +{ + u16 sw_field = MLX5_GET(set_action_in, sw_action, field); + struct mlx5dr_domain *dmn = action->rewrite.dmn; + + if (sw_field != MLX5_ACTION_IN_FIELD_OUT_IP_TTL && + sw_field != MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT && + sw_field != MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM && + sw_field != MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM) { + mlx5dr_dbg(dmn, "Unsupported field %d for add action\n", + sw_field); + return -EINVAL; + } + + return 0; +} + +static int +dr_action_modify_check_copy_field_limitation(struct mlx5dr_action *action, + const __be64 *sw_action) +{ + struct mlx5dr_domain *dmn = action->rewrite.dmn; + u16 sw_fields[2]; + int i; + + sw_fields[0] = MLX5_GET(copy_action_in, sw_action, src_field); + sw_fields[1] = MLX5_GET(copy_action_in, sw_action, dst_field); + + for (i = 0; i < 2; i++) { + if (sw_fields[i] == MLX5_ACTION_IN_FIELD_METADATA_REG_A) { + action->rewrite.allow_rx = 0; if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_TX) { mlx5dr_dbg(dmn, "Unsupported field %d for RX/FDB set action\n", - sw_field); + sw_fields[i]); return 
-EINVAL; } - } - - if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_B) { + } else if (sw_fields[i] == MLX5_ACTION_IN_FIELD_METADATA_REG_B) { + action->rewrite.allow_tx = 0; if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_RX) { mlx5dr_dbg(dmn, "Unsupported field %d for TX/FDB set action\n", - sw_field); + sw_fields[i]); return -EINVAL; } } - } else if (action == MLX5_ACTION_TYPE_ADD) { - if (sw_field != MLX5_ACTION_IN_FIELD_OUT_IP_TTL && - sw_field != MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT && - sw_field != MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM && - sw_field != MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM) { - mlx5dr_dbg(dmn, "Unsupported field %d for add action\n", sw_field); - return -EINVAL; - } - } else { - mlx5dr_info(dmn, "Unsupported action %d modify action\n", action); - return -EOPNOTSUPP; + } + + if (!action->rewrite.allow_rx && !action->rewrite.allow_tx) { + mlx5dr_dbg(dmn, "Modify copy actions not supported on both RX and TX\n"); + return -EINVAL; } return 0; } +static int +dr_action_modify_check_field_limitation(struct mlx5dr_action *action, + const __be64 *sw_action) +{ + struct mlx5dr_domain *dmn = action->rewrite.dmn; + u8 action_type; + int ret; + + action_type = MLX5_GET(set_action_in, sw_action, action_type); + + switch (action_type) { + case MLX5_ACTION_TYPE_SET: + ret = dr_action_modify_check_set_field_limitation(action, + sw_action); + break; + + case MLX5_ACTION_TYPE_ADD: + ret = dr_action_modify_check_add_field_limitation(action, + sw_action); + break; + + case MLX5_ACTION_TYPE_COPY: + ret = dr_action_modify_check_copy_field_limitation(action, + sw_action); + break; + + default: + mlx5dr_info(dmn, "Unsupported action %d modify action\n", + action_type); + ret = -EOPNOTSUPP; + } + + return ret; +} + static bool dr_action_modify_check_is_ttl_modify(const u64 *sw_action) { @@ -1333,7 +1652,7 @@ dr_action_modify_check_is_ttl_modify(const u64 *sw_action) return sw_field == MLX5_ACTION_IN_FIELD_OUT_IP_TTL; } -static int dr_actions_convert_modify_header(struct mlx5dr_domain *dmn, +static int dr_actions_convert_modify_header(struct mlx5dr_action *action, u32 max_hw_actions, u32 num_sw_actions, __be64 sw_actions[], @@ -1341,20 +1660,26 @@ static int dr_actions_convert_modify_header(struct mlx5dr_domain *dmn, u32 *num_hw_actions, bool *modify_ttl) { - const struct dr_action_modify_field_conv *hw_action_info; + const struct dr_action_modify_field_conv *hw_dst_action_info; + const struct dr_action_modify_field_conv *hw_src_action_info; u16 hw_field = MLX5DR_ACTION_MDFY_HW_FLD_RESERVED; u32 l3_type = MLX5DR_ACTION_MDFY_HW_HDR_L3_NONE; u32 l4_type = MLX5DR_ACTION_MDFY_HW_HDR_L4_NONE; + struct mlx5dr_domain *dmn = action->rewrite.dmn; int ret, i, hw_idx = 0; __be64 *sw_action; __be64 hw_action; *modify_ttl = false; + action->rewrite.allow_rx = 1; + action->rewrite.allow_tx = 1; + for (i = 0; i < num_sw_actions; i++) { sw_action = &sw_actions[i]; - ret = dr_action_modify_check_field_limitation(dmn, sw_action); + ret = dr_action_modify_check_field_limitation(action, + sw_action); if (ret) return ret; @@ -1365,32 +1690,35 @@ static int dr_actions_convert_modify_header(struct mlx5dr_domain *dmn, ret = dr_action_modify_sw_to_hw(dmn, sw_action, &hw_action, - &hw_action_info); + &hw_dst_action_info, + &hw_src_action_info); if (ret) return ret; /* Due to a HW limitation we cannot modify 2 different L3 types */ - if (l3_type && hw_action_info->l3_type && - hw_action_info->l3_type != l3_type) { + if (l3_type && hw_dst_action_info->l3_type && + hw_dst_action_info->l3_type != l3_type) { mlx5dr_dbg(dmn, "Action 
list can't support two different L3 types\n"); return -EINVAL; } - if (hw_action_info->l3_type) - l3_type = hw_action_info->l3_type; + if (hw_dst_action_info->l3_type) + l3_type = hw_dst_action_info->l3_type; /* Due to a HW limitation we cannot modify two different L4 types */ - if (l4_type && hw_action_info->l4_type && - hw_action_info->l4_type != l4_type) { + if (l4_type && hw_dst_action_info->l4_type && + hw_dst_action_info->l4_type != l4_type) { mlx5dr_dbg(dmn, "Action list can't support two different L4 types\n"); return -EINVAL; } - if (hw_action_info->l4_type) - l4_type = hw_action_info->l4_type; + if (hw_dst_action_info->l4_type) + l4_type = hw_dst_action_info->l4_type; /* HW reads and executes two actions at once this means we * need to create a gap if two actions access the same field */ - if ((hw_idx % 2) && hw_field == hw_action_info->hw_field) { + if ((hw_idx % 2) && (hw_field == hw_dst_action_info->hw_field || + (hw_src_action_info && + hw_field == hw_src_action_info->hw_field))) { /* Check if after gap insertion the total number of HW * modify actions doesn't exceeds the limit */ @@ -1400,7 +1728,7 @@ static int dr_actions_convert_modify_header(struct mlx5dr_domain *dmn, return -EINVAL; } } - hw_field = hw_action_info->hw_field; + hw_field = hw_dst_action_info->hw_field; hw_actions[hw_idx] = hw_action; hw_idx++; @@ -1443,7 +1771,7 @@ static int dr_action_create_modify_action(struct mlx5dr_domain *dmn, goto free_chunk; } - ret = dr_actions_convert_modify_header(dmn, + ret = dr_actions_convert_modify_header(action, max_hw_actions, num_sw_actions, actions, @@ -1559,8 +1887,26 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action) switch (action->action_type) { case DR_ACTION_TYP_FT: - if (!action->dest_tbl.is_fw_tbl) + if (action->dest_tbl.is_fw_tbl) + refcount_dec(&action->dest_tbl.fw_tbl.dmn->refcount); + else refcount_dec(&action->dest_tbl.tbl->refcount); + + if (action->dest_tbl.is_fw_tbl && + action->dest_tbl.fw_tbl.num_of_ref_actions) { + struct mlx5dr_action **ref_actions; + int i; + + ref_actions = action->dest_tbl.fw_tbl.ref_actions; + for (i = 0; i < action->dest_tbl.fw_tbl.num_of_ref_actions; i++) + refcount_dec(&ref_actions[i]->refcount); + + kfree(ref_actions); + + mlx5dr_fw_destroy_md_tbl(action->dest_tbl.fw_tbl.dmn, + action->dest_tbl.fw_tbl.id, + action->dest_tbl.fw_tbl.group_id); + } break; case DR_ACTION_TYP_TNL_L2_TO_L2: refcount_dec(&action->reformat.dmn->refcount); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c index 41662c4e2664..461b39376daf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -320,12 +320,7 @@ int mlx5dr_cmd_destroy_flow_group(struct mlx5_core_dev *mdev, } int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev, - u32 table_type, - u64 icm_addr_rx, - u64 icm_addr_tx, - u8 level, - bool sw_owner, - bool term_tbl, + struct mlx5dr_cmd_create_flow_table_attr *attr, u64 *fdb_rx_icm_addr, u32 *table_id) { @@ -335,37 +330,43 @@ int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev, int err; MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE); - MLX5_SET(create_flow_table_in, in, table_type, table_type); + MLX5_SET(create_flow_table_in, in, table_type, attr->table_type); ft_mdev = MLX5_ADDR_OF(create_flow_table_in, in, flow_table_context); - MLX5_SET(flow_table_context, ft_mdev, termination_table, term_tbl); - MLX5_SET(flow_table_context, 
ft_mdev, sw_owner, sw_owner); - MLX5_SET(flow_table_context, ft_mdev, level, level); + MLX5_SET(flow_table_context, ft_mdev, termination_table, attr->term_tbl); + MLX5_SET(flow_table_context, ft_mdev, sw_owner, attr->sw_owner); + MLX5_SET(flow_table_context, ft_mdev, level, attr->level); - if (sw_owner) { + if (attr->sw_owner) { /* icm_addr_0 used for FDB RX / NIC TX / NIC_RX * icm_addr_1 used for FDB TX */ - if (table_type == MLX5_FLOW_TABLE_TYPE_NIC_RX) { + if (attr->table_type == MLX5_FLOW_TABLE_TYPE_NIC_RX) { MLX5_SET64(flow_table_context, ft_mdev, - sw_owner_icm_root_0, icm_addr_rx); - } else if (table_type == MLX5_FLOW_TABLE_TYPE_NIC_TX) { + sw_owner_icm_root_0, attr->icm_addr_rx); + } else if (attr->table_type == MLX5_FLOW_TABLE_TYPE_NIC_TX) { MLX5_SET64(flow_table_context, ft_mdev, - sw_owner_icm_root_0, icm_addr_tx); - } else if (table_type == MLX5_FLOW_TABLE_TYPE_FDB) { + sw_owner_icm_root_0, attr->icm_addr_tx); + } else if (attr->table_type == MLX5_FLOW_TABLE_TYPE_FDB) { MLX5_SET64(flow_table_context, ft_mdev, - sw_owner_icm_root_0, icm_addr_rx); + sw_owner_icm_root_0, attr->icm_addr_rx); MLX5_SET64(flow_table_context, ft_mdev, - sw_owner_icm_root_1, icm_addr_tx); + sw_owner_icm_root_1, attr->icm_addr_tx); } } + MLX5_SET(create_flow_table_in, in, flow_table_context.decap_en, + attr->decap_en); + MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en, + attr->reformat_en); + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); if (err) return err; *table_id = MLX5_GET(create_flow_table_out, out, table_id); - if (!sw_owner && table_type == MLX5_FLOW_TABLE_TYPE_FDB) + if (!attr->sw_owner && attr->table_type == MLX5_FLOW_TABLE_TYPE_FDB && + fdb_rx_icm_addr) *fdb_rx_icm_addr = (u64)MLX5_GET(create_flow_table_out, out, icm_address_31_0) | (u64)MLX5_GET(create_flow_table_out, out, icm_address_39_32) << 32 | @@ -478,3 +479,208 @@ int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num, return 0; } + +static int mlx5dr_cmd_set_extended_dest(struct mlx5_core_dev *dev, + struct mlx5dr_cmd_fte_info *fte, + bool *extended_dest) +{ + int fw_log_max_fdb_encap_uplink = MLX5_CAP_ESW(dev, log_max_fdb_encap_uplink); + int num_fwd_destinations = 0; + int num_encap = 0; + int i; + + *extended_dest = false; + if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) + return 0; + for (i = 0; i < fte->dests_size; i++) { + if (fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + if (fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_VPORT && + fte->dest_arr[i].vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) + num_encap++; + num_fwd_destinations++; + } + + if (num_fwd_destinations > 1 && num_encap > 0) + *extended_dest = true; + + if (*extended_dest && !fw_log_max_fdb_encap_uplink) { + mlx5_core_warn(dev, "FW does not support extended destination"); + return -EOPNOTSUPP; + } + if (num_encap > (1 << fw_log_max_fdb_encap_uplink)) { + mlx5_core_warn(dev, "FW does not support more than %d encaps", + 1 << fw_log_max_fdb_encap_uplink); + return -EOPNOTSUPP; + } + + return 0; +} + +int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev, + int opmod, int modify_mask, + struct mlx5dr_cmd_ft_info *ft, + u32 group_id, + struct mlx5dr_cmd_fte_info *fte) +{ + u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {}; + void *in_flow_context, *vlan; + bool extended_dest = false; + void *in_match_value; + unsigned int inlen; + int dst_cnt_size; + void *in_dests; + u32 *in; + int err; + int i; + + if (mlx5dr_cmd_set_extended_dest(dev, fte, &extended_dest)) + return -EOPNOTSUPP; + + 
if (!extended_dest) + dst_cnt_size = MLX5_ST_SZ_BYTES(dest_format_struct); + else + dst_cnt_size = MLX5_ST_SZ_BYTES(extended_dest_format); + + inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fte->dests_size * dst_cnt_size; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY); + MLX5_SET(set_fte_in, in, op_mod, opmod); + MLX5_SET(set_fte_in, in, modify_enable_mask, modify_mask); + MLX5_SET(set_fte_in, in, table_type, ft->type); + MLX5_SET(set_fte_in, in, table_id, ft->id); + MLX5_SET(set_fte_in, in, flow_index, fte->index); + if (ft->vport) { + MLX5_SET(set_fte_in, in, vport_number, ft->vport); + MLX5_SET(set_fte_in, in, other_vport, 1); + } + + in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); + MLX5_SET(flow_context, in_flow_context, group_id, group_id); + + MLX5_SET(flow_context, in_flow_context, flow_tag, + fte->flow_context.flow_tag); + MLX5_SET(flow_context, in_flow_context, flow_source, + fte->flow_context.flow_source); + + MLX5_SET(flow_context, in_flow_context, extended_destination, + extended_dest); + if (extended_dest) { + u32 action; + + action = fte->action.action & + ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + MLX5_SET(flow_context, in_flow_context, action, action); + } else { + MLX5_SET(flow_context, in_flow_context, action, + fte->action.action); + if (fte->action.pkt_reformat) + MLX5_SET(flow_context, in_flow_context, packet_reformat_id, + fte->action.pkt_reformat->id); + } + if (fte->action.modify_hdr) + MLX5_SET(flow_context, in_flow_context, modify_header_id, + fte->action.modify_hdr->id); + + vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan); + + MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[0].ethtype); + MLX5_SET(vlan, vlan, vid, fte->action.vlan[0].vid); + MLX5_SET(vlan, vlan, prio, fte->action.vlan[0].prio); + + vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan_2); + + MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[1].ethtype); + MLX5_SET(vlan, vlan, vid, fte->action.vlan[1].vid); + MLX5_SET(vlan, vlan, prio, fte->action.vlan[1].prio); + + in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context, + match_value); + memcpy(in_match_value, fte->val, sizeof(u32) * MLX5_ST_SZ_DW_MATCH_PARAM); + + in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + int list_size = 0; + + for (i = 0; i < fte->dests_size; i++) { + unsigned int id, type = fte->dest_arr[i].type; + + if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + switch (type) { + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: + id = fte->dest_arr[i].ft_num; + type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: + id = fte->dest_arr[i].ft_id; + break; + case MLX5_FLOW_DESTINATION_TYPE_VPORT: + id = fte->dest_arr[i].vport.num; + MLX5_SET(dest_format_struct, in_dests, + destination_eswitch_owner_vhca_id_valid, + !!(fte->dest_arr[i].vport.flags & + MLX5_FLOW_DEST_VPORT_VHCA_ID)); + MLX5_SET(dest_format_struct, in_dests, + destination_eswitch_owner_vhca_id, + fte->dest_arr[i].vport.vhca_id); + if (extended_dest && (fte->dest_arr[i].vport.flags & + MLX5_FLOW_DEST_VPORT_REFORMAT_ID)) { + MLX5_SET(dest_format_struct, in_dests, + packet_reformat, + !!(fte->dest_arr[i].vport.flags & + MLX5_FLOW_DEST_VPORT_REFORMAT_ID)); + MLX5_SET(extended_dest_format, in_dests, + packet_reformat_id, + fte->dest_arr[i].vport.reformat_id); + } + break; + default: + id = fte->dest_arr[i].tir_num; 
+ } + + MLX5_SET(dest_format_struct, in_dests, destination_type, + type); + MLX5_SET(dest_format_struct, in_dests, destination_id, id); + in_dests += dst_cnt_size; + list_size++; + } + + MLX5_SET(flow_context, in_flow_context, destination_list_size, + list_size); + } + + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + int max_list_size = BIT(MLX5_CAP_FLOWTABLE_TYPE(dev, + log_max_flow_counter, + ft->type)); + int list_size = 0; + + for (i = 0; i < fte->dests_size; i++) { + if (fte->dest_arr[i].type != + MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + MLX5_SET(flow_counter_list, in_dests, flow_counter_id, + fte->dest_arr[i].counter_id); + in_dests += dst_cnt_size; + list_size++; + } + if (list_size > max_list_size) { + err = -EINVAL; + goto err_out; + } + + MLX5_SET(flow_context, in_flow_context, flow_counter_list_size, + list_size); + } + + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +err_out: + kvfree(in); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c index 60ef6e6171e3..1fbcd012bb85 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c @@ -7,6 +7,7 @@ struct mlx5dr_fw_recalc_cs_ft * mlx5dr_fw_create_recalc_cs_ft(struct mlx5dr_domain *dmn, u32 vport_num) { + struct mlx5dr_cmd_create_flow_table_attr ft_attr = {}; struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft; u32 table_id, group_id, modify_hdr_id; u64 rx_icm_addr, modify_ttl_action; @@ -16,9 +17,14 @@ mlx5dr_fw_create_recalc_cs_ft(struct mlx5dr_domain *dmn, u32 vport_num) if (!recalc_cs_ft) return NULL; - ret = mlx5dr_cmd_create_flow_table(dmn->mdev, MLX5_FLOW_TABLE_TYPE_FDB, - 0, 0, dmn->info.caps.max_ft_level - 1, - false, true, &rx_icm_addr, &table_id); + ft_attr.table_type = MLX5_FLOW_TABLE_TYPE_FDB; + ft_attr.level = dmn->info.caps.max_ft_level - 1; + ft_attr.term_tbl = true; + + ret = mlx5dr_cmd_create_flow_table(dmn->mdev, + &ft_attr, + &rx_icm_addr, + &table_id); if (ret) { mlx5dr_err(dmn, "Failed creating TTL W/A FW flow table %d\n", ret); goto free_ttl_tbl; @@ -91,3 +97,70 @@ void mlx5dr_fw_destroy_recalc_cs_ft(struct mlx5dr_domain *dmn, kfree(recalc_cs_ft); } + +int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn, + struct mlx5dr_cmd_flow_destination_hw_info *dest, + int num_dest, + bool reformat_req, + u32 *tbl_id, + u32 *group_id) +{ + struct mlx5dr_cmd_create_flow_table_attr ft_attr = {}; + struct mlx5dr_cmd_fte_info fte_info = {}; + u32 val[MLX5_ST_SZ_DW_MATCH_PARAM] = {}; + struct mlx5dr_cmd_ft_info ft_info = {}; + int ret; + + ft_attr.table_type = MLX5_FLOW_TABLE_TYPE_FDB; + ft_attr.level = dmn->info.caps.max_ft_level - 2; + ft_attr.reformat_en = reformat_req; + ft_attr.decap_en = reformat_req; + + ret = mlx5dr_cmd_create_flow_table(dmn->mdev, &ft_attr, NULL, tbl_id); + if (ret) { + mlx5dr_err(dmn, "Failed creating multi dest FW flow table %d\n", ret); + return ret; + } + + ret = mlx5dr_cmd_create_empty_flow_group(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + *tbl_id, group_id); + if (ret) { + mlx5dr_err(dmn, "Failed creating multi dest FW flow group %d\n", ret); + goto free_flow_table; + } + + ft_info.id = *tbl_id; + ft_info.type = FS_FT_FDB; + fte_info.action.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + fte_info.dests_size = num_dest; + fte_info.val = val; + fte_info.dest_arr = dest; + + ret = mlx5dr_cmd_set_fte(dmn->mdev, 0, 0, &ft_info, *group_id, &fte_info); + if (ret) { + mlx5dr_err(dmn, "Failed setting fte into table 
%d\n", ret); + goto free_flow_group; + } + + return 0; + +free_flow_group: + mlx5dr_cmd_destroy_flow_group(dmn->mdev, MLX5_FLOW_TABLE_TYPE_FDB, + *tbl_id, *group_id); +free_flow_table: + mlx5dr_cmd_destroy_flow_table(dmn->mdev, *tbl_id, + MLX5_FLOW_TABLE_TYPE_FDB); + return ret; +} + +void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn, + u32 tbl_id, u32 group_id) +{ + mlx5dr_cmd_del_flow_table_entry(dmn->mdev, FS_FT_FDB, tbl_id); + mlx5dr_cmd_destroy_flow_group(dmn->mdev, + MLX5_FLOW_TABLE_TYPE_FDB, + tbl_id, group_id); + mlx5dr_cmd_destroy_flow_table(dmn->mdev, tbl_id, + MLX5_FLOW_TABLE_TYPE_FDB); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index 51803eef13dd..c7f10d4f8f8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2019 Mellanox Technologies. */ +#include <linux/smp.h> #include "dr_types.h" #define QUEUE_SIZE 128 @@ -729,7 +730,7 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, if (!in) goto err_cqwq; - vector = smp_processor_id() % mlx5_comp_vectors_count(mdev); + vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev); err = mlx5_vector2eqn(mdev, vector, &eqn, &irqn); if (err) { kvfree(in); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c index e178d8d3dbc9..14ce2d7dbb66 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c @@ -211,6 +211,9 @@ static int dr_table_destroy_sw_owned_tbl(struct mlx5dr_table *tbl) static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl) { + bool en_encap = !!(tbl->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT); + bool en_decap = !!(tbl->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + struct mlx5dr_cmd_create_flow_table_attr ft_attr = {}; u64 icm_addr_rx = 0; u64 icm_addr_tx = 0; int ret; @@ -221,18 +224,21 @@ static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl) if (tbl->tx.s_anchor) icm_addr_tx = tbl->tx.s_anchor->chunk->icm_addr; - ret = mlx5dr_cmd_create_flow_table(tbl->dmn->mdev, - tbl->table_type, - icm_addr_rx, - icm_addr_tx, - tbl->dmn->info.caps.max_ft_level - 1, - true, false, NULL, - &tbl->table_id); + ft_attr.table_type = tbl->table_type; + ft_attr.icm_addr_rx = icm_addr_rx; + ft_attr.icm_addr_tx = icm_addr_tx; + ft_attr.level = tbl->dmn->info.caps.max_ft_level - 1; + ft_attr.sw_owner = true; + ft_attr.decap_en = en_decap; + ft_attr.reformat_en = en_encap; + + ret = mlx5dr_cmd_create_flow_table(tbl->dmn->mdev, &ft_attr, + NULL, &tbl->table_id); return ret; } -struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level) +struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level, u32 flags) { struct mlx5dr_table *tbl; int ret; @@ -245,6 +251,7 @@ struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level) tbl->dmn = dmn; tbl->level = level; + tbl->flags = flags; refcount_set(&tbl->refcount, 1); ret = dr_table_init(tbl); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 3fdf4a5eb031..dffe35145d19 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -681,6 +681,7 @@ struct mlx5dr_table { u32 level; u32 table_type; u32 table_id; + u32 flags; struct list_head matcher_list; struct mlx5dr_action *miss_action; refcount_t refcount; @@ -744,10 +745,14 @@ struct mlx5dr_action { union { struct mlx5dr_table *tbl; struct { - struct mlx5_flow_table *ft; + struct mlx5dr_domain *dmn; + u32 id; + u32 group_id; + enum fs_flow_table_type type; u64 rx_icm_addr; u64 tx_icm_addr; - struct mlx5_core_dev *mdev; + struct mlx5dr_action **ref_actions; + u32 num_of_ref_actions; } fw_tbl; }; } dest_tbl; @@ -869,6 +874,17 @@ struct mlx5dr_cmd_query_flow_table_details { u64 sw_owner_icm_root_0; }; +struct mlx5dr_cmd_create_flow_table_attr { + u32 table_type; + u64 icm_addr_rx; + u64 icm_addr_tx; + u8 level; + bool sw_owner; + bool term_tbl; + bool decap_en; + bool reformat_en; +}; + /* internal API functions */ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, struct mlx5dr_cmd_caps *caps); @@ -906,12 +922,7 @@ int mlx5dr_cmd_destroy_flow_group(struct mlx5_core_dev *mdev, u32 table_id, u32 group_id); int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev, - u32 table_type, - u64 icm_addr_rx, - u64 icm_addr_tx, - u8 level, - bool sw_owner, - bool term_tbl, + struct mlx5dr_cmd_create_flow_table_attr *attr, u64 *fdb_rx_icm_addr, u32 *table_id); int mlx5dr_cmd_destroy_flow_table(struct mlx5_core_dev *mdev, @@ -1053,6 +1064,43 @@ int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn, int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn, struct mlx5dr_action *action); +struct mlx5dr_cmd_ft_info { + u32 id; + u16 vport; + enum fs_flow_table_type type; +}; + +struct mlx5dr_cmd_flow_destination_hw_info { + enum mlx5_flow_destination_type type; + union { + u32 tir_num; + u32 ft_num; + u32 ft_id; + u32 counter_id; + struct { + u16 num; + u16 vhca_id; + u32 reformat_id; + u8 flags; + } vport; + }; +}; + +struct mlx5dr_cmd_fte_info { + u32 dests_size; + u32 index; + struct mlx5_flow_context flow_context; + u32 *val; + struct mlx5_flow_act action; + struct mlx5dr_cmd_flow_destination_hw_info *dest_arr; +}; + +int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev, + int opmod, int modify_mask, + struct mlx5dr_cmd_ft_info *ft, + u32 group_id, + struct mlx5dr_cmd_fte_info *fte); + struct mlx5dr_fw_recalc_cs_ft { u64 rx_icm_addr; u32 table_id; @@ -1067,4 +1115,12 @@ void mlx5dr_fw_destroy_recalc_cs_ft(struct mlx5dr_domain *dmn, int mlx5dr_domain_cache_get_recalc_cs_ft_addr(struct mlx5dr_domain *dmn, u32 vport_num, u64 *rx_icm_addr); +int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn, + struct mlx5dr_cmd_flow_destination_hw_info *dest, + int num_dest, + bool reformat_req, + u32 *tbl_id, + u32 *group_id); +void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn, u32 tbl_id, + u32 group_id); #endif /* _DR_TYPES_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 3d587d0bdbbe..3abfc8125926 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -74,7 +74,7 @@ static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns, next_ft); tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, - ft->level); + ft->level, ft->flags); if (!tbl) { mlx5_core_err(ns->dev, "Failed creating dr flow_table\n"); return -EINVAL; @@ -184,13 +184,13 @@ static struct mlx5dr_action *create_vport_action(struct mlx5dr_domain *domain, 
dest_attr->vport.vhca_id); } -static struct mlx5dr_action *create_ft_action(struct mlx5_core_dev *dev, +static struct mlx5dr_action *create_ft_action(struct mlx5dr_domain *domain, struct mlx5_flow_rule *dst) { struct mlx5_flow_table *dest_ft = dst->dest_attr.ft; if (mlx5_dr_is_fw_table(dest_ft->flags)) - return mlx5dr_create_action_dest_flow_fw_table(dest_ft, dev); + return mlx5dr_action_create_dest_flow_fw_table(domain, dest_ft); return mlx5dr_action_create_dest_table(dest_ft->fs_dr_table.dr_table); } @@ -206,6 +206,12 @@ static struct mlx5dr_action *create_action_push_vlan(struct mlx5dr_domain *domai return mlx5dr_action_create_push_vlan(domain, htonl(vlan_hdr)); } +static bool contain_vport_reformat_action(struct mlx5_flow_rule *dst) +{ + return dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT && + dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID; +} + #define MLX5_FLOW_CONTEXT_ACTION_MAX 20 static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, @@ -213,7 +219,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, struct fs_fte *fte) { struct mlx5dr_domain *domain = ns->fs_dr_domain.dr_domain; - struct mlx5dr_action *term_action = NULL; + struct mlx5dr_action_dest *term_actions; struct mlx5dr_match_parameters params; struct mlx5_core_dev *dev = ns->dev; struct mlx5dr_action **fs_dr_actions; @@ -223,6 +229,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, struct mlx5dr_rule *rule; struct mlx5_flow_rule *dst; int fs_dr_num_actions = 0; + int num_term_actions = 0; int num_actions = 0; size_t match_sz; int err = 0; @@ -233,18 +240,38 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, sizeof(*actions), GFP_KERNEL); - if (!actions) - return -ENOMEM; + if (!actions) { + err = -ENOMEM; + goto out_err; + } fs_dr_actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, sizeof(*fs_dr_actions), GFP_KERNEL); if (!fs_dr_actions) { - kfree(actions); - return -ENOMEM; + err = -ENOMEM; + goto free_actions_alloc; + } + + term_actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, + sizeof(*term_actions), GFP_KERNEL); + if (!term_actions) { + err = -ENOMEM; + goto free_fs_dr_actions_alloc; } match_sz = sizeof(fte->val); + /* Drop reformat action bit if destination vport set with reformat */ + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + list_for_each_entry(dst, &fte->node.children, node.list) { + if (!contain_vport_reformat_action(dst)) + continue; + + fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + break; + } + } + /* The order of the actions are must to be keep, only the following * order is supported by SW steering: * TX: push vlan -> modify header -> encap @@ -335,7 +362,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, goto free_actions; } fs_dr_actions[fs_dr_num_actions++] = tmp_action; - term_action = tmp_action; + term_actions[num_term_actions++].dest = tmp_action; } if (fte->flow_context.flow_tag) { @@ -352,34 +379,25 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { list_for_each_entry(dst, &fte->node.children, node.list) { enum mlx5_flow_destination_type type = dst->dest_attr.type; - u32 id; - if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || + num_term_actions >= MLX5_FLOW_CONTEXT_ACTION_MAX) { err = -ENOSPC; goto free_actions; } 
- switch (type) { - case MLX5_FLOW_DESTINATION_TYPE_COUNTER: - id = dst->dest_attr.counter_id; + if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; - tmp_action = - mlx5dr_action_create_flow_counter(id); - if (!tmp_action) { - err = -ENOMEM; - goto free_actions; - } - fs_dr_actions[fs_dr_num_actions++] = tmp_action; - actions[num_actions++] = tmp_action; - break; + switch (type) { case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: - tmp_action = create_ft_action(dev, dst); + tmp_action = create_ft_action(domain, dst); if (!tmp_action) { err = -ENOMEM; goto free_actions; } fs_dr_actions[fs_dr_num_actions++] = tmp_action; - term_action = tmp_action; + term_actions[num_term_actions++].dest = tmp_action; break; case MLX5_FLOW_DESTINATION_TYPE_VPORT: tmp_action = create_vport_action(domain, dst); @@ -388,7 +406,14 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, goto free_actions; } fs_dr_actions[fs_dr_num_actions++] = tmp_action; - term_action = tmp_action; + term_actions[num_term_actions].dest = tmp_action; + + if (dst->dest_attr.vport.flags & + MLX5_FLOW_DEST_VPORT_REFORMAT_ID) + term_actions[num_term_actions].reformat = + dst->dest_attr.vport.pkt_reformat->action.dr_action; + + num_term_actions++; break; default: err = -EOPNOTSUPP; @@ -397,11 +422,50 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, } } + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + list_for_each_entry(dst, &fte->node.children, node.list) { + u32 id; + + if (dst->dest_attr.type != + MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -ENOSPC; + goto free_actions; + } + + id = dst->dest_attr.counter_id; + tmp_action = + mlx5dr_action_create_flow_counter(id); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } + } + params.match_sz = match_sz; params.match_buf = (u64 *)fte->val; - - if (term_action) - actions[num_actions++] = term_action; + if (num_term_actions == 1) { + if (term_actions->reformat) + actions[num_actions++] = term_actions->reformat; + + actions[num_actions++] = term_actions->dest; + } else if (num_term_actions > 1) { + tmp_action = mlx5dr_action_create_mult_dest_tbl(domain, + term_actions, + num_term_actions); + if (!tmp_action) { + err = -EOPNOTSUPP; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + actions[num_actions++] = tmp_action; + } rule = mlx5dr_rule_create(group->fs_dr_matcher.dr_matcher, ¶ms, @@ -412,7 +476,9 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, goto free_actions; } + kfree(term_actions); kfree(actions); + fte->fs_dr_rule.dr_rule = rule; fte->fs_dr_rule.num_actions = fs_dr_num_actions; fte->fs_dr_rule.dr_actions = fs_dr_actions; @@ -420,13 +486,18 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, return 0; free_actions: - for (i = 0; i < fs_dr_num_actions; i++) + /* Free in reverse order to handle action dependencies */ + for (i = fs_dr_num_actions - 1; i >= 0; i--) if (!IS_ERR_OR_NULL(fs_dr_actions[i])) mlx5dr_action_destroy(fs_dr_actions[i]); - mlx5_core_err(dev, "Failed to create dr rule err(%d)\n", err); - kfree(actions); + kfree(term_actions); +free_fs_dr_actions_alloc: kfree(fs_dr_actions); +free_actions_alloc: + kfree(actions); +out_err: + mlx5_core_err(dev, "Failed to create dr rule err(%d)\n", err); return err; } @@ -533,7 +604,8 @@ static int mlx5_cmd_dr_delete_fte(struct 
mlx5_flow_root_namespace *ns, if (err) return err; - for (i = 0; i < rule->num_actions; i++) + /* Free in reverse order to handle action dependencies */ + for (i = rule->num_actions - 1; i >= 0; i--) if (!IS_ERR_OR_NULL(rule->dr_actions[i])) mlx5dr_action_destroy(rule->dr_actions[i]); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h index 1722f4668269..e01c3766c7de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h @@ -32,6 +32,7 @@ enum { }; enum { + MLX5DR_ACTION_MDFY_HW_OP_COPY = 0x1, MLX5DR_ACTION_MDFY_HW_OP_SET = 0x2, MLX5DR_ACTION_MDFY_HW_OP_ADD = 0x3, }; @@ -625,4 +626,19 @@ struct mlx5_ifc_dr_action_hw_set_bits { u8 inline_data[0x20]; }; +struct mlx5_ifc_dr_action_hw_copy_bits { + u8 opcode[0x8]; + u8 destination_field_code[0x8]; + u8 reserved_at_10[0x2]; + u8 destination_left_shifter[0x6]; + u8 reserved_at_18[0x2]; + u8 destination_length[0x6]; + + u8 reserved_at_20[0x8]; + u8 source_field_code[0x8]; + u8 reserved_at_30[0x2]; + u8 source_left_shifter[0x6]; + u8 reserved_at_38[0x8]; +}; + #endif /* MLX5_IFC_DR_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h index adda9cbfba45..e1edc9c247b7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -33,6 +33,11 @@ struct mlx5dr_match_parameters { u64 *match_buf; /* Device spec format */ }; +struct mlx5dr_action_dest { + struct mlx5dr_action *dest; + struct mlx5dr_action *reformat; +}; + #ifdef CONFIG_MLX5_SW_STEERING struct mlx5dr_domain * @@ -46,7 +51,7 @@ void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, struct mlx5dr_domain *peer_dmn); struct mlx5dr_table * -mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level); +mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags); int mlx5dr_table_destroy(struct mlx5dr_table *table); @@ -75,14 +80,19 @@ struct mlx5dr_action * mlx5dr_action_create_dest_table(struct mlx5dr_table *table); struct mlx5dr_action * -mlx5dr_create_action_dest_flow_fw_table(struct mlx5_flow_table *ft, - struct mlx5_core_dev *mdev); +mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *domain, + struct mlx5_flow_table *ft); struct mlx5dr_action * mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain, u32 vport, u8 vhca_id_valid, u16 vhca_id); +struct mlx5dr_action * +mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, + struct mlx5dr_action_dest *dests, + u32 num_of_dests); + struct mlx5dr_action *mlx5dr_action_create_drop(void); struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value); @@ -131,7 +141,7 @@ mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, struct mlx5dr_domain *peer_dmn) { } static inline struct mlx5dr_table * -mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level) { return NULL; } +mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags) { return NULL; } static inline int mlx5dr_table_destroy(struct mlx5dr_table *table) { return 0; } @@ -165,8 +175,8 @@ static inline struct mlx5dr_action * mlx5dr_action_create_dest_table(struct mlx5dr_table *table) { return NULL; } static inline struct mlx5dr_action * -mlx5dr_create_action_dest_flow_fw_table(struct mlx5_flow_table *ft, - struct mlx5_core_dev *mdev) { return NULL; } +mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain 
*domain, + struct mlx5_flow_table *ft) { return NULL; } static inline struct mlx5dr_action * mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain, @@ -174,6 +184,11 @@ mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain, u16 vhca_id) { return NULL; } static inline struct mlx5dr_action * +mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, + struct mlx5dr_action_dest *dests, + u32 num_of_dests) { return NULL; } + +static inline struct mlx5dr_action * mlx5dr_action_create_drop(void) { return NULL; } static inline struct mlx5dr_action * diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index f2a0e72285ba..02f7e4a39578 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -89,7 +89,7 @@ void mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides) len = nstrides << wq->fbc.log_stride; wqe = mlx5_wq_cyc_get_wqe(wq, ix); - pr_info("WQE DUMP: WQ size %d WQ cur size %d, WQE index 0x%x, len: %ld\n", + pr_info("WQE DUMP: WQ size %d WQ cur size %d, WQE index 0x%x, len: %zu\n", mlx5_wq_cyc_get_size(wq), wq->cur_sz, ix, len); print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, wqe, len, false); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c index 2b543911ae00..c4caeeadcba9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c @@ -213,8 +213,8 @@ mlxsw_m_port_create(struct mlxsw_m *mlxsw_m, u8 local_port, u8 module) err_register_netdev: mlxsw_m->ports[local_port] = NULL; - free_netdev(dev); err_dev_addr_get: + free_netdev(dev); err_alloc_etherdev: mlxsw_core_port_fini(mlxsw_m->core, local_port); return err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index af30e8a76682..dd6685156396 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3477,10 +3477,10 @@ MLXSW_REG_DEFINE(qeec, MLXSW_REG_QEEC_ID, MLXSW_REG_QEEC_LEN); MLXSW_ITEM32(reg, qeec, local_port, 0x00, 16, 8); enum mlxsw_reg_qeec_hr { - MLXSW_REG_QEEC_HIERARCY_PORT, - MLXSW_REG_QEEC_HIERARCY_GROUP, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, - MLXSW_REG_QEEC_HIERARCY_TC, + MLXSW_REG_QEEC_HR_PORT, + MLXSW_REG_QEEC_HR_GROUP, + MLXSW_REG_QEEC_HR_SUBGROUP, + MLXSW_REG_QEEC_HR_TC, }; /* reg_qeec_element_hierarchy @@ -3563,8 +3563,8 @@ MLXSW_ITEM32(reg, qeec, min_shaper_rate, 0x0C, 0, 28); */ MLXSW_ITEM32(reg, qeec, mase, 0x10, 31, 1); -/* A large max rate will disable the max shaper. */ -#define MLXSW_REG_QEEC_MAS_DIS 200000000 /* Kbps */ +/* The largest max shaper value possible to disable the shaper. */ +#define MLXSW_REG_QEEC_MAS_DIS ((1u << 31) - 1) /* Kbps */ /* reg_qeec_max_shaper_rate * Max shaper information rate. 
@@ -3602,6 +3602,21 @@ MLXSW_ITEM32(reg, qeec, dwrr, 0x18, 15, 1); */ MLXSW_ITEM32(reg, qeec, dwrr_weight, 0x18, 0, 8); +/* reg_qeec_max_shaper_bs + * Max shaper burst size + * Burst size is 2^max_shaper_bs * 512 bits + * For Spectrum-1: Range is: 5..25 + * For Spectrum-2: Range is: 11..25 + * Reserved when ptps = 1 + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, max_shaper_bs, 0x1C, 0, 6); + +#define MLXSW_REG_QEEC_HIGHEST_SHAPER_BS 25 +#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1 5 +#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2 11 +#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3 5 + static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port, enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index) @@ -3618,8 +3633,7 @@ static inline void mlxsw_reg_qeec_ptps_pack(char *payload, u8 local_port, { MLXSW_REG_ZERO(qeec, payload); mlxsw_reg_qeec_local_port_set(payload, local_port); - mlxsw_reg_qeec_element_hierarchy_set(payload, - MLXSW_REG_QEEC_HIERARCY_PORT); + mlxsw_reg_qeec_element_hierarchy_set(payload, MLXSW_REG_QEEC_HR_PORT); mlxsw_reg_qeec_ptps_set(payload, ptps); } @@ -3749,6 +3763,38 @@ mlxsw_reg_qpdsm_prio_pack(char *payload, unsigned short prio, u8 dscp) mlxsw_reg_qpdsm_prio_entry_color2_dscp_set(payload, prio, dscp); } +/* QPDP - QoS Port DSCP to Priority Mapping Register + * ------------------------------------------------- + * This register controls the port default Switch Priority and Color. The + * default Switch Priority and Color are used for frames where the trust state + * uses default values. All member ports of a LAG should be configured with the + * same default values. + */ +#define MLXSW_REG_QPDP_ID 0x4007 +#define MLXSW_REG_QPDP_LEN 0x8 + +MLXSW_REG_DEFINE(qpdp, MLXSW_REG_QPDP_ID, MLXSW_REG_QPDP_LEN); + +/* reg_qpdp_local_port + * Local Port. Supported for data packets from CPU port. + * Access: Index + */ +MLXSW_ITEM32(reg, qpdp, local_port, 0x00, 16, 8); + +/* reg_qpdp_switch_prio + * Default port Switch Priority (default 0) + * Access: RW + */ +MLXSW_ITEM32(reg, qpdp, switch_prio, 0x04, 0, 4); + +static inline void mlxsw_reg_qpdp_pack(char *payload, u8 local_port, + u8 switch_prio) +{ + MLXSW_REG_ZERO(qpdp, payload); + mlxsw_reg_qpdp_local_port_set(payload, local_port); + mlxsw_reg_qpdp_switch_prio_set(payload, switch_prio); +} + /* QPDPM - QoS Port DSCP to Priority Mapping Register * -------------------------------------------------- * This register controls the mapping from DSCP field to @@ -5482,6 +5528,7 @@ enum mlxsw_reg_htgt_discard_trap_group { MLXSW_REG_HTGT_DISCARD_TRAP_GROUP_BASE = MLXSW_REG_HTGT_TRAP_GROUP_MAX, MLXSW_REG_HTGT_TRAP_GROUP_SP_L2_DISCARDS, MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_DISCARDS, + MLXSW_REG_HTGT_TRAP_GROUP_SP_TUNNEL_DISCARDS, }; /* reg_htgt_trap_group @@ -10109,6 +10156,92 @@ static inline void mlxsw_reg_tigcr_pack(char *payload, bool ttlc, u8 ttl_uc) mlxsw_reg_tigcr_ttl_uc_set(payload, ttl_uc); } +/* TIEEM - Tunneling IPinIP Encapsulation ECN Mapping Register + * ----------------------------------------------------------- + * The TIEEM register maps ECN of the IP header at the ingress to the + * encapsulation to the ECN of the underlay network. + */ +#define MLXSW_REG_TIEEM_ID 0xA812 +#define MLXSW_REG_TIEEM_LEN 0x0C + +MLXSW_REG_DEFINE(tieem, MLXSW_REG_TIEEM_ID, MLXSW_REG_TIEEM_LEN); + +/* reg_tieem_overlay_ecn + * ECN of the IP header in the overlay network. + * Access: Index + */ +MLXSW_ITEM32(reg, tieem, overlay_ecn, 0x04, 24, 2); + +/* reg_tineem_underlay_ecn + * ECN of the IP header in the underlay network. 
+ * Access: RW + */ +MLXSW_ITEM32(reg, tieem, underlay_ecn, 0x04, 16, 2); + +static inline void mlxsw_reg_tieem_pack(char *payload, u8 overlay_ecn, + u8 underlay_ecn) +{ + MLXSW_REG_ZERO(tieem, payload); + mlxsw_reg_tieem_overlay_ecn_set(payload, overlay_ecn); + mlxsw_reg_tieem_underlay_ecn_set(payload, underlay_ecn); +} + +/* TIDEM - Tunneling IPinIP Decapsulation ECN Mapping Register + * ----------------------------------------------------------- + * The TIDEM register configures the actions that are done in the + * decapsulation. + */ +#define MLXSW_REG_TIDEM_ID 0xA813 +#define MLXSW_REG_TIDEM_LEN 0x0C + +MLXSW_REG_DEFINE(tidem, MLXSW_REG_TIDEM_ID, MLXSW_REG_TIDEM_LEN); + +/* reg_tidem_underlay_ecn + * ECN field of the IP header in the underlay network. + * Access: Index + */ +MLXSW_ITEM32(reg, tidem, underlay_ecn, 0x04, 24, 2); + +/* reg_tidem_overlay_ecn + * ECN field of the IP header in the overlay network. + * Access: Index + */ +MLXSW_ITEM32(reg, tidem, overlay_ecn, 0x04, 16, 2); + +/* reg_tidem_eip_ecn + * Egress IP ECN. ECN field of the IP header of the packet which goes out + * from the decapsulation. + * Access: RW + */ +MLXSW_ITEM32(reg, tidem, eip_ecn, 0x04, 8, 2); + +/* reg_tidem_trap_en + * Trap enable: + * 0 - No trap due to decap ECN + * 1 - Trap enable with trap_id + * Access: RW + */ +MLXSW_ITEM32(reg, tidem, trap_en, 0x08, 28, 4); + +/* reg_tidem_trap_id + * Trap ID. Either DECAP_ECN0 or DECAP_ECN1. + * Reserved when trap_en is '0'. + * Access: RW + */ +MLXSW_ITEM32(reg, tidem, trap_id, 0x08, 0, 9); + +static inline void mlxsw_reg_tidem_pack(char *payload, u8 underlay_ecn, + u8 overlay_ecn, u8 eip_ecn, + bool trap_en, u16 trap_id) +{ + MLXSW_REG_ZERO(tidem, payload); + mlxsw_reg_tidem_underlay_ecn_set(payload, underlay_ecn); + mlxsw_reg_tidem_overlay_ecn_set(payload, overlay_ecn); + mlxsw_reg_tidem_eip_ecn_set(payload, eip_ecn); + mlxsw_reg_tidem_trap_en_set(payload, trap_en); + mlxsw_reg_tidem_trap_id_set(payload, trap_id); +} + /* SBPR - Shared Buffer Pools Register * ----------------------------------- * The SBPR configures and retrieves the shared buffer pools and configuration. 
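The TIEEM and TIDEM layouts above only define the register fields; the actual ECN mappings are programmed later in this diff, in spectrum_ipip.c, by mlxsw_sp_ipip_ecn_encap_init() and mlxsw_sp_ipip_ecn_decap_init(). As a minimal sketch (not part of the patch), the encapsulation side reduces to the RFC 6040 style rule implemented by INET_ECN_encapsulate(): copy the overlay codepoint into the underlay header, except that CE becomes ECT(0). A standalone C model of that loop follows; the enum and helper names are illustrative, not kernel symbols, and the codepoint values mirror include/net/inet_ecn.h (Not-ECT=0, ECT(1)=1, ECT(0)=2, CE=3).

/* Illustrative sketch only -- not part of the patch. Models the encap
 * mapping that mlxsw_sp_ipip_ecn_encap_init() programs into TIEEM via
 * INET_ECN_encapsulate(): copy the overlay ECN, but map CE to ECT(0).
 */
#include <stdio.h>

enum { ECN_NOT_ECT = 0, ECN_ECT_1 = 1, ECN_ECT_0 = 2, ECN_CE = 3 };

static unsigned int ecn_encapsulate(unsigned int overlay_ecn)
{
	/* RFC 6040 "normal mode": CE is demoted to ECT(0) on encap. */
	return overlay_ecn == ECN_CE ? ECN_ECT_0 : overlay_ecn;
}

int main(void)
{
	static const char * const name[] = {
		"Not-ECT", "ECT(1)", "ECT(0)", "CE"
	};
	unsigned int i;

	/* Same iteration range as the driver's encap init loop. */
	for (i = ECN_NOT_ECT; i <= ECN_CE; i++)
		printf("overlay %-7s -> underlay %s\n",
		       name[i], name[ecn_encapsulate(i)]);
	return 0;
}

The decapsulation side (TIDEM) additionally decides whether to trap to the CPU via __INET_ECN_decapsulate(); the nested loops in mlxsw_sp_ipip_ecn_decap_init() later in this diff enumerate all sixteen outer/inner combinations and program one TIDEM entry per pair.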
@@ -10581,6 +10714,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(qeec), MLXSW_REG(qrwe), MLXSW_REG(qpdsm), + MLXSW_REG(qpdp), MLXSW_REG(qpdpm), MLXSW_REG(qtctm), MLXSW_REG(qpsc), @@ -10652,6 +10786,8 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(tndem), MLXSW_REG(tnpc), MLXSW_REG(tigcr), + MLXSW_REG(tieem), + MLXSW_REG(tidem), MLXSW_REG(sbpr), MLXSW_REG(sbcm), MLXSW_REG(sbpm), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 8ed15199eb4f..7358b5bc7eb6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -45,11 +45,9 @@ #include "spectrum_ptp.h" #include "../mlxfw/mlxfw.h" -#define MLXSW_SP_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100) - #define MLXSW_SP1_FWREV_MAJOR 13 #define MLXSW_SP1_FWREV_MINOR 2000 -#define MLXSW_SP1_FWREV_SUBMINOR 2308 +#define MLXSW_SP1_FWREV_SUBMINOR 2714 #define MLXSW_SP1_FWREV_CAN_RESET_MINOR 1702 static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = { @@ -66,7 +64,7 @@ static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = { #define MLXSW_SP2_FWREV_MAJOR 29 #define MLXSW_SP2_FWREV_MINOR 2000 -#define MLXSW_SP2_FWREV_SUBMINOR 2308 +#define MLXSW_SP2_FWREV_SUBMINOR 2714 static const struct mlxsw_fw_rev mlxsw_sp2_fw_rev = { .major = MLXSW_SP2_FWREV_MAJOR, @@ -197,6 +195,10 @@ struct mlxsw_sp_ptp_ops { u64 *data, int data_index); }; +struct mlxsw_sp_span_ops { + u32 (*buffsize_get)(int mtu, u32 speed); +}; + static int mlxsw_sp_component_query(struct mlxfw_dev *mlxfw_dev, u16 component_index, u32 *p_max_size, u8 *p_align_bits, u16 *p_max_write_size) @@ -423,13 +425,12 @@ static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp) rev->major, req_rev->major); return -EINVAL; } - if (MLXSW_SP_FWREV_MINOR_TO_BRANCH(rev->minor) == - MLXSW_SP_FWREV_MINOR_TO_BRANCH(req_rev->minor) && - mlxsw_core_fw_rev_minor_subminor_validate(rev, req_rev)) + if (mlxsw_core_fw_rev_minor_subminor_validate(rev, req_rev)) return 0; - dev_info(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver\n", - rev->major, rev->minor, rev->subminor); + dev_err(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver (required >= %d.%d.%d)\n", + rev->major, rev->minor, rev->subminor, req_rev->major, + req_rev->minor, req_rev->subminor); dev_info(mlxsw_sp->bus_info->dev, "Flashing firmware using file %s\n", fw_filename); @@ -1793,6 +1794,10 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type, return mlxsw_sp_setup_tc_red(mlxsw_sp_port, type_data); case TC_SETUP_QDISC_PRIO: return mlxsw_sp_setup_tc_prio(mlxsw_sp_port, type_data); + case TC_SETUP_QDISC_ETS: + return mlxsw_sp_setup_tc_ets(mlxsw_sp_port, type_data); + case TC_SETUP_QDISC_TBF: + return mlxsw_sp_setup_tc_tbf(mlxsw_sp_port, type_data); default: return -EOPNOTSUPP; } @@ -3536,6 +3541,27 @@ mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); } +int mlxsw_sp_port_speed_get(struct mlxsw_sp_port *mlxsw_sp_port, u32 *speed) +{ + const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char ptys_pl[MLXSW_REG_PTYS_LEN]; + u32 eth_proto_oper; + int err; + + port_type_speed_ops = mlxsw_sp->port_type_speed_ops; + port_type_speed_ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, + mlxsw_sp_port->local_port, 0, + false); + err = 
mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); + if (err) + return err; + port_type_speed_ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, NULL, NULL, + ð_proto_oper); + *speed = port_type_speed_ops->from_ptys_speed(mlxsw_sp, eth_proto_oper); + return 0; +} + int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, bool dwrr, u8 dwrr_weight) @@ -3553,7 +3579,7 @@ int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, - u8 next_index, u32 maxrate) + u8 next_index, u32 maxrate, u8 burst_size) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char qeec_pl[MLXSW_REG_QEEC_LEN]; @@ -3562,6 +3588,7 @@ int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port, next_index); mlxsw_reg_qeec_mase_set(qeec_pl, true); mlxsw_reg_qeec_max_shaper_rate_set(qeec_pl, maxrate); + mlxsw_reg_qeec_max_shaper_bs_set(qeec_pl, burst_size); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl); } @@ -3599,26 +3626,25 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) * one subgroup, which are all member in the same group. */ err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_GROUP, 0, 0, false, - 0); + MLXSW_REG_QEEC_HR_GROUP, 0, 0, false, 0); if (err) return err; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, false, 0); if (err) return err; } for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_TC, i, i, + MLXSW_REG_QEEC_HR_TC, i, i, false, 0); if (err) return err; err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_TC, + MLXSW_REG_QEEC_HR_TC, i + 8, i, true, 100); if (err) @@ -3630,30 +3656,30 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) * for the initial configuration. */ err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_PORT, 0, 0, - MLXSW_REG_QEEC_MAS_DIS); + MLXSW_REG_QEEC_HR_PORT, 0, 0, + MLXSW_REG_QEEC_MAS_DIS, 0); if (err) return err; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, - MLXSW_REG_QEEC_MAS_DIS); + MLXSW_REG_QEEC_MAS_DIS, 0); if (err) return err; } for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_TC, + MLXSW_REG_QEEC_HR_TC, i, i, - MLXSW_REG_QEEC_MAS_DIS); + MLXSW_REG_QEEC_MAS_DIS, 0); if (err) return err; err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_TC, + MLXSW_REG_QEEC_HR_TC, i + 8, i, - MLXSW_REG_QEEC_MAS_DIS); + MLXSW_REG_QEEC_MAS_DIS, 0); if (err) return err; } @@ -3661,7 +3687,7 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) /* Configure the min shaper for multicast TCs. 
*/ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_min_bw_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_TC, + MLXSW_REG_QEEC_HR_TC, i + 8, i, MLXSW_REG_QEEC_MIS_MIN); if (err) @@ -3885,6 +3911,8 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, INIT_DELAYED_WORK(&mlxsw_sp_port->ptp.shaper_dw, mlxsw_sp->ptp_ops->shaper_work); + INIT_DELAYED_WORK(&mlxsw_sp_port->span.speed_update_dw, + mlxsw_sp_span_speed_update_work); mlxsw_sp->ports[local_port] = mlxsw_sp_port; err = register_netdev(dev); @@ -3941,6 +3969,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; cancel_delayed_work_sync(&mlxsw_sp_port->periodic_hw_stats.update_dw); + cancel_delayed_work_sync(&mlxsw_sp_port->span.speed_update_dw); cancel_delayed_work_sync(&mlxsw_sp_port->ptp.shaper_dw); mlxsw_sp_port_ptp_clear(mlxsw_sp_port); mlxsw_core_port_clear(mlxsw_sp->core, local_port, mlxsw_sp); @@ -4348,6 +4377,7 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg, netdev_info(mlxsw_sp_port->dev, "link up\n"); netif_carrier_on(mlxsw_sp_port->dev); mlxsw_core_schedule_dw(&mlxsw_sp_port->ptp.shaper_dw, 0); + mlxsw_core_schedule_dw(&mlxsw_sp_port->span.speed_update_dw, 0); } else { netdev_info(mlxsw_sp_port->dev, "link down\n"); netif_carrier_off(mlxsw_sp_port->dev); @@ -4547,10 +4577,16 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { false), MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, ROUTER_EXP, false), MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_MARK(IPIP_DECAP_ERROR, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_MARK(DECAP_ECN0, TRAP_TO_CPU, ROUTER_EXP, false), MLXSW_SP_RXL_MARK(IPV4_VRRP, TRAP_TO_CPU, VRRP, false), MLXSW_SP_RXL_MARK(IPV6_VRRP, TRAP_TO_CPU, VRRP, false), + MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_SIP_CLASS_E, FORWARD, + ROUTER_EXP, false), + MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_MC_DMAC, FORWARD, + ROUTER_EXP, false), + MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_SIP_DIP, FORWARD, + ROUTER_EXP, false), + MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_DIP_LINK_LOCAL, FORWARD, + ROUTER_EXP, false), /* PKT Sample trap */ MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU, false, SP_IP2ME, DISCARD), @@ -4889,6 +4925,33 @@ static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = { .get_stats = mlxsw_sp2_get_stats, }; +static u32 mlxsw_sp1_span_buffsize_get(int mtu, u32 speed) +{ + return mtu * 5 / 2; +} + +static const struct mlxsw_sp_span_ops mlxsw_sp1_span_ops = { + .buffsize_get = mlxsw_sp1_span_buffsize_get, +}; + +#define MLXSW_SP2_SPAN_EG_MIRROR_BUFFER_FACTOR 38 + +static u32 mlxsw_sp2_span_buffsize_get(int mtu, u32 speed) +{ + return 3 * mtu + MLXSW_SP2_SPAN_EG_MIRROR_BUFFER_FACTOR * speed / 1000; +} + +static const struct mlxsw_sp_span_ops mlxsw_sp2_span_ops = { + .buffsize_get = mlxsw_sp2_span_buffsize_get, +}; + +u32 mlxsw_sp_span_buffsize_get(struct mlxsw_sp *mlxsw_sp, int mtu, u32 speed) +{ + u32 buffsize = mlxsw_sp->span_ops->buffsize_get(speed, mtu); + + return mlxsw_sp_bytes_cells(mlxsw_sp, buffsize) + 1; +} + static int mlxsw_sp_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr); @@ -5110,8 +5173,10 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->sb_vals = &mlxsw_sp1_sb_vals; mlxsw_sp->port_type_speed_ops = &mlxsw_sp1_port_type_speed_ops; mlxsw_sp->ptp_ops = &mlxsw_sp1_ptp_ops; + mlxsw_sp->span_ops = &mlxsw_sp1_span_ops; 
mlxsw_sp->listeners = mlxsw_sp1_listener; mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp1_listener); + mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); } @@ -5135,6 +5200,8 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals; mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops; mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops; + mlxsw_sp->span_ops = &mlxsw_sp2_span_ops; + mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); } @@ -5156,6 +5223,8 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals; mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops; mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops; + mlxsw_sp->span_ops = &mlxsw_sp2_span_ops; + mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 347bec9d1ecf..a0f1f9dceec5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -140,6 +140,7 @@ struct mlxsw_sp_sb_vals; struct mlxsw_sp_port_type_speed_ops; struct mlxsw_sp_ptp_state; struct mlxsw_sp_ptp_ops; +struct mlxsw_sp_span_ops; struct mlxsw_sp_port_mapping { u8 module; @@ -185,8 +186,10 @@ struct mlxsw_sp { const struct mlxsw_sp_sb_vals *sb_vals; const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops; const struct mlxsw_sp_ptp_ops *ptp_ops; + const struct mlxsw_sp_span_ops *span_ops; const struct mlxsw_listener *listeners; size_t listeners_count; + u32 lowest_shaper_bs; }; static inline struct mlxsw_sp_upper * @@ -292,6 +295,9 @@ struct mlxsw_sp_port { struct mlxsw_sp_ptp_port_stats stats; } ptp; u8 split_base_local_port; + struct { + struct delayed_work speed_update_dw; + } span; }; struct mlxsw_sp_port_type_speed_ops { @@ -471,6 +477,7 @@ extern struct notifier_block mlxsw_sp_switchdev_notifier; /* spectrum.c */ void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb, u8 local_port, void *priv); +int mlxsw_sp_port_speed_get(struct mlxsw_sp_port *mlxsw_sp_port, u32 *speed); int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, bool dwrr, u8 dwrr_weight); @@ -481,7 +488,7 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, struct ieee_pfc *my_pfc); int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, - u8 next_index, u32 maxrate); + u8 next_index, u32 maxrate, u8 burst_size); enum mlxsw_reg_spms_state mlxsw_sp_stp_spms_state(u8 stp_state); int mlxsw_sp_port_vid_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, u8 state); @@ -501,6 +508,7 @@ int mlxsw_sp_flow_counter_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int *p_counter_index); void mlxsw_sp_flow_counter_free(struct mlxsw_sp *mlxsw_sp, unsigned int counter_index); +u32 mlxsw_sp_span_buffsize_get(struct mlxsw_sp *mlxsw_sp, int mtu, u32 speed); bool mlxsw_sp_port_dev_check(const struct net_device *dev); struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev); struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev); @@ -852,6 +860,10 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_red_qopt_offload *p); int mlxsw_sp_setup_tc_prio(struct 
mlxsw_sp_port *mlxsw_sp_port, struct tc_prio_qopt_offload *p); +int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_ets_qopt_offload *p); +int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_tbf_qopt_offload *p); /* spectrum_fid.c */ bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 150b3a144b83..3d3cca596116 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -8,6 +8,7 @@ #include <linux/string.h> #include <linux/rhashtable.h> #include <linux/netdevice.h> +#include <linux/mutex.h> #include <net/net_namespace.h> #include <net/tc_act/tc_vlan.h> @@ -25,6 +26,7 @@ struct mlxsw_sp_acl { struct mlxsw_sp_fid *dummy_fid; struct rhashtable ruleset_ht; struct list_head rules; + struct mutex rules_lock; /* Protects rules list */ struct { struct delayed_work dw; unsigned long interval; /* ms */ @@ -701,7 +703,9 @@ int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp, goto err_ruleset_block_bind; } + mutex_lock(&mlxsw_sp->acl->rules_lock); list_add_tail(&rule->list, &mlxsw_sp->acl->rules); + mutex_unlock(&mlxsw_sp->acl->rules_lock); block->rule_count++; block->egress_blocker_rule_count += rule->rulei->egress_bind_blocker; return 0; @@ -723,7 +727,9 @@ void mlxsw_sp_acl_rule_del(struct mlxsw_sp *mlxsw_sp, block->egress_blocker_rule_count -= rule->rulei->egress_bind_blocker; ruleset->ht_key.block->rule_count--; + mutex_lock(&mlxsw_sp->acl->rules_lock); list_del(&rule->list); + mutex_unlock(&mlxsw_sp->acl->rules_lock); if (!ruleset->ht_key.chain_index && mlxsw_sp_acl_ruleset_is_singular(ruleset)) mlxsw_sp_acl_ruleset_block_unbind(mlxsw_sp, ruleset, @@ -783,19 +789,18 @@ static int mlxsw_sp_acl_rules_activity_update(struct mlxsw_sp_acl *acl) struct mlxsw_sp_acl_rule *rule; int err; - /* Protect internal structures from changes */ - rtnl_lock(); + mutex_lock(&acl->rules_lock); list_for_each_entry(rule, &acl->rules, list) { err = mlxsw_sp_acl_rule_activity_update(acl->mlxsw_sp, rule); if (err) goto err_rule_update; } - rtnl_unlock(); + mutex_unlock(&acl->rules_lock); return 0; err_rule_update: - rtnl_unlock(); + mutex_unlock(&acl->rules_lock); return err; } @@ -880,6 +885,7 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp) acl->dummy_fid = fid; INIT_LIST_HEAD(&acl->rules); + mutex_init(&acl->rules_lock); err = mlxsw_sp_acl_tcam_init(mlxsw_sp, &acl->tcam); if (err) goto err_acl_ops_init; @@ -892,6 +898,7 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp) return 0; err_acl_ops_init: + mutex_destroy(&acl->rules_lock); mlxsw_sp_fid_put(fid); err_fid_get: rhashtable_destroy(&acl->ruleset_ht); @@ -908,6 +915,7 @@ void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp) cancel_delayed_work_sync(&mlxsw_sp->acl->rule_activity_update.dw); mlxsw_sp_acl_tcam_fini(mlxsw_sp, &acl->tcam); + mutex_destroy(&acl->rules_lock); WARN_ON(!list_empty(&acl->rules)); mlxsw_sp_fid_put(acl->dummy_fid); rhashtable_destroy(&acl->ruleset_ht); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c index 21296fa7f7fb..49a72a8f1f57 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c @@ -160,7 +160,7 @@ static int __mlxsw_sp_dcbnl_ieee_setets(struct mlxsw_sp_port *mlxsw_sp_port, u8 weight = ets->tc_tx_bw[i]; err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - 
MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, dwrr, weight); if (err) { netdev_err(dev, "Failed to link subgroup ETS element %d to group\n", @@ -198,7 +198,7 @@ err_port_ets_set: u8 weight = my_ets->tc_tx_bw[i]; err = mlxsw_sp_port_ets_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, dwrr, weight); } return err; @@ -369,6 +369,17 @@ err_update_qrwe: } static int +mlxsw_sp_port_dcb_app_update_qpdp(struct mlxsw_sp_port *mlxsw_sp_port, + u8 default_prio) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qpdp_pl[MLXSW_REG_QPDP_LEN]; + + mlxsw_reg_qpdp_pack(qpdp_pl, mlxsw_sp_port->local_port, default_prio); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpdp), qpdp_pl); +} + +static int mlxsw_sp_port_dcb_app_update_qpdpm(struct mlxsw_sp_port *mlxsw_sp_port, struct dcb_ieee_app_dscp_map *map) { @@ -405,6 +416,12 @@ static int mlxsw_sp_port_dcb_app_update(struct mlxsw_sp_port *mlxsw_sp_port) int err; default_prio = mlxsw_sp_port_dcb_app_default_prio(mlxsw_sp_port); + err = mlxsw_sp_port_dcb_app_update_qpdp(mlxsw_sp_port, default_prio); + if (err) { + netdev_err(mlxsw_sp_port->dev, "Couldn't configure port default priority\n"); + return err; + } + have_dscp = mlxsw_sp_port_dcb_app_prio_dscp_map(mlxsw_sp_port, &prio_map); @@ -507,9 +524,9 @@ static int mlxsw_sp_dcbnl_ieee_setmaxrate(struct net_device *dev, for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, + MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, - maxrate->tc_maxrate[i]); + maxrate->tc_maxrate[i], 0); if (err) { netdev_err(dev, "Failed to set maxrate for TC %d\n", i); goto err_port_ets_maxrate_set; @@ -523,8 +540,9 @@ static int mlxsw_sp_dcbnl_ieee_setmaxrate(struct net_device *dev, err_port_ets_maxrate_set: for (i--; i >= 0; i--) mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, - MLXSW_REG_QEEC_HIERARCY_SUBGROUP, - i, 0, my_maxrate->tc_maxrate[i]); + MLXSW_REG_QEEC_HR_SUBGROUP, + i, 0, + my_maxrate->tc_maxrate[i], 0); return err; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index 6400cd644b7a..a8525992528f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -3,8 +3,10 @@ #include <net/ip_tunnels.h> #include <net/ip6_tunnel.h> +#include <net/inet_ecn.h> #include "spectrum_ipip.h" +#include "reg.h" struct ip_tunnel_parm mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev) @@ -338,3 +340,61 @@ static const struct mlxsw_sp_ipip_ops mlxsw_sp_ipip_gre4_ops = { const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[] = { [MLXSW_SP_IPIP_TYPE_GRE4] = &mlxsw_sp_ipip_gre4_ops, }; + +static int mlxsw_sp_ipip_ecn_encap_init_one(struct mlxsw_sp *mlxsw_sp, + u8 inner_ecn, u8 outer_ecn) +{ + char tieem_pl[MLXSW_REG_TIEEM_LEN]; + + mlxsw_reg_tieem_pack(tieem_pl, inner_ecn, outer_ecn); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tieem), tieem_pl); +} + +int mlxsw_sp_ipip_ecn_encap_init(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + /* Iterate over inner ECN values */ + for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) { + u8 outer_ecn = INET_ECN_encapsulate(0, i); + int err; + + err = mlxsw_sp_ipip_ecn_encap_init_one(mlxsw_sp, i, outer_ecn); + if (err) + return err; + } + + return 0; +} + +static int mlxsw_sp_ipip_ecn_decap_init_one(struct mlxsw_sp *mlxsw_sp, + u8 inner_ecn, u8 outer_ecn) +{ + char tidem_pl[MLXSW_REG_TIDEM_LEN]; + bool trap_en, 
set_ce = false; + u8 new_inner_ecn; + + trap_en = __INET_ECN_decapsulate(outer_ecn, inner_ecn, &set_ce); + new_inner_ecn = set_ce ? INET_ECN_CE : inner_ecn; + + mlxsw_reg_tidem_pack(tidem_pl, outer_ecn, inner_ecn, new_inner_ecn, + trap_en, trap_en ? MLXSW_TRAP_ID_DECAP_ECN0 : 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tidem), tidem_pl); +} + +int mlxsw_sp_ipip_ecn_decap_init(struct mlxsw_sp *mlxsw_sp) +{ + int i, j, err; + + /* Iterate over inner ECN values */ + for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) { + /* Iterate over outer ECN values */ + for (j = INET_ECN_NOT_ECT; j <= INET_ECN_CE; j++) { + err = mlxsw_sp_ipip_ecn_decap_init_one(mlxsw_sp, i, j); + if (err) + return err; + } + } + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c index ec2ff3d7f41c..34f7c3501b08 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -920,6 +920,7 @@ static int mlxsw_sp_ptp_get_message_types(const struct hwtstamp_config *config, egr_types = 0xff; break; case HWTSTAMP_TX_ONESTEP_SYNC: + case HWTSTAMP_TX_ONESTEP_P2P: return -ERANGE; } @@ -1015,27 +1016,17 @@ mlxsw_sp1_ptp_port_shaper_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable) static int mlxsw_sp1_ptp_port_shaper_check(struct mlxsw_sp_port *mlxsw_sp_port) { - const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops; - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char ptys_pl[MLXSW_REG_PTYS_LEN]; - u32 eth_proto_oper, speed; bool ptps = false; int err, i; + u32 speed; if (!mlxsw_sp1_ptp_hwtstamp_enabled(mlxsw_sp_port)) return mlxsw_sp1_ptp_port_shaper_set(mlxsw_sp_port, false); - port_type_speed_ops = mlxsw_sp->port_type_speed_ops; - port_type_speed_ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, - mlxsw_sp_port->local_port, 0, - false); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); + err = mlxsw_sp_port_speed_get(mlxsw_sp_port, &speed); if (err) return err; - port_type_speed_ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, NULL, NULL, - ð_proto_oper); - speed = port_type_speed_ops->from_ptys_speed(mlxsw_sp, eth_proto_oper); for (i = 0; i < MLXSW_SP1_PTP_SHAPER_PARAMS_LEN; i++) { if (mlxsw_sp1_ptp_shaper_params[i].ethtool_speed == speed) { ptps = true; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 0124bfe1963b..79a2801d59f6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -18,6 +18,8 @@ enum mlxsw_sp_qdisc_type { MLXSW_SP_QDISC_NO_QDISC, MLXSW_SP_QDISC_RED, MLXSW_SP_QDISC_PRIO, + MLXSW_SP_QDISC_ETS, + MLXSW_SP_QDISC_TBF, }; struct mlxsw_sp_qdisc_ops { @@ -226,6 +228,70 @@ mlxsw_sp_qdisc_bstats_per_priority_get(struct mlxsw_sp_port_xstats *xstats, } } +static void +mlxsw_sp_qdisc_collect_tc_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u64 *p_tx_bytes, u64 *p_tx_packets, + u64 *p_drops, u64 *p_backlog) +{ + u8 tclass_num = mlxsw_sp_qdisc->tclass_num; + struct mlxsw_sp_port_xstats *xstats; + u64 tx_bytes, tx_packets; + + xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; + mlxsw_sp_qdisc_bstats_per_priority_get(xstats, + mlxsw_sp_qdisc->prio_bitmap, + &tx_packets, &tx_bytes); + + *p_tx_packets += tx_packets; + *p_tx_bytes += tx_bytes; + *p_drops += xstats->wred_drop[tclass_num] + + mlxsw_sp_xstats_tail_drop(xstats, tclass_num); + *p_backlog += 
mlxsw_sp_xstats_backlog(xstats, tclass_num); +} + +static void +mlxsw_sp_qdisc_update_stats(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u64 tx_bytes, u64 tx_packets, + u64 drops, u64 backlog, + struct tc_qopt_offload_stats *stats_ptr) +{ + struct mlxsw_sp_qdisc_stats *stats_base = &mlxsw_sp_qdisc->stats_base; + + tx_bytes -= stats_base->tx_bytes; + tx_packets -= stats_base->tx_packets; + drops -= stats_base->drops; + backlog -= stats_base->backlog; + + _bstats_update(stats_ptr->bstats, tx_bytes, tx_packets); + stats_ptr->qstats->drops += drops; + stats_ptr->qstats->backlog += mlxsw_sp_cells_bytes(mlxsw_sp, backlog); + + stats_base->backlog += backlog; + stats_base->drops += drops; + stats_base->tx_bytes += tx_bytes; + stats_base->tx_packets += tx_packets; +} + +static void +mlxsw_sp_qdisc_get_tc_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_qopt_offload_stats *stats_ptr) +{ + u64 tx_packets = 0; + u64 tx_bytes = 0; + u64 backlog = 0; + u64 drops = 0; + + mlxsw_sp_qdisc_collect_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &tx_bytes, &tx_packets, + &drops, &backlog); + mlxsw_sp_qdisc_update_stats(mlxsw_sp_port->mlxsw_sp, mlxsw_sp_qdisc, + tx_bytes, tx_packets, drops, backlog, + stats_ptr); +} + static int mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port, int tclass_num, u32 min, u32 max, @@ -356,19 +422,28 @@ mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, } static void -mlxsw_sp_qdisc_red_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - void *params) +mlxsw_sp_qdisc_leaf_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct gnet_stats_queue *qstats) { - struct tc_red_qopt_offload_params *p = params; u64 backlog; backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, mlxsw_sp_qdisc->stats_base.backlog); - p->qstats->backlog -= backlog; + qstats->backlog -= backlog; mlxsw_sp_qdisc->stats_base.backlog = 0; } +static void +mlxsw_sp_qdisc_red_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_red_qopt_offload_params *p = params; + + mlxsw_sp_qdisc_leaf_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, p->qstats); +} + static int mlxsw_sp_qdisc_get_red_xstats(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, @@ -402,41 +477,21 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, struct tc_qopt_offload_stats *stats_ptr) { - u64 tx_bytes, tx_packets, overlimits, drops, backlog; u8 tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_qdisc_stats *stats_base; struct mlxsw_sp_port_xstats *xstats; + u64 overlimits; xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; stats_base = &mlxsw_sp_qdisc->stats_base; - mlxsw_sp_qdisc_bstats_per_priority_get(xstats, - mlxsw_sp_qdisc->prio_bitmap, - &tx_packets, &tx_bytes); - tx_bytes = tx_bytes - stats_base->tx_bytes; - tx_packets = tx_packets - stats_base->tx_packets; - + mlxsw_sp_qdisc_get_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, stats_ptr); overlimits = xstats->wred_drop[tclass_num] + xstats->ecn - stats_base->overlimits; - drops = xstats->wred_drop[tclass_num] + - mlxsw_sp_xstats_tail_drop(xstats, tclass_num) - - stats_base->drops; - backlog = mlxsw_sp_xstats_backlog(xstats, tclass_num); - _bstats_update(stats_ptr->bstats, tx_bytes, tx_packets); stats_ptr->qstats->overlimits += overlimits; - stats_ptr->qstats->drops += drops; - 
stats_ptr->qstats->backlog += - mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, - backlog) - - mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, - stats_base->backlog); - - stats_base->backlog = backlog; - stats_base->drops += drops; stats_base->overlimits += overlimits; - stats_base->tx_bytes += tx_bytes; - stats_base->tx_packets += tx_packets; + return 0; } @@ -486,15 +541,215 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, } } +static void +mlxsw_sp_setup_tc_qdisc_leaf_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + u64 backlog_cells = 0; + u64 tx_packets = 0; + u64 tx_bytes = 0; + u64 drops = 0; + + mlxsw_sp_qdisc_collect_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &tx_bytes, &tx_packets, + &drops, &backlog_cells); + + mlxsw_sp_qdisc->stats_base.tx_packets = tx_packets; + mlxsw_sp_qdisc->stats_base.tx_bytes = tx_bytes; + mlxsw_sp_qdisc->stats_base.drops = drops; + mlxsw_sp_qdisc->stats_base.backlog = 0; +} + static int -mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +mlxsw_sp_qdisc_tbf_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + struct mlxsw_sp_qdisc *root_qdisc = mlxsw_sp_port->root_qdisc; + + if (root_qdisc != mlxsw_sp_qdisc) + root_qdisc->stats_base.backlog -= + mlxsw_sp_qdisc->stats_base.backlog; + + return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + mlxsw_sp_qdisc->tclass_num, 0, + MLXSW_REG_QEEC_MAS_DIS, 0); +} + +static int +mlxsw_sp_qdisc_tbf_bs(struct mlxsw_sp_port *mlxsw_sp_port, + u32 max_size, u8 *p_burst_size) +{ + /* TBF burst size is configured in bytes. The ASIC burst size value is + * ((2 ^ bs) * 512 bits. Convert the TBF bytes to 512-bit units. + */ + u32 bs512 = max_size / 64; + u8 bs = fls(bs512); + + if (!bs) + return -EINVAL; + --bs; + + /* Demand a power of two. */ + if ((1 << bs) != bs512) + return -EINVAL; + + if (bs < mlxsw_sp_port->mlxsw_sp->lowest_shaper_bs || + bs > MLXSW_REG_QEEC_HIGHEST_SHAPER_BS) + return -EINVAL; + + *p_burst_size = bs; + return 0; +} + +static u32 +mlxsw_sp_qdisc_tbf_max_size(u8 bs) +{ + return (1U << bs) * 64; +} + +static u64 +mlxsw_sp_qdisc_tbf_rate_kbps(struct tc_tbf_qopt_offload_replace_params *p) +{ + /* TBF interface is in bytes/s, whereas Spectrum ASIC is configured in + * Kbits/s. 
+ */ + return p->rate.rate_bytes_ps / 1000 * 8; +} + +static int +mlxsw_sp_qdisc_tbf_check_params(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_tbf_qopt_offload_replace_params *p = params; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u64 rate_kbps = mlxsw_sp_qdisc_tbf_rate_kbps(p); + u8 burst_size; + int err; + + if (rate_kbps >= MLXSW_REG_QEEC_MAS_DIS) { + dev_err(mlxsw_sp_port->mlxsw_sp->bus_info->dev, + "spectrum: TBF: rate of %lluKbps must be below %u\n", + rate_kbps, MLXSW_REG_QEEC_MAS_DIS); + return -EINVAL; + } + + err = mlxsw_sp_qdisc_tbf_bs(mlxsw_sp_port, p->max_size, &burst_size); + if (err) { + u8 highest_shaper_bs = MLXSW_REG_QEEC_HIGHEST_SHAPER_BS; + + dev_err(mlxsw_sp->bus_info->dev, + "spectrum: TBF: invalid burst size of %u, must be a power of two between %u and %u", + p->max_size, + mlxsw_sp_qdisc_tbf_max_size(mlxsw_sp->lowest_shaper_bs), + mlxsw_sp_qdisc_tbf_max_size(highest_shaper_bs)); + return -EINVAL; + } + + return 0; +} + +static int +mlxsw_sp_qdisc_tbf_replace(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_tbf_qopt_offload_replace_params *p = params; + u64 rate_kbps = mlxsw_sp_qdisc_tbf_rate_kbps(p); + u8 burst_size; + int err; + + err = mlxsw_sp_qdisc_tbf_bs(mlxsw_sp_port, p->max_size, &burst_size); + if (WARN_ON_ONCE(err)) + /* check_params above was supposed to reject this value. */ + return -EINVAL; + + /* Configure subgroup shaper, so that both UC and MC traffic is subject + * to shaping. That is unlike RED, however UC queue lengths are going to + * be different than MC ones due to different pool and quota + * configurations, so the configuration is not applicable. For shaper on + * the other hand, subjecting the overall stream to the configured + * shaper makes sense. Also note that that is what we do for + * ieee_setmaxrate(). 
+ */ + return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + mlxsw_sp_qdisc->tclass_num, 0, + rate_kbps, burst_size); +} + +static void +mlxsw_sp_qdisc_tbf_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_tbf_qopt_offload_replace_params *p = params; + + mlxsw_sp_qdisc_leaf_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, p->qstats); +} + +static int +mlxsw_sp_qdisc_get_tbf_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_qopt_offload_stats *stats_ptr) +{ + mlxsw_sp_qdisc_get_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + stats_ptr); + return 0; +} + +static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_tbf = { + .type = MLXSW_SP_QDISC_TBF, + .check_params = mlxsw_sp_qdisc_tbf_check_params, + .replace = mlxsw_sp_qdisc_tbf_replace, + .unoffload = mlxsw_sp_qdisc_tbf_unoffload, + .destroy = mlxsw_sp_qdisc_tbf_destroy, + .get_stats = mlxsw_sp_qdisc_get_tbf_stats, + .clean_stats = mlxsw_sp_setup_tc_qdisc_leaf_clean_stats, +}; + +int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_tbf_qopt_offload *p) +{ + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false); + if (!mlxsw_sp_qdisc) + return -EOPNOTSUPP; + + if (p->command == TC_TBF_REPLACE) + return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, + &mlxsw_sp_qdisc_ops_tbf, + &p->replace_params); + + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle, + MLXSW_SP_QDISC_TBF)) + return -EOPNOTSUPP; + + switch (p->command) { + case TC_TBF_DESTROY: + return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + case TC_TBF_STATS: + return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &p->stats); + default: + return -EOPNOTSUPP; + } +} + +static int +__mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port) { int i; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, MLXSW_SP_PORT_DEFAULT_TCLASS); + mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + i, 0, false, 0); mlxsw_sp_qdisc_destroy(mlxsw_sp_port, &mlxsw_sp_port->tclass_qdiscs[i]); mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0; @@ -504,36 +759,58 @@ mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port, } static int -mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - void *params) +mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { - struct tc_prio_qopt_offload_params *p = params; + return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port); +} - if (p->bands > IEEE_8021QAZ_MAX_TCS) +static int +__mlxsw_sp_qdisc_ets_check_params(unsigned int nbands) +{ + if (nbands > IEEE_8021QAZ_MAX_TCS) return -EOPNOTSUPP; return 0; } static int -mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - void *params) +mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) { struct tc_prio_qopt_offload_params *p = params; + + return __mlxsw_sp_qdisc_ets_check_params(p->bands); +} + +static int +__mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, + unsigned int nbands, + const unsigned int *quanta, + const unsigned int *weights, + const u8 *priomap) +{ struct mlxsw_sp_qdisc *child_qdisc; int tclass, i, band, backlog; u8 old_priomap; int err; - for 
(band = 0; band < p->bands; band++) { + for (band = 0; band < nbands; band++) { tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass]; old_priomap = child_qdisc->prio_bitmap; child_qdisc->prio_bitmap = 0; + + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + tclass, 0, !!quanta[band], + weights[band]); + if (err) + return err; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - if (p->priomap[i] == band) { + if (priomap[i] == band) { child_qdisc->prio_bitmap |= BIT(i); if (BIT(i) & old_priomap) continue; @@ -556,21 +833,46 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass]; child_qdisc->prio_bitmap = 0; mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc); + mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + tclass, 0, false, 0); } return 0; } +static int +mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_prio_qopt_offload_params *p = params; + unsigned int zeroes[TCQ_ETS_MAX_BANDS] = {0}; + + return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, p->bands, + zeroes, zeroes, p->priomap); +} + +static void +__mlxsw_sp_qdisc_ets_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct gnet_stats_queue *qstats) +{ + u64 backlog; + + backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_qdisc->stats_base.backlog); + qstats->backlog -= backlog; +} + static void mlxsw_sp_qdisc_prio_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) { struct tc_prio_qopt_offload_params *p = params; - u64 backlog; - backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, - mlxsw_sp_qdisc->stats_base.backlog); - p->qstats->backlog -= backlog; + __mlxsw_sp_qdisc_ets_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, + p->qstats); } static int @@ -578,37 +880,23 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, struct tc_qopt_offload_stats *stats_ptr) { - u64 tx_bytes, tx_packets, drops = 0, backlog = 0; - struct mlxsw_sp_qdisc_stats *stats_base; - struct mlxsw_sp_port_xstats *xstats; - struct rtnl_link_stats64 *stats; + struct mlxsw_sp_qdisc *tc_qdisc; + u64 tx_packets = 0; + u64 tx_bytes = 0; + u64 backlog = 0; + u64 drops = 0; int i; - xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; - stats = &mlxsw_sp_port->periodic_hw_stats.stats; - stats_base = &mlxsw_sp_qdisc->stats_base; - - tx_bytes = stats->tx_bytes - stats_base->tx_bytes; - tx_packets = stats->tx_packets - stats_base->tx_packets; - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - drops += mlxsw_sp_xstats_tail_drop(xstats, i); - drops += xstats->wred_drop[i]; - backlog += mlxsw_sp_xstats_backlog(xstats, i); + tc_qdisc = &mlxsw_sp_port->tclass_qdiscs[i]; + mlxsw_sp_qdisc_collect_tc_stats(mlxsw_sp_port, tc_qdisc, + &tx_bytes, &tx_packets, + &drops, &backlog); } - drops = drops - stats_base->drops; - _bstats_update(stats_ptr->bstats, tx_bytes, tx_packets); - stats_ptr->qstats->drops += drops; - stats_ptr->qstats->backlog += - mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, - backlog) - - mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, - stats_base->backlog); - stats_base->backlog = backlog; - stats_base->drops += drops; - stats_base->tx_bytes += tx_bytes; - stats_base->tx_packets += tx_packets; + mlxsw_sp_qdisc_update_stats(mlxsw_sp_port->mlxsw_sp, mlxsw_sp_qdisc, + tx_bytes, 
tx_packets, drops, backlog, + stats_ptr); return 0; } @@ -647,27 +935,93 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = { .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats, }; -/* Grafting is not supported in mlxsw. It will result in un-offloading of the - * grafted qdisc as well as the qdisc in the qdisc new location. - * (However, if the graft is to the location where the qdisc is already at, it - * will be ignored completely and won't cause un-offloading). +static int +mlxsw_sp_qdisc_ets_check_params(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_ets_qopt_offload_replace_params *p = params; + + return __mlxsw_sp_qdisc_ets_check_params(p->bands); +} + +static int +mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_ets_qopt_offload_replace_params *p = params; + + return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, p->bands, + p->quanta, p->weights, p->priomap); +} + +static void +mlxsw_sp_qdisc_ets_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_ets_qopt_offload_replace_params *p = params; + + __mlxsw_sp_qdisc_ets_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, + p->qstats); +} + +static int +mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port); +} + +static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_ets = { + .type = MLXSW_SP_QDISC_ETS, + .check_params = mlxsw_sp_qdisc_ets_check_params, + .replace = mlxsw_sp_qdisc_ets_replace, + .unoffload = mlxsw_sp_qdisc_ets_unoffload, + .destroy = mlxsw_sp_qdisc_ets_destroy, + .get_stats = mlxsw_sp_qdisc_get_prio_stats, + .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats, +}; + +/* Linux allows linking of Qdiscs to arbitrary classes (so long as the resulting + * graph is free of cycles). These operations do not change the parent handle + * though, which means it can be incomplete (if there is more than one class + * where the Qdisc in question is grafted) or outright wrong (if the Qdisc was + * linked to a different class and then removed from the original class). + * + * E.g. consider this sequence of operations: + * + * # tc qdisc add dev swp1 root handle 1: prio + * # tc qdisc add dev swp1 parent 1:3 handle 13: red limit 1000000 avpkt 10000 + * RED: set bandwidth to 10Mbit + * # tc qdisc link dev swp1 handle 13: parent 1:2 + * + * At this point, both 1:2 and 1:3 have the same RED Qdisc instance as their + * child. But RED will still only claim that 1:3 is its parent. If it's removed + * from that band, its only parent will be 1:2, but it will continue to claim + * that it is in fact 1:3. + * + * The notification for child Qdisc replace (e.g. TC_RED_REPLACE) comes before + * the notification for parent graft (e.g. TC_PRIO_GRAFT). We take the replace + * notification to offload the child Qdisc, based on its parent handle, and use + * the graft operation to validate that the class where the child is actually + * grafted corresponds to the parent handle. If the two don't match, we + * unoffload the child. 
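+ *
+ * Applied to the example above: the graft notification for class 1:2
+ * carries band 1 and child handle 13:, but 13: was offloaded against the
+ * band of its original parent 1:3. The handles therefore do not match and
+ * the driver unoffloads 13: (see __mlxsw_sp_qdisc_ets_graft() below).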
*/ static int -mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - struct tc_prio_qopt_offload_graft_params *p) +__mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u8 band, u32 child_handle) { - int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->band); + int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); struct mlxsw_sp_qdisc *old_qdisc; - /* Check if the grafted qdisc is already in its "new" location. If so - - * nothing needs to be done. - */ - if (p->band < IEEE_8021QAZ_MAX_TCS && - mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == p->child_handle) + if (band < IEEE_8021QAZ_MAX_TCS && + mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == child_handle) return 0; - if (!p->child_handle) { + if (!child_handle) { /* This is an invisible FIFO replacing the original Qdisc. * Ignore it--the original Qdisc's destroy will follow. */ @@ -678,7 +1032,7 @@ mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port, * unoffload it. */ old_qdisc = mlxsw_sp_qdisc_find_by_handle(mlxsw_sp_port, - p->child_handle); + child_handle); if (old_qdisc) mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc); @@ -687,6 +1041,15 @@ mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port, return -EOPNOTSUPP; } +static int +mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_prio_qopt_offload_graft_params *p) +{ + return __mlxsw_sp_qdisc_ets_graft(mlxsw_sp_port, mlxsw_sp_qdisc, + p->band, p->child_handle); +} + int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_prio_qopt_offload *p) { @@ -720,6 +1083,40 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, } } +int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_ets_qopt_offload *p) +{ + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true); + if (!mlxsw_sp_qdisc) + return -EOPNOTSUPP; + + if (p->command == TC_ETS_REPLACE) + return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, + &mlxsw_sp_qdisc_ops_ets, + &p->replace_params); + + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle, + MLXSW_SP_QDISC_ETS)) + return -EOPNOTSUPP; + + switch (p->command) { + case TC_ETS_DESTROY: + return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + case TC_ETS_STATS: + return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &p->stats); + case TC_ETS_GRAFT: + return __mlxsw_sp_qdisc_ets_graft(mlxsw_sp_port, mlxsw_sp_qdisc, + p->graft_params.band, + p->graft_params.child_handle); + default: + return -EOPNOTSUPP; + } +} + int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port) { struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 8290e82240fc..ce707723f8cf 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -382,9 +382,10 @@ enum mlxsw_sp_fib_entry_type { }; struct mlxsw_sp_nexthop_group; +struct mlxsw_sp_fib_entry; struct mlxsw_sp_fib_node { - struct list_head entry_list; + struct mlxsw_sp_fib_entry *fib_entry; struct list_head list; struct rhash_head ht_node; struct mlxsw_sp_fib *fib; @@ -397,7 +398,6 @@ struct mlxsw_sp_fib_entry_decap { }; struct mlxsw_sp_fib_entry { - struct list_head list; struct mlxsw_sp_fib_node *fib_node; enum mlxsw_sp_fib_entry_type 
type; struct list_head nexthop_group_node; @@ -1162,7 +1162,6 @@ mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const union mlxsw_sp_l3addr *addr, enum mlxsw_sp_fib_entry_type type) { - struct mlxsw_sp_fib_entry *fib_entry; struct mlxsw_sp_fib_node *fib_node; unsigned char addr_prefix_len; struct mlxsw_sp_fib *fib; @@ -1191,15 +1190,10 @@ mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id, fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len, addr_prefix_len); - if (!fib_node || list_empty(&fib_node->entry_list)) + if (!fib_node || fib_node->fib_entry->type != type) return NULL; - fib_entry = list_first_entry(&fib_node->entry_list, - struct mlxsw_sp_fib_entry, list); - if (fib_entry->type != type) - return NULL; - - return fib_entry; + return fib_node->fib_entry; } /* Given an IPIP entry, find the corresponding decap route. */ @@ -1209,7 +1203,6 @@ mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp, { static struct mlxsw_sp_fib_node *fib_node; const struct mlxsw_sp_ipip_ops *ipip_ops; - struct mlxsw_sp_fib_entry *fib_entry; unsigned char saddr_prefix_len; union mlxsw_sp_l3addr saddr; struct mlxsw_sp_fib *ul_fib; @@ -1244,15 +1237,11 @@ mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp, fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len, saddr_prefix_len); - if (!fib_node || list_empty(&fib_node->entry_list)) + if (!fib_node || + fib_node->fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP) return NULL; - fib_entry = list_first_entry(&fib_node->entry_list, - struct mlxsw_sp_fib_entry, list); - if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP) - return NULL; - - return fib_entry; + return fib_node->fib_entry; } static struct mlxsw_sp_ipip_entry * @@ -3231,10 +3220,6 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, return 0; } -static bool -mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node, - const struct mlxsw_sp_fib_entry *fib_entry); - static int mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) @@ -3243,9 +3228,6 @@ mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp, int err; list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) { - if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node, - fib_entry)) - continue; err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); if (err) return err; @@ -3253,24 +3235,6 @@ mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp, return 0; } -static void -mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op, int err); - -static void -mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp) -{ - enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE; - struct mlxsw_sp_fib_entry *fib_entry; - - list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) { - if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node, - fib_entry)) - continue; - mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0); - } -} - static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size) { /* Valid sizes for an adjacency group are: @@ -3374,6 +3338,73 @@ mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp) } } +static struct mlxsw_sp_nexthop * +mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, + const struct mlxsw_sp_rt6 *mlxsw_sp_rt6); + +static void +mlxsw_sp_nexthop4_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + int 
i; + + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + + if (nh->offloaded) + nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD; + else + nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; + } +} + +static void +__mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh; + struct mlxsw_sp_nexthop *nh; + + nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6); + if (nh && nh->offloaded) + fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD; + else + fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; + } +} + +static void +mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + + /* Unfortunately, in IPv6 the route and the nexthop are described by + * the same struct, so we need to iterate over all the routes using the + * nexthop group and set / clear the offload indication for them. + */ + list_for_each_entry(fib6_entry, &nh_grp->fib_list, + common.nexthop_group_node) + __mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry); +} + +static void +mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + switch (mlxsw_sp_nexthop_group_type(nh_grp)) { + case AF_INET: + mlxsw_sp_nexthop4_group_offload_refresh(mlxsw_sp, nh_grp); + break; + case AF_INET6: + mlxsw_sp_nexthop6_group_offload_refresh(mlxsw_sp, nh_grp); + break; + } +} + static void mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) @@ -3447,6 +3478,8 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, goto set_trap; } + mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp); + if (!old_adj_index_valid) { /* The trap was set for fib entries, so we have to call * fib entry update to unset it and use adjacency index. @@ -3468,9 +3501,6 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, goto set_trap; } - /* Offload state within the group changed, so update the flags. 
*/ - mlxsw_sp_nexthop_fib_entries_refresh(nh_grp); - return; set_trap: @@ -3483,6 +3513,7 @@ set_trap: err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp); if (err) dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n"); + mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp); if (old_adj_index_valid) mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, nh_grp->ecmp_size, nh_grp->adj_index); @@ -3845,7 +3876,7 @@ static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp, key.fib_nh = fib_nh; nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key); - if (WARN_ON_ONCE(!nh)) + if (!nh) return; switch (event) { @@ -4065,131 +4096,128 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, } static void -mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { - struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; - int i; - - if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL || - fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE || - fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP || - fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) { - nh_grp->nexthops->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD; - return; - } - - for (i = 0; i < nh_grp->count; i++) { - struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + struct fib_info *fi = mlxsw_sp_nexthop4_group_fi(fib_entry->nh_group); + u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr; + int dst_len = fib_entry->fib_node->key.prefix_len; + struct mlxsw_sp_fib4_entry *fib4_entry; + struct fib_rt_info fri; + bool should_offload; - if (nh->offloaded) - nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD; - else - nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; - } + should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry); + fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry, + common); + fri.fi = fi; + fri.tb_id = fib4_entry->tb_id; + fri.dst = cpu_to_be32(*p_dst); + fri.dst_len = dst_len; + fri.tos = fib4_entry->tos; + fri.type = fib4_entry->type; + fri.offload = should_offload; + fri.trap = !should_offload; + fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri); } static void -mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { - struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; - int i; - - if (!list_is_singular(&nh_grp->fib_list)) - return; - - for (i = 0; i < nh_grp->count; i++) { - struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + struct fib_info *fi = mlxsw_sp_nexthop4_group_fi(fib_entry->nh_group); + u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr; + int dst_len = fib_entry->fib_node->key.prefix_len; + struct mlxsw_sp_fib4_entry *fib4_entry; + struct fib_rt_info fri; - nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; - } + fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry, + common); + fri.fi = fi; + fri.tb_id = fib4_entry->tb_id; + fri.dst = cpu_to_be32(*p_dst); + fri.dst_len = dst_len; + fri.tos = fib4_entry->tos; + fri.type = fib4_entry->type; + fri.offload = false; + fri.trap = false; + fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri); } static void -mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_rt6 
*mlxsw_sp_rt6; + bool should_offload; + + should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry); + /* In IPv6 a multipath route is represented using multiple routes, so + * we need to set the flags on all of them. + */ fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry, common); - - if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL || - fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE) { - list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, - list)->rt->fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD; - return; - } - - list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { - struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; - struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh; - struct mlxsw_sp_nexthop *nh; - - nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6); - if (nh && nh->offloaded) - fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD; - else - fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; - } + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) + fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, should_offload, + !should_offload); } static void -mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_rt6 *mlxsw_sp_rt6; fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry, common); - list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { - struct fib6_info *rt = mlxsw_sp_rt6->rt; - - rt->fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; - } + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) + fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, false, false); } -static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +static void +mlxsw_sp_fib_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { switch (fib_entry->fib_node->fib->proto) { case MLXSW_SP_L3_PROTO_IPV4: - mlxsw_sp_fib4_entry_offload_set(fib_entry); + mlxsw_sp_fib4_entry_hw_flags_set(mlxsw_sp, fib_entry); break; case MLXSW_SP_L3_PROTO_IPV6: - mlxsw_sp_fib6_entry_offload_set(fib_entry); + mlxsw_sp_fib6_entry_hw_flags_set(mlxsw_sp, fib_entry); break; } } static void -mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { switch (fib_entry->fib_node->fib->proto) { case MLXSW_SP_L3_PROTO_IPV4: - mlxsw_sp_fib4_entry_offload_unset(fib_entry); + mlxsw_sp_fib4_entry_hw_flags_clear(mlxsw_sp, fib_entry); break; case MLXSW_SP_L3_PROTO_IPV6: - mlxsw_sp_fib6_entry_offload_unset(fib_entry); + mlxsw_sp_fib6_entry_hw_flags_clear(mlxsw_sp, fib_entry); break; } } static void -mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op, int err) +mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) { switch (op) { - case MLXSW_REG_RALUE_OP_WRITE_DELETE: - return mlxsw_sp_fib_entry_offload_unset(fib_entry); case MLXSW_REG_RALUE_OP_WRITE_WRITE: - if (err) - return; - if (mlxsw_sp_fib_entry_should_offload(fib_entry)) - mlxsw_sp_fib_entry_offload_set(fib_entry); - else - mlxsw_sp_fib_entry_offload_unset(fib_entry); - return; + mlxsw_sp_fib_entry_hw_flags_set(mlxsw_sp, fib_entry); + break; + case MLXSW_REG_RALUE_OP_WRITE_DELETE: + mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, fib_entry); + break; default: - return; + break; } } 
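The net effect of the hw_flags_set()/hw_flags_clear() helpers above is that a route successfully written to the ASIC is reported to the kernel as either offloaded (fri.offload, i.e. should_offload) or trapped to the CPU (fri.trap), and a deleted route has both flags cleared. A rough way to observe this from user space is sketched below; the port name and prefix are illustrative, and the exact flag wording printed by the route dump depends on the iproute2 version:

    # ip route add 198.51.100.0/24 via 192.0.2.2 dev swp1
    # ip route show 198.51.100.0/24
      (the entry should be listed with the "offload" flag, or "trap" when
       its packets are trapped to the CPU instead)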
@@ -4416,7 +4444,10 @@ static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, { int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op); - mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err); + if (err) + return err; + + mlxsw_sp_fib_entry_hw_flags_refresh(mlxsw_sp, fib_entry, op); return err; } @@ -4491,6 +4522,19 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, } } +static void +mlxsw_sp_fib4_entry_type_unset(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + switch (fib_entry->type) { + case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: + mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry); + break; + default: + break; + } +} + static struct mlxsw_sp_fib4_entry * mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node, @@ -4523,6 +4567,7 @@ mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp, return fib4_entry; err_nexthop4_group_get: + mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, fib_entry); err_fib4_entry_type_set: kfree(fib4_entry); return ERR_PTR(err); @@ -4532,6 +4577,7 @@ static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib4_entry *fib4_entry) { mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common); + mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, &fib4_entry->common); kfree(fib4_entry); } @@ -4555,15 +4601,14 @@ mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp, if (!fib_node) return NULL; - list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) { - if (fib4_entry->tb_id == fen_info->tb_id && - fib4_entry->tos == fen_info->tos && - fib4_entry->type == fen_info->type && - mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) == - fen_info->fi) { - return fib4_entry; - } - } + fib4_entry = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib4_entry, common); + if (fib4_entry->tb_id == fen_info->tb_id && + fib4_entry->tos == fen_info->tos && + fib4_entry->type == fen_info->type && + mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) == + fen_info->fi) + return fib4_entry; return NULL; } @@ -4611,7 +4656,6 @@ mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr, if (!fib_node) return NULL; - INIT_LIST_HEAD(&fib_node->entry_list); list_add(&fib_node->list, &fib->node_list); memcpy(fib_node->key.addr, addr, addr_len); fib_node->key.prefix_len = prefix_len; @@ -4622,18 +4666,9 @@ mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr, static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node) { list_del(&fib_node->list); - WARN_ON(!list_empty(&fib_node->entry_list)); kfree(fib_node); } -static bool -mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node, - const struct mlxsw_sp_fib_entry *fib_entry) -{ - return list_first_entry(&fib_node->entry_list, - struct mlxsw_sp_fib_entry, list) == fib_entry; -} - static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { @@ -4773,200 +4808,48 @@ static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_vr *vr = fib_node->fib->vr; - if (!list_empty(&fib_node->entry_list)) + if (fib_node->fib_entry) return; mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node); mlxsw_sp_fib_node_destroy(fib_node); mlxsw_sp_vr_put(mlxsw_sp, vr); } -static struct mlxsw_sp_fib4_entry * -mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct mlxsw_sp_fib4_entry *new4_entry) -{ - struct mlxsw_sp_fib4_entry *fib4_entry; - - list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) { - if 
(fib4_entry->tb_id > new4_entry->tb_id) - continue; - if (fib4_entry->tb_id != new4_entry->tb_id) - break; - if (fib4_entry->tos > new4_entry->tos) - continue; - if (fib4_entry->prio >= new4_entry->prio || - fib4_entry->tos < new4_entry->tos) - return fib4_entry; - } - - return NULL; -} - -static int -mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry, - struct mlxsw_sp_fib4_entry *new4_entry) -{ - struct mlxsw_sp_fib_node *fib_node; - - if (WARN_ON(!fib4_entry)) - return -EINVAL; - - fib_node = fib4_entry->common.fib_node; - list_for_each_entry_from(fib4_entry, &fib_node->entry_list, - common.list) { - if (fib4_entry->tb_id != new4_entry->tb_id || - fib4_entry->tos != new4_entry->tos || - fib4_entry->prio != new4_entry->prio) - break; - } - - list_add_tail(&new4_entry->common.list, &fib4_entry->common.list); - return 0; -} - -static int -mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry, - bool replace, bool append) -{ - struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node; - struct mlxsw_sp_fib4_entry *fib4_entry; - - fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry); - - if (append) - return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry); - if (replace && WARN_ON(!fib4_entry)) - return -EINVAL; - - /* Insert new entry before replaced one, so that we can later - * remove the second. - */ - if (fib4_entry) { - list_add_tail(&new4_entry->common.list, - &fib4_entry->common.list); - } else { - struct mlxsw_sp_fib4_entry *last; - - list_for_each_entry(last, &fib_node->entry_list, common.list) { - if (new4_entry->tb_id > last->tb_id) - break; - fib4_entry = last; - } - - if (fib4_entry) - list_add(&new4_entry->common.list, - &fib4_entry->common.list); - else - list_add(&new4_entry->common.list, - &fib_node->entry_list); - } - - return 0; -} - -static void -mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry) -{ - list_del(&fib4_entry->common.list); -} - -static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) -{ - struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; - - if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry)) - return 0; - - /* To prevent packet loss, overwrite the previously offloaded - * entry. 
- */ - if (!list_is_singular(&fib_node->entry_list)) { - enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE; - struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list); - - mlxsw_sp_fib_entry_offload_refresh(n, op, 0); - } - - return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); -} - -static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp, +static int mlxsw_sp_fib_node_entry_link(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; - - if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry)) - return; - - /* Promote the next entry by overwriting the deleted entry */ - if (!list_is_singular(&fib_node->entry_list)) { - struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list); - enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE; - - mlxsw_sp_fib_entry_update(mlxsw_sp, n); - mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0); - return; - } - - mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry); -} - -static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib4_entry *fib4_entry, - bool replace, bool append) -{ int err; - err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append); - if (err) - return err; + fib_node->fib_entry = fib_entry; - err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common); + err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); if (err) - goto err_fib_node_entry_add; + goto err_fib_entry_update; return 0; -err_fib_node_entry_add: - mlxsw_sp_fib4_node_list_remove(fib4_entry); +err_fib_entry_update: + fib_node->fib_entry = NULL; return err; } static void -mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib4_entry *fib4_entry) -{ - mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common); - mlxsw_sp_fib4_node_list_remove(fib4_entry); - - if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) - mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common); -} - -static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib4_entry *fib4_entry, - bool replace) +mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { - struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node; - struct mlxsw_sp_fib4_entry *replaced; - - if (!replace) - return; - - /* We inserted the new entry before replaced one */ - replaced = list_next_entry(fib4_entry, common.list); + struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; - mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced); - mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced); - mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry); + fib_node->fib_entry = NULL; } static int -mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp, - const struct fib_entry_notifier_info *fen_info, - bool replace, bool append) +mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp, + const struct fib_entry_notifier_info *fen_info) { - struct mlxsw_sp_fib4_entry *fib4_entry; + struct mlxsw_sp_fib4_entry *fib4_entry, *fib4_replaced; + struct mlxsw_sp_fib_entry *replaced; struct mlxsw_sp_fib_node *fib_node; int err; @@ -4989,18 +4872,26 @@ mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp, goto err_fib4_entry_create; } - err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace, - append); + replaced = fib_node->fib_entry; + err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib4_entry->common); if (err) { dev_warn(mlxsw_sp->bus_info->dev, 
"Failed to link FIB entry to node\n"); - goto err_fib4_node_entry_link; + goto err_fib_node_entry_link; } - mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace); + /* Nothing to replace */ + if (!replaced) + return 0; + + mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced); + fib4_replaced = container_of(replaced, struct mlxsw_sp_fib4_entry, + common); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_replaced); return 0; -err_fib4_node_entry_link: +err_fib_node_entry_link: + fib_node->fib_entry = replaced; mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); err_fib4_entry_create: mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); @@ -5021,7 +4912,7 @@ static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, return; fib_node = fib4_entry->common.fib_node; - mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry); + mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib4_entry->common); mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } @@ -5083,13 +4974,6 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6) kfree(mlxsw_sp_rt6); } -static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt) -{ - /* RTF_CACHE routes are ignored */ - return !(rt->fib6_flags & RTF_ADDRCONF) && - rt->fib6_nh->fib_nh_gw_family; -} - static struct fib6_info * mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry) { @@ -5097,37 +4981,6 @@ mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry) list)->rt; } -static struct mlxsw_sp_fib6_entry * -mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct fib6_info *nrt, bool replace) -{ - struct mlxsw_sp_fib6_entry *fib6_entry; - - if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace) - return NULL; - - list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { - struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); - - /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same - * virtual router. - */ - if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id) - continue; - if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id) - break; - if (rt->fib6_metric < nrt->fib6_metric) - continue; - if (rt->fib6_metric == nrt->fib6_metric && - mlxsw_sp_fib6_rt_can_mp(rt)) - return fib6_entry; - if (rt->fib6_metric > nrt->fib6_metric) - break; - } - - return NULL; -} - static struct mlxsw_sp_rt6 * mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry, const struct fib6_info *rt) @@ -5313,6 +5166,11 @@ static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp, &nh_grp->fib_list); fib6_entry->common.nh_group = nh_grp; + /* The route and the nexthop are described by the same struct, so we + * need to the update the nexthop offload indication for the new route. + */ + __mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry); + return 0; } @@ -5345,16 +5203,16 @@ mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp, * currently associated with it in the device's table is that * of the old group. Start using the new one instead. 
*/ - err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common); + err = mlxsw_sp_fib_entry_update(mlxsw_sp, &fib6_entry->common); if (err) - goto err_fib_node_entry_add; + goto err_fib_entry_update; if (list_empty(&old_nh_grp->fib_list)) mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp); return 0; -err_fib_node_entry_add: +err_fib_entry_update: mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common); err_nexthop6_group_get: list_add_tail(&fib6_entry->common.nexthop_group_node, @@ -5519,112 +5377,13 @@ static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp, } static struct mlxsw_sp_fib6_entry * -mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct fib6_info *nrt, bool replace) -{ - struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL; - - list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { - struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); - - if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id) - continue; - if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id) - break; - if (replace && rt->fib6_metric == nrt->fib6_metric) { - if (mlxsw_sp_fib6_rt_can_mp(rt) == - mlxsw_sp_fib6_rt_can_mp(nrt)) - return fib6_entry; - if (mlxsw_sp_fib6_rt_can_mp(nrt)) - fallback = fallback ?: fib6_entry; - } - if (rt->fib6_metric > nrt->fib6_metric) - return fallback ?: fib6_entry; - } - - return fallback; -} - -static int -mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry, - bool *p_replace) -{ - struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node; - struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry); - struct mlxsw_sp_fib6_entry *fib6_entry; - - fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, *p_replace); - - if (*p_replace && !fib6_entry) - *p_replace = false; - - if (fib6_entry) { - list_add_tail(&new6_entry->common.list, - &fib6_entry->common.list); - } else { - struct mlxsw_sp_fib6_entry *last; - - list_for_each_entry(last, &fib_node->entry_list, common.list) { - struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last); - - if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id) - break; - fib6_entry = last; - } - - if (fib6_entry) - list_add(&new6_entry->common.list, - &fib6_entry->common.list); - else - list_add(&new6_entry->common.list, - &fib_node->entry_list); - } - - return 0; -} - -static void -mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry) -{ - list_del(&fib6_entry->common.list); -} - -static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib6_entry *fib6_entry, - bool *p_replace) -{ - int err; - - err = mlxsw_sp_fib6_node_list_insert(fib6_entry, p_replace); - if (err) - return err; - - err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common); - if (err) - goto err_fib_node_entry_add; - - return 0; - -err_fib_node_entry_add: - mlxsw_sp_fib6_node_list_remove(fib6_entry); - return err; -} - -static void -mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib6_entry *fib6_entry) -{ - mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common); - mlxsw_sp_fib6_node_list_remove(fib6_entry); -} - -static struct mlxsw_sp_fib6_entry * mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp, const struct fib6_info *rt) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; struct mlxsw_sp_fib *fib; + struct fib6_info *cmp_rt; struct mlxsw_sp_vr *vr; vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id); @@ -5638,40 +5397,23 @@ mlxsw_sp_fib6_entry_lookup(struct 
mlxsw_sp *mlxsw_sp, if (!fib_node) return NULL; - list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { - struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry); - - if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id && - rt->fib6_metric == iter_rt->fib6_metric && - mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt)) - return fib6_entry; - } + fib6_entry = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib6_entry, common); + cmp_rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + if (rt->fib6_table->tb6_id == cmp_rt->fib6_table->tb6_id && + rt->fib6_metric == cmp_rt->fib6_metric && + mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt)) + return fib6_entry; return NULL; } -static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib6_entry *fib6_entry, - bool replace) -{ - struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node; - struct mlxsw_sp_fib6_entry *replaced; - - if (!replace) - return; - - replaced = list_next_entry(fib6_entry, common.list); - - mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced); - mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced); - mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); -} - -static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, - struct fib6_info **rt_arr, - unsigned int nrt6, bool replace) +static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp, + struct fib6_info **rt_arr, + unsigned int nrt6) { - struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_fib6_entry *fib6_entry, *fib6_replaced; + struct mlxsw_sp_fib_entry *replaced; struct mlxsw_sp_fib_node *fib_node; struct fib6_info *rt = rt_arr[0]; int err; @@ -5693,18 +5435,6 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, if (IS_ERR(fib_node)) return PTR_ERR(fib_node); - /* Before creating a new entry, try to append route to an existing - * multipath entry. 
- */ - fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace); - if (fib6_entry) { - err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, - rt_arr, nrt6); - if (err) - goto err_fib6_entry_nexthop_add; - return 0; - } - fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr, nrt6); if (IS_ERR(fib6_entry)) { @@ -5712,17 +5442,70 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, goto err_fib6_entry_create; } - err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, &replace); + replaced = fib_node->fib_entry; + err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib6_entry->common); if (err) - goto err_fib6_node_entry_link; + goto err_fib_node_entry_link; + + /* Nothing to replace */ + if (!replaced) + return 0; - mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace); + mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced); + fib6_replaced = container_of(replaced, struct mlxsw_sp_fib6_entry, + common); + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_replaced); return 0; -err_fib6_node_entry_link: +err_fib_node_entry_link: + fib_node->fib_entry = replaced; mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); err_fib6_entry_create: + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return err; +} + +static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp, + struct fib6_info **rt_arr, + unsigned int nrt6) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_fib_node *fib_node; + struct fib6_info *rt = rt_arr[0]; + int err; + + if (mlxsw_sp->router->aborted) + return 0; + + if (rt->fib6_src.plen) + return -EINVAL; + + if (mlxsw_sp_fib6_rt_should_ignore(rt)) + return 0; + + fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id, + &rt->fib6_dst.addr, + sizeof(rt->fib6_dst.addr), + rt->fib6_dst.plen, + MLXSW_SP_L3_PROTO_IPV6); + if (IS_ERR(fib_node)) + return PTR_ERR(fib_node); + + if (WARN_ON_ONCE(!fib_node->fib_entry)) { + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return -EINVAL; + } + + fib6_entry = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib6_entry, common); + err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt_arr, + nrt6); + if (err) + goto err_fib6_entry_nexthop_add; + + return 0; + err_fib6_entry_nexthop_add: mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); return err; @@ -5762,7 +5545,7 @@ static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, fib_node = fib6_entry->common.fib_node; - mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry); + mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib6_entry->common); mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } @@ -5916,39 +5699,25 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { - struct mlxsw_sp_fib4_entry *fib4_entry, *tmp; - - list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list, - common.list) { - bool do_break = &tmp->common.list == &fib_node->entry_list; + struct mlxsw_sp_fib4_entry *fib4_entry; - mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry); - mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); - mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); - /* Break when entry list is empty and node was freed. - * Otherwise, we'll access freed memory in the next - * iteration. 
- */ - if (do_break) - break; - } + fib4_entry = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib4_entry, common); + mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { - struct mlxsw_sp_fib6_entry *fib6_entry, *tmp; - - list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list, - common.list) { - bool do_break = &tmp->common.list == &fib_node->entry_list; + struct mlxsw_sp_fib6_entry *fib6_entry; - mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry); - mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); - mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); - if (do_break) - break; - } + fib6_entry = container_of(fib_node->fib_entry, + struct mlxsw_sp_fib6_entry, common); + mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry); + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp, @@ -6099,7 +5868,6 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) struct mlxsw_sp_fib_event_work *fib_work = container_of(work, struct mlxsw_sp_fib_event_work, work); struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; - bool replace, append; int err; /* Protect internal structures from changes */ @@ -6107,13 +5875,9 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) mlxsw_sp_span_respin(mlxsw_sp); switch (fib_work->event) { - case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: - replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; - append = fib_work->event == FIB_EVENT_ENTRY_APPEND; - err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info, - replace, append); + case FIB_EVENT_ENTRY_REPLACE: + err = mlxsw_sp_router_fib4_replace(mlxsw_sp, + &fib_work->fen_info); if (err) mlxsw_sp_router_fib_abort(mlxsw_sp); fib_info_put(fib_work->fen_info.fi); @@ -6138,20 +5902,24 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) struct mlxsw_sp_fib_event_work *fib_work = container_of(work, struct mlxsw_sp_fib_event_work, work); struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; - bool replace; int err; rtnl_lock(); mlxsw_sp_span_respin(mlxsw_sp); switch (fib_work->event) { - case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_ADD: - replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; - err = mlxsw_sp_router_fib6_add(mlxsw_sp, - fib_work->fib6_work.rt_arr, - fib_work->fib6_work.nrt6, - replace); + case FIB_EVENT_ENTRY_REPLACE: + err = mlxsw_sp_router_fib6_replace(mlxsw_sp, + fib_work->fib6_work.rt_arr, + fib_work->fib6_work.nrt6); + if (err) + mlxsw_sp_router_fib_abort(mlxsw_sp); + mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); + break; + case FIB_EVENT_ENTRY_APPEND: + err = mlxsw_sp_router_fib6_append(mlxsw_sp, + fib_work->fib6_work.rt_arr, + fib_work->fib6_work.nrt6); if (err) mlxsw_sp_router_fib_abort(mlxsw_sp); mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); @@ -6216,8 +5984,6 @@ static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: fen_info = container_of(info, struct fib_entry_notifier_info, 
info); @@ -6245,7 +6011,7 @@ static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_APPEND: /* fall through */ case FIB_EVENT_ENTRY_DEL: fen6_info = container_of(info, struct fib6_entry_notifier_info, info); @@ -6348,9 +6114,9 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, err = mlxsw_sp_router_fib_rule_event(event, info, router->mlxsw_sp); return notifier_from_errno(err); - case FIB_EVENT_ENTRY_ADD: + case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ + case FIB_EVENT_ENTRY_APPEND: if (router->aborted) { NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route"); return notifier_from_errno(-EINVAL); @@ -8025,8 +7791,18 @@ mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp) { + int err; + mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr; INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list); + + err = mlxsw_sp_ipip_ecn_encap_init(mlxsw_sp); + if (err) + return err; + err = mlxsw_sp_ipip_ecn_decap_init(mlxsw_sp); + if (err) + return err; + return mlxsw_sp_ipip_config_tigcr(mlxsw_sp); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index cc1de91e8217..c9b94f435cdd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -104,4 +104,7 @@ static inline bool mlxsw_sp_l3addr_eq(const union mlxsw_sp_l3addr *addr1, return !memcmp(addr1, addr2, sizeof(*addr1)); } +int mlxsw_sp_ipip_ecn_encap_init(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_ipip_ecn_decap_init(struct mlxsw_sp *mlxsw_sp); + #endif /* _MLXSW_ROUTER_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index 200d324e6d99..0cdd7954a085 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -748,33 +748,50 @@ static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port) return false; } -static int mlxsw_sp_span_mtu_to_buffsize(const struct mlxsw_sp *mlxsw_sp, - int mtu) +static int +mlxsw_sp_span_port_buffsize_update(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu) { - return mlxsw_sp_bytes_cells(mlxsw_sp, mtu * 5 / 2) + 1; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char sbib_pl[MLXSW_REG_SBIB_LEN]; + u32 buffsize; + u32 speed; + int err; + + err = mlxsw_sp_port_speed_get(mlxsw_sp_port, &speed); + if (err) + return err; + if (speed == SPEED_UNKNOWN) + speed = 0; + + buffsize = mlxsw_sp_span_buffsize_get(mlxsw_sp, speed, mtu); + mlxsw_reg_sbib_pack(sbib_pl, mlxsw_sp_port->local_port, buffsize); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); } int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu) { - struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; - char sbib_pl[MLXSW_REG_SBIB_LEN]; - int err; - /* If port is egress mirrored, the shared buffer size should be * updated according to the mtu value */ - if (mlxsw_sp_span_is_egress_mirror(port)) { - u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, mtu); + if (mlxsw_sp_span_is_egress_mirror(port)) + return mlxsw_sp_span_port_buffsize_update(port, mtu); + return 0; +} - 
mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); - if (err) { - netdev_err(port->dev, "Could not update shared buffer for mirroring\n"); - return err; - } - } +void mlxsw_sp_span_speed_update_work(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct mlxsw_sp_port *mlxsw_sp_port; - return 0; + mlxsw_sp_port = container_of(dwork, struct mlxsw_sp_port, + span.speed_update_dw); + + /* If port is egress mirrored, the shared buffer size should be + * updated according to the speed value. + */ + if (mlxsw_sp_span_is_egress_mirror(mlxsw_sp_port)) + mlxsw_sp_span_port_buffsize_update(mlxsw_sp_port, + mlxsw_sp_port->dev->mtu); } static struct mlxsw_sp_span_inspected_port * @@ -836,15 +853,9 @@ mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port, /* if it is an egress SPAN, bind a shared buffer to it */ if (type == MLXSW_SP_SPAN_EGRESS) { - u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, - port->dev->mtu); - - mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); - if (err) { - netdev_err(port->dev, "Could not create shared buffer for mirroring\n"); + err = mlxsw_sp_span_port_buffsize_update(port, port->dev->mtu); + if (err) return err; - } } if (bind) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h index 5e04252f2a11..59724335525f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h @@ -74,5 +74,6 @@ void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_span_entry *span_entry); int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu); +void mlxsw_sp_span_speed_update_work(struct work_struct *work); #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c index e0d7c49ffae0..60205aa3f6a5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c @@ -9,6 +9,20 @@ #include "reg.h" #include "spectrum.h" +/* All driver-specific traps must be documented in + * Documentation/networking/devlink/mlxsw.rst + */ +enum { + DEVLINK_MLXSW_TRAP_ID_BASE = DEVLINK_TRAP_GENERIC_ID_MAX, + DEVLINK_MLXSW_TRAP_ID_IRIF_DISABLED, + DEVLINK_MLXSW_TRAP_ID_ERIF_DISABLED, +}; + +#define DEVLINK_MLXSW_TRAP_NAME_IRIF_DISABLED \ + "irif_disabled" +#define DEVLINK_MLXSW_TRAP_NAME_ERIF_DISABLED \ + "erif_disabled" + #define MLXSW_SP_TRAP_METADATA DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT static void mlxsw_sp_rx_drop_listener(struct sk_buff *skb, u8 local_port, @@ -21,6 +35,12 @@ static void mlxsw_sp_rx_exception_listener(struct sk_buff *skb, u8 local_port, DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ MLXSW_SP_TRAP_METADATA) +#define MLXSW_SP_TRAP_DRIVER_DROP(_id, _group_id) \ + DEVLINK_TRAP_DRIVER(DROP, DROP, DEVLINK_MLXSW_TRAP_ID_##_id, \ + DEVLINK_MLXSW_TRAP_NAME_##_id, \ + DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + MLXSW_SP_TRAP_METADATA) + #define MLXSW_SP_TRAP_EXCEPTION(_id, _group_id) \ DEVLINK_TRAP_GENERIC(EXCEPTION, TRAP, _id, \ DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ @@ -58,6 +78,11 @@ static struct devlink_trap mlxsw_sp_traps_arr[] = { MLXSW_SP_TRAP_EXCEPTION(UNRESOLVED_NEIGH, L3_DROPS), MLXSW_SP_TRAP_EXCEPTION(IPV4_LPM_UNICAST_MISS, L3_DROPS), MLXSW_SP_TRAP_EXCEPTION(IPV6_LPM_UNICAST_MISS, L3_DROPS), + 
MLXSW_SP_TRAP_DRIVER_DROP(IRIF_DISABLED, L3_DROPS), + MLXSW_SP_TRAP_DRIVER_DROP(ERIF_DISABLED, L3_DROPS), + MLXSW_SP_TRAP_DROP(NON_ROUTABLE, L3_DROPS), + MLXSW_SP_TRAP_EXCEPTION(DECAP_ERROR, TUNNEL_DROPS), + MLXSW_SP_TRAP_DROP(OVERLAY_SMAC_MC, TUNNEL_DROPS), }; static struct mlxsw_listener mlxsw_sp_listeners_arr[] = { @@ -90,6 +115,15 @@ static struct mlxsw_listener mlxsw_sp_listeners_arr[] = { TRAP_EXCEPTION_TO_CPU), MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM6, ROUTER_EXP, TRAP_EXCEPTION_TO_CPU), + MLXSW_SP_RXL_DISCARD(ROUTER_IRIF_EN, L3_DISCARDS), + MLXSW_SP_RXL_DISCARD(ROUTER_ERIF_EN, L3_DISCARDS), + MLXSW_SP_RXL_DISCARD(NON_ROUTABLE, L3_DISCARDS), + MLXSW_SP_RXL_EXCEPTION(DECAP_ECN0, ROUTER_EXP, TRAP_EXCEPTION_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(IPIP_DECAP_ERROR, ROUTER_EXP, + TRAP_EXCEPTION_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(DISCARD_DEC_PKT, TUNNEL_DISCARDS, + TRAP_EXCEPTION_TO_CPU), + MLXSW_SP_RXL_DISCARD(OVERLAY_SMAC_MC, TUNNEL_DISCARDS), }; /* Mapping between hardware trap and devlink trap. Multiple hardware traps can @@ -123,6 +157,13 @@ static u16 mlxsw_sp_listener_devlink_map[] = { DEVLINK_TRAP_GENERIC_ID_UNRESOLVED_NEIGH, DEVLINK_TRAP_GENERIC_ID_IPV4_LPM_UNICAST_MISS, DEVLINK_TRAP_GENERIC_ID_IPV6_LPM_UNICAST_MISS, + DEVLINK_MLXSW_TRAP_ID_IRIF_DISABLED, + DEVLINK_MLXSW_TRAP_ID_ERIF_DISABLED, + DEVLINK_TRAP_GENERIC_ID_NON_ROUTABLE, + DEVLINK_TRAP_GENERIC_ID_DECAP_ERROR, + DEVLINK_TRAP_GENERIC_ID_DECAP_ERROR, + DEVLINK_TRAP_GENERIC_ID_DECAP_ERROR, + DEVLINK_TRAP_GENERIC_ID_OVERLAY_SMAC_MC, }; static int mlxsw_sp_rx_listener(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, @@ -304,8 +345,9 @@ mlxsw_sp_trap_group_policer_init(struct mlxsw_sp *mlxsw_sp, u32 rate; switch (group->id) { - case DEVLINK_TRAP_GROUP_GENERIC_ID_L3_DROPS:/* fall through */ - case DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS: + case DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS: /* fall through */ + case DEVLINK_TRAP_GROUP_GENERIC_ID_L3_DROPS: /* fall through */ + case DEVLINK_TRAP_GROUP_GENERIC_ID_TUNNEL_DROPS: policer_id = MLXSW_SP_DISCARD_POLICER_ID; ir_units = MLXSW_REG_QPCR_IR_UNITS_M; is_bytes = false; @@ -342,6 +384,12 @@ __mlxsw_sp_trap_group_init(struct mlxsw_sp *mlxsw_sp, priority = 0; tc = 1; break; + case DEVLINK_TRAP_GROUP_GENERIC_ID_TUNNEL_DROPS: + group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_TUNNEL_DISCARDS; + policer_id = MLXSW_SP_DISCARD_POLICER_ID; + priority = 0; + tc = 1; + break; default: return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 0c1c142bb6b0..12e1fa998d42 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -67,6 +67,7 @@ enum { MLXSW_TRAP_ID_NVE_ENCAP_ARP = 0xBD, MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6, MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7, + MLXSW_TRAP_ID_DISCARD_NON_ROUTABLE = 0x11A, MLXSW_TRAP_ID_DISCARD_ROUTER2 = 0x130, MLXSW_TRAP_ID_DISCARD_ROUTER3 = 0x131, MLXSW_TRAP_ID_DISCARD_ING_PACKET_SMAC_MC = 0x140, @@ -80,12 +81,20 @@ enum { MLXSW_TRAP_ID_DISCARD_ING_ROUTER_UC_DIP_MC_DMAC = 0x161, MLXSW_TRAP_ID_DISCARD_ING_ROUTER_DIP_LB = 0x162, MLXSW_TRAP_ID_DISCARD_ING_ROUTER_SIP_MC = 0x163, + MLXSW_TRAP_ID_DISCARD_ING_ROUTER_SIP_CLASS_E = 0x164, MLXSW_TRAP_ID_DISCARD_ING_ROUTER_SIP_LB = 0x165, MLXSW_TRAP_ID_DISCARD_ING_ROUTER_CORRUPTED_IP_HDR = 0x167, + MLXSW_TRAP_ID_DISCARD_ING_ROUTER_MC_DMAC = 0x168, + MLXSW_TRAP_ID_DISCARD_ING_ROUTER_SIP_DIP = 0x169, MLXSW_TRAP_ID_DISCARD_ING_ROUTER_IPV4_SIP_BC = 0x16A, MLXSW_TRAP_ID_DISCARD_ING_ROUTER_IPV4_DIP_LOCAL_NET = 0x16B, + 
MLXSW_TRAP_ID_DISCARD_ING_ROUTER_DIP_LINK_LOCAL = 0x16C, + MLXSW_TRAP_ID_DISCARD_ROUTER_IRIF_EN = 0x178, + MLXSW_TRAP_ID_DISCARD_ROUTER_ERIF_EN = 0x179, MLXSW_TRAP_ID_DISCARD_ROUTER_LPM4 = 0x17B, MLXSW_TRAP_ID_DISCARD_ROUTER_LPM6 = 0x17C, + MLXSW_TRAP_ID_DISCARD_DEC_PKT = 0x188, + MLXSW_TRAP_ID_DISCARD_OVERLAY_SMAC_MC = 0x190, MLXSW_TRAP_ID_DISCARD_IPV6_MC_DIP_RESERVED_SCOPE = 0x1B0, MLXSW_TRAP_ID_DISCARD_IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE = 0x1B1, MLXSW_TRAP_ID_ACL0 = 0x1C0, diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c index da329ca115cc..f3f6dfe3eddc 100644 --- a/drivers/net/ethernet/micrel/ks8842.c +++ b/drivers/net/ethernet/micrel/ks8842.c @@ -1103,7 +1103,7 @@ static void ks8842_tx_timeout_work(struct work_struct *work) __ks8842_start_new_rx_dma(netdev); } -static void ks8842_tx_timeout(struct net_device *netdev) +static void ks8842_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ks8842_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index e102e1560ac7..d1444ba36e10 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -4896,7 +4896,7 @@ unlock: * triggered to free up resources so that the transmit routine can continue * sending out packets. The hardware is reset to correct the problem. */ -static void netdev_tx_timeout(struct net_device *dev) +static void netdev_tx_timeout(struct net_device *dev, unsigned int txqueue) { static unsigned long last_reset; diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c index 0567e4f387a5..09cdc2f2e7ff 100644 --- a/drivers/net/ethernet/microchip/enc28j60.c +++ b/drivers/net/ethernet/microchip/enc28j60.c @@ -1325,7 +1325,7 @@ static irqreturn_t enc28j60_irq(int irq, void *dev_id) return IRQ_HANDLED; } -static void enc28j60_tx_timeout(struct net_device *ndev) +static void enc28j60_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct enc28j60_net *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c index 52c41d11f565..39925e4bf2ec 100644 --- a/drivers/net/ethernet/microchip/encx24j600.c +++ b/drivers/net/ethernet/microchip/encx24j600.c @@ -892,7 +892,7 @@ static netdev_tx_t encx24j600_tx(struct sk_buff *skb, struct net_device *dev) } /* Deal with a transmit timeout */ -static void encx24j600_tx_timeout(struct net_device *dev) +static void encx24j600_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct encx24j600_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/microchip/lan743x_ptp.c b/drivers/net/ethernet/microchip/lan743x_ptp.c index afe52463dc57..9399f6a98748 100644 --- a/drivers/net/ethernet/microchip/lan743x_ptp.c +++ b/drivers/net/ethernet/microchip/lan743x_ptp.c @@ -1265,6 +1265,9 @@ int lan743x_ptp_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) lan743x_ptp_set_sync_ts_insert(adapter, true); break; + case HWTSTAMP_TX_ONESTEP_P2P: + ret = -ERANGE; + break; default: netif_warn(adapter, drv, adapter->netdev, " tx_type = %d, UNKNOWN\n", config.tx_type); diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 985b46d7e3d1..86d543ab1ab9 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -500,13 +500,14 @@ EXPORT_SYMBOL(ocelot_port_enable); static int ocelot_port_open(struct net_device *dev) { struct 
ocelot_port_private *priv = netdev_priv(dev); - struct ocelot *ocelot = priv->port.ocelot; + struct ocelot_port *ocelot_port = &priv->port; + struct ocelot *ocelot = ocelot_port->ocelot; int port = priv->chip_port; int err; if (priv->serdes) { err = phy_set_mode_ext(priv->serdes, PHY_MODE_ETHERNET, - priv->phy_mode); + ocelot_port->phy_mode); if (err) { netdev_err(dev, "Could not set mode of SerDes\n"); return err; @@ -514,7 +515,7 @@ static int ocelot_port_open(struct net_device *dev) } err = phy_connect_direct(dev, priv->phy, &ocelot_port_adjust_link, - priv->phy_mode); + ocelot_port->phy_mode); if (err) { netdev_err(dev, "Could not attach to PHY\n"); return err; diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h index c259114c48fd..04372ba72fec 100644 --- a/drivers/net/ethernet/mscc/ocelot.h +++ b/drivers/net/ethernet/mscc/ocelot.h @@ -18,11 +18,11 @@ #include <linux/ptp_clock_kernel.h> #include <linux/regmap.h> +#include <soc/mscc/ocelot_qsys.h> #include <soc/mscc/ocelot_sys.h> +#include <soc/mscc/ocelot_dev.h> +#include <soc/mscc/ocelot_ana.h> #include <soc/mscc/ocelot.h> -#include "ocelot_ana.h" -#include "ocelot_dev.h" -#include "ocelot_qsys.h" #include "ocelot_rew.h" #include "ocelot_qs.h" #include "ocelot_tc.h" @@ -68,7 +68,6 @@ struct ocelot_port_private { u8 vlan_aware; - phy_interface_t phy_mode; struct phy *serdes; struct ocelot_port_tc tc; diff --git a/drivers/net/ethernet/mscc/ocelot_ana.h b/drivers/net/ethernet/mscc/ocelot_ana.h deleted file mode 100644 index 841c6ec22b64..000000000000 --- a/drivers/net/ethernet/mscc/ocelot_ana.h +++ /dev/null @@ -1,625 +0,0 @@ -/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ -/* - * Microsemi Ocelot Switch driver - * - * Copyright (c) 2017 Microsemi Corporation - */ - -#ifndef _MSCC_OCELOT_ANA_H_ -#define _MSCC_OCELOT_ANA_H_ - -#define ANA_ANAGEFIL_B_DOM_EN BIT(22) -#define ANA_ANAGEFIL_B_DOM_VAL BIT(21) -#define ANA_ANAGEFIL_AGE_LOCKED BIT(20) -#define ANA_ANAGEFIL_PID_EN BIT(19) -#define ANA_ANAGEFIL_PID_VAL(x) (((x) << 14) & GENMASK(18, 14)) -#define ANA_ANAGEFIL_PID_VAL_M GENMASK(18, 14) -#define ANA_ANAGEFIL_PID_VAL_X(x) (((x) & GENMASK(18, 14)) >> 14) -#define ANA_ANAGEFIL_VID_EN BIT(13) -#define ANA_ANAGEFIL_VID_VAL(x) ((x) & GENMASK(12, 0)) -#define ANA_ANAGEFIL_VID_VAL_M GENMASK(12, 0) - -#define ANA_STORMLIMIT_CFG_RSZ 0x4 - -#define ANA_STORMLIMIT_CFG_STORM_RATE(x) (((x) << 3) & GENMASK(6, 3)) -#define ANA_STORMLIMIT_CFG_STORM_RATE_M GENMASK(6, 3) -#define ANA_STORMLIMIT_CFG_STORM_RATE_X(x) (((x) & GENMASK(6, 3)) >> 3) -#define ANA_STORMLIMIT_CFG_STORM_UNIT BIT(2) -#define ANA_STORMLIMIT_CFG_STORM_MODE(x) ((x) & GENMASK(1, 0)) -#define ANA_STORMLIMIT_CFG_STORM_MODE_M GENMASK(1, 0) - -#define ANA_AUTOAGE_AGE_FAST BIT(21) -#define ANA_AUTOAGE_AGE_PERIOD(x) (((x) << 1) & GENMASK(20, 1)) -#define ANA_AUTOAGE_AGE_PERIOD_M GENMASK(20, 1) -#define ANA_AUTOAGE_AGE_PERIOD_X(x) (((x) & GENMASK(20, 1)) >> 1) -#define ANA_AUTOAGE_AUTOAGE_LOCKED BIT(0) - -#define ANA_MACTOPTIONS_REDUCED_TABLE BIT(1) -#define ANA_MACTOPTIONS_SHADOW BIT(0) - -#define ANA_AGENCTRL_FID_MASK(x) (((x) << 12) & GENMASK(23, 12)) -#define ANA_AGENCTRL_FID_MASK_M GENMASK(23, 12) -#define ANA_AGENCTRL_FID_MASK_X(x) (((x) & GENMASK(23, 12)) >> 12) -#define ANA_AGENCTRL_IGNORE_DMAC_FLAGS BIT(11) -#define ANA_AGENCTRL_IGNORE_SMAC_FLAGS BIT(10) -#define ANA_AGENCTRL_FLOOD_SPECIAL BIT(9) -#define ANA_AGENCTRL_FLOOD_IGNORE_VLAN BIT(8) -#define ANA_AGENCTRL_MIRROR_CPU BIT(7) -#define ANA_AGENCTRL_LEARN_CPU_COPY BIT(6) -#define 
ANA_AGENCTRL_LEARN_FWD_KILL BIT(5) -#define ANA_AGENCTRL_LEARN_IGNORE_VLAN BIT(4) -#define ANA_AGENCTRL_CPU_CPU_KILL_ENA BIT(3) -#define ANA_AGENCTRL_GREEN_COUNT_MODE BIT(2) -#define ANA_AGENCTRL_YELLOW_COUNT_MODE BIT(1) -#define ANA_AGENCTRL_RED_COUNT_MODE BIT(0) - -#define ANA_FLOODING_RSZ 0x4 - -#define ANA_FLOODING_FLD_UNICAST(x) (((x) << 12) & GENMASK(17, 12)) -#define ANA_FLOODING_FLD_UNICAST_M GENMASK(17, 12) -#define ANA_FLOODING_FLD_UNICAST_X(x) (((x) & GENMASK(17, 12)) >> 12) -#define ANA_FLOODING_FLD_BROADCAST(x) (((x) << 6) & GENMASK(11, 6)) -#define ANA_FLOODING_FLD_BROADCAST_M GENMASK(11, 6) -#define ANA_FLOODING_FLD_BROADCAST_X(x) (((x) & GENMASK(11, 6)) >> 6) -#define ANA_FLOODING_FLD_MULTICAST(x) ((x) & GENMASK(5, 0)) -#define ANA_FLOODING_FLD_MULTICAST_M GENMASK(5, 0) - -#define ANA_FLOODING_IPMC_FLD_MC4_CTRL(x) (((x) << 18) & GENMASK(23, 18)) -#define ANA_FLOODING_IPMC_FLD_MC4_CTRL_M GENMASK(23, 18) -#define ANA_FLOODING_IPMC_FLD_MC4_CTRL_X(x) (((x) & GENMASK(23, 18)) >> 18) -#define ANA_FLOODING_IPMC_FLD_MC4_DATA(x) (((x) << 12) & GENMASK(17, 12)) -#define ANA_FLOODING_IPMC_FLD_MC4_DATA_M GENMASK(17, 12) -#define ANA_FLOODING_IPMC_FLD_MC4_DATA_X(x) (((x) & GENMASK(17, 12)) >> 12) -#define ANA_FLOODING_IPMC_FLD_MC6_CTRL(x) (((x) << 6) & GENMASK(11, 6)) -#define ANA_FLOODING_IPMC_FLD_MC6_CTRL_M GENMASK(11, 6) -#define ANA_FLOODING_IPMC_FLD_MC6_CTRL_X(x) (((x) & GENMASK(11, 6)) >> 6) -#define ANA_FLOODING_IPMC_FLD_MC6_DATA(x) ((x) & GENMASK(5, 0)) -#define ANA_FLOODING_IPMC_FLD_MC6_DATA_M GENMASK(5, 0) - -#define ANA_SFLOW_CFG_RSZ 0x4 - -#define ANA_SFLOW_CFG_SF_RATE(x) (((x) << 2) & GENMASK(13, 2)) -#define ANA_SFLOW_CFG_SF_RATE_M GENMASK(13, 2) -#define ANA_SFLOW_CFG_SF_RATE_X(x) (((x) & GENMASK(13, 2)) >> 2) -#define ANA_SFLOW_CFG_SF_SAMPLE_RX BIT(1) -#define ANA_SFLOW_CFG_SF_SAMPLE_TX BIT(0) - -#define ANA_PORT_MODE_RSZ 0x4 - -#define ANA_PORT_MODE_REDTAG_PARSE_CFG BIT(3) -#define ANA_PORT_MODE_VLAN_PARSE_CFG(x) (((x) << 1) & GENMASK(2, 1)) -#define ANA_PORT_MODE_VLAN_PARSE_CFG_M GENMASK(2, 1) -#define ANA_PORT_MODE_VLAN_PARSE_CFG_X(x) (((x) & GENMASK(2, 1)) >> 1) -#define ANA_PORT_MODE_L3_PARSE_CFG BIT(0) - -#define ANA_CUT_THRU_CFG_RSZ 0x4 - -#define ANA_PGID_PGID_RSZ 0x4 - -#define ANA_PGID_PGID_PGID(x) ((x) & GENMASK(11, 0)) -#define ANA_PGID_PGID_PGID_M GENMASK(11, 0) -#define ANA_PGID_PGID_CPUQ_DST_PGID(x) (((x) << 27) & GENMASK(29, 27)) -#define ANA_PGID_PGID_CPUQ_DST_PGID_M GENMASK(29, 27) -#define ANA_PGID_PGID_CPUQ_DST_PGID_X(x) (((x) & GENMASK(29, 27)) >> 27) - -#define ANA_TABLES_MACHDATA_VID(x) (((x) << 16) & GENMASK(28, 16)) -#define ANA_TABLES_MACHDATA_VID_M GENMASK(28, 16) -#define ANA_TABLES_MACHDATA_VID_X(x) (((x) & GENMASK(28, 16)) >> 16) -#define ANA_TABLES_MACHDATA_MACHDATA(x) ((x) & GENMASK(15, 0)) -#define ANA_TABLES_MACHDATA_MACHDATA_M GENMASK(15, 0) - -#define ANA_TABLES_STREAMDATA_SSID_VALID BIT(16) -#define ANA_TABLES_STREAMDATA_SSID(x) (((x) << 9) & GENMASK(15, 9)) -#define ANA_TABLES_STREAMDATA_SSID_M GENMASK(15, 9) -#define ANA_TABLES_STREAMDATA_SSID_X(x) (((x) & GENMASK(15, 9)) >> 9) -#define ANA_TABLES_STREAMDATA_SFID_VALID BIT(8) -#define ANA_TABLES_STREAMDATA_SFID(x) ((x) & GENMASK(7, 0)) -#define ANA_TABLES_STREAMDATA_SFID_M GENMASK(7, 0) - -#define ANA_TABLES_MACACCESS_MAC_CPU_COPY BIT(15) -#define ANA_TABLES_MACACCESS_SRC_KILL BIT(14) -#define ANA_TABLES_MACACCESS_IGNORE_VLAN BIT(13) -#define ANA_TABLES_MACACCESS_AGED_FLAG BIT(12) -#define ANA_TABLES_MACACCESS_VALID BIT(11) -#define ANA_TABLES_MACACCESS_ENTRYTYPE(x) (((x) << 9) & 
GENMASK(10, 9)) -#define ANA_TABLES_MACACCESS_ENTRYTYPE_M GENMASK(10, 9) -#define ANA_TABLES_MACACCESS_ENTRYTYPE_X(x) (((x) & GENMASK(10, 9)) >> 9) -#define ANA_TABLES_MACACCESS_DEST_IDX(x) (((x) << 3) & GENMASK(8, 3)) -#define ANA_TABLES_MACACCESS_DEST_IDX_M GENMASK(8, 3) -#define ANA_TABLES_MACACCESS_DEST_IDX_X(x) (((x) & GENMASK(8, 3)) >> 3) -#define ANA_TABLES_MACACCESS_MAC_TABLE_CMD(x) ((x) & GENMASK(2, 0)) -#define ANA_TABLES_MACACCESS_MAC_TABLE_CMD_M GENMASK(2, 0) -#define MACACCESS_CMD_IDLE 0 -#define MACACCESS_CMD_LEARN 1 -#define MACACCESS_CMD_FORGET 2 -#define MACACCESS_CMD_AGE 3 -#define MACACCESS_CMD_GET_NEXT 4 -#define MACACCESS_CMD_INIT 5 -#define MACACCESS_CMD_READ 6 -#define MACACCESS_CMD_WRITE 7 - -#define ANA_TABLES_VLANACCESS_VLAN_PORT_MASK(x) (((x) << 2) & GENMASK(13, 2)) -#define ANA_TABLES_VLANACCESS_VLAN_PORT_MASK_M GENMASK(13, 2) -#define ANA_TABLES_VLANACCESS_VLAN_PORT_MASK_X(x) (((x) & GENMASK(13, 2)) >> 2) -#define ANA_TABLES_VLANACCESS_VLAN_TBL_CMD(x) ((x) & GENMASK(1, 0)) -#define ANA_TABLES_VLANACCESS_VLAN_TBL_CMD_M GENMASK(1, 0) -#define ANA_TABLES_VLANACCESS_CMD_IDLE 0x0 -#define ANA_TABLES_VLANACCESS_CMD_WRITE 0x2 -#define ANA_TABLES_VLANACCESS_CMD_INIT 0x3 - -#define ANA_TABLES_VLANTIDX_VLAN_SEC_FWD_ENA BIT(17) -#define ANA_TABLES_VLANTIDX_VLAN_FLOOD_DIS BIT(16) -#define ANA_TABLES_VLANTIDX_VLAN_PRIV_VLAN BIT(15) -#define ANA_TABLES_VLANTIDX_VLAN_LEARN_DISABLED BIT(14) -#define ANA_TABLES_VLANTIDX_VLAN_MIRROR BIT(13) -#define ANA_TABLES_VLANTIDX_VLAN_SRC_CHK BIT(12) -#define ANA_TABLES_VLANTIDX_V_INDEX(x) ((x) & GENMASK(11, 0)) -#define ANA_TABLES_VLANTIDX_V_INDEX_M GENMASK(11, 0) - -#define ANA_TABLES_ISDXACCESS_ISDX_PORT_MASK(x) (((x) << 2) & GENMASK(8, 2)) -#define ANA_TABLES_ISDXACCESS_ISDX_PORT_MASK_M GENMASK(8, 2) -#define ANA_TABLES_ISDXACCESS_ISDX_PORT_MASK_X(x) (((x) & GENMASK(8, 2)) >> 2) -#define ANA_TABLES_ISDXACCESS_ISDX_TBL_CMD(x) ((x) & GENMASK(1, 0)) -#define ANA_TABLES_ISDXACCESS_ISDX_TBL_CMD_M GENMASK(1, 0) - -#define ANA_TABLES_ISDXTIDX_ISDX_SDLBI(x) (((x) << 21) & GENMASK(28, 21)) -#define ANA_TABLES_ISDXTIDX_ISDX_SDLBI_M GENMASK(28, 21) -#define ANA_TABLES_ISDXTIDX_ISDX_SDLBI_X(x) (((x) & GENMASK(28, 21)) >> 21) -#define ANA_TABLES_ISDXTIDX_ISDX_MSTI(x) (((x) << 15) & GENMASK(20, 15)) -#define ANA_TABLES_ISDXTIDX_ISDX_MSTI_M GENMASK(20, 15) -#define ANA_TABLES_ISDXTIDX_ISDX_MSTI_X(x) (((x) & GENMASK(20, 15)) >> 15) -#define ANA_TABLES_ISDXTIDX_ISDX_ES0_KEY_ENA BIT(14) -#define ANA_TABLES_ISDXTIDX_ISDX_FORCE_ENA BIT(10) -#define ANA_TABLES_ISDXTIDX_ISDX_INDEX(x) ((x) & GENMASK(7, 0)) -#define ANA_TABLES_ISDXTIDX_ISDX_INDEX_M GENMASK(7, 0) - -#define ANA_TABLES_ENTRYLIM_RSZ 0x4 - -#define ANA_TABLES_ENTRYLIM_ENTRYLIM(x) (((x) << 14) & GENMASK(17, 14)) -#define ANA_TABLES_ENTRYLIM_ENTRYLIM_M GENMASK(17, 14) -#define ANA_TABLES_ENTRYLIM_ENTRYLIM_X(x) (((x) & GENMASK(17, 14)) >> 14) -#define ANA_TABLES_ENTRYLIM_ENTRYSTAT(x) ((x) & GENMASK(13, 0)) -#define ANA_TABLES_ENTRYLIM_ENTRYSTAT_M GENMASK(13, 0) - -#define ANA_TABLES_STREAMACCESS_GEN_REC_SEQ_NUM(x) (((x) << 4) & GENMASK(31, 4)) -#define ANA_TABLES_STREAMACCESS_GEN_REC_SEQ_NUM_M GENMASK(31, 4) -#define ANA_TABLES_STREAMACCESS_GEN_REC_SEQ_NUM_X(x) (((x) & GENMASK(31, 4)) >> 4) -#define ANA_TABLES_STREAMACCESS_SEQ_GEN_REC_ENA BIT(3) -#define ANA_TABLES_STREAMACCESS_GEN_REC_TYPE BIT(2) -#define ANA_TABLES_STREAMACCESS_STREAM_TBL_CMD(x) ((x) & GENMASK(1, 0)) -#define ANA_TABLES_STREAMACCESS_STREAM_TBL_CMD_M GENMASK(1, 0) - -#define ANA_TABLES_STREAMTIDX_SEQ_GEN_ERR_STATUS(x) (((x) << 30) 
& GENMASK(31, 30)) -#define ANA_TABLES_STREAMTIDX_SEQ_GEN_ERR_STATUS_M GENMASK(31, 30) -#define ANA_TABLES_STREAMTIDX_SEQ_GEN_ERR_STATUS_X(x) (((x) & GENMASK(31, 30)) >> 30) -#define ANA_TABLES_STREAMTIDX_S_INDEX(x) (((x) << 16) & GENMASK(22, 16)) -#define ANA_TABLES_STREAMTIDX_S_INDEX_M GENMASK(22, 16) -#define ANA_TABLES_STREAMTIDX_S_INDEX_X(x) (((x) & GENMASK(22, 16)) >> 16) -#define ANA_TABLES_STREAMTIDX_FORCE_SF_BEHAVIOUR BIT(14) -#define ANA_TABLES_STREAMTIDX_SEQ_HISTORY_LEN(x) (((x) << 8) & GENMASK(13, 8)) -#define ANA_TABLES_STREAMTIDX_SEQ_HISTORY_LEN_M GENMASK(13, 8) -#define ANA_TABLES_STREAMTIDX_SEQ_HISTORY_LEN_X(x) (((x) & GENMASK(13, 8)) >> 8) -#define ANA_TABLES_STREAMTIDX_RESET_ON_ROGUE BIT(7) -#define ANA_TABLES_STREAMTIDX_REDTAG_POP BIT(6) -#define ANA_TABLES_STREAMTIDX_STREAM_SPLIT BIT(5) -#define ANA_TABLES_STREAMTIDX_SEQ_SPACE_LOG2(x) ((x) & GENMASK(4, 0)) -#define ANA_TABLES_STREAMTIDX_SEQ_SPACE_LOG2_M GENMASK(4, 0) - -#define ANA_TABLES_SEQ_MASK_SPLIT_MASK(x) (((x) << 16) & GENMASK(22, 16)) -#define ANA_TABLES_SEQ_MASK_SPLIT_MASK_M GENMASK(22, 16) -#define ANA_TABLES_SEQ_MASK_SPLIT_MASK_X(x) (((x) & GENMASK(22, 16)) >> 16) -#define ANA_TABLES_SEQ_MASK_INPUT_PORT_MASK(x) ((x) & GENMASK(6, 0)) -#define ANA_TABLES_SEQ_MASK_INPUT_PORT_MASK_M GENMASK(6, 0) - -#define ANA_TABLES_SFID_MASK_IGR_PORT_MASK(x) (((x) << 1) & GENMASK(7, 1)) -#define ANA_TABLES_SFID_MASK_IGR_PORT_MASK_M GENMASK(7, 1) -#define ANA_TABLES_SFID_MASK_IGR_PORT_MASK_X(x) (((x) & GENMASK(7, 1)) >> 1) -#define ANA_TABLES_SFID_MASK_IGR_SRCPORT_MATCH_ENA BIT(0) - -#define ANA_TABLES_SFIDACCESS_IGR_PRIO_MATCH_ENA BIT(22) -#define ANA_TABLES_SFIDACCESS_IGR_PRIO(x) (((x) << 19) & GENMASK(21, 19)) -#define ANA_TABLES_SFIDACCESS_IGR_PRIO_M GENMASK(21, 19) -#define ANA_TABLES_SFIDACCESS_IGR_PRIO_X(x) (((x) & GENMASK(21, 19)) >> 19) -#define ANA_TABLES_SFIDACCESS_FORCE_BLOCK BIT(18) -#define ANA_TABLES_SFIDACCESS_MAX_SDU_LEN(x) (((x) << 2) & GENMASK(17, 2)) -#define ANA_TABLES_SFIDACCESS_MAX_SDU_LEN_M GENMASK(17, 2) -#define ANA_TABLES_SFIDACCESS_MAX_SDU_LEN_X(x) (((x) & GENMASK(17, 2)) >> 2) -#define ANA_TABLES_SFIDACCESS_SFID_TBL_CMD(x) ((x) & GENMASK(1, 0)) -#define ANA_TABLES_SFIDACCESS_SFID_TBL_CMD_M GENMASK(1, 0) - -#define ANA_TABLES_SFIDTIDX_SGID_VALID BIT(26) -#define ANA_TABLES_SFIDTIDX_SGID(x) (((x) << 18) & GENMASK(25, 18)) -#define ANA_TABLES_SFIDTIDX_SGID_M GENMASK(25, 18) -#define ANA_TABLES_SFIDTIDX_SGID_X(x) (((x) & GENMASK(25, 18)) >> 18) -#define ANA_TABLES_SFIDTIDX_POL_ENA BIT(17) -#define ANA_TABLES_SFIDTIDX_POL_IDX(x) (((x) << 8) & GENMASK(16, 8)) -#define ANA_TABLES_SFIDTIDX_POL_IDX_M GENMASK(16, 8) -#define ANA_TABLES_SFIDTIDX_POL_IDX_X(x) (((x) & GENMASK(16, 8)) >> 8) -#define ANA_TABLES_SFIDTIDX_SFID_INDEX(x) ((x) & GENMASK(7, 0)) -#define ANA_TABLES_SFIDTIDX_SFID_INDEX_M GENMASK(7, 0) - -#define ANA_MSTI_STATE_RSZ 0x4 - -#define ANA_OAM_UPM_LM_CNT_RSZ 0x4 - -#define ANA_SG_ACCESS_CTRL_SGID(x) ((x) & GENMASK(7, 0)) -#define ANA_SG_ACCESS_CTRL_SGID_M GENMASK(7, 0) -#define ANA_SG_ACCESS_CTRL_CONFIG_CHANGE BIT(28) - -#define ANA_SG_CONFIG_REG_3_BASE_TIME_SEC_MSB(x) ((x) & GENMASK(15, 0)) -#define ANA_SG_CONFIG_REG_3_BASE_TIME_SEC_MSB_M GENMASK(15, 0) -#define ANA_SG_CONFIG_REG_3_LIST_LENGTH(x) (((x) << 16) & GENMASK(18, 16)) -#define ANA_SG_CONFIG_REG_3_LIST_LENGTH_M GENMASK(18, 16) -#define ANA_SG_CONFIG_REG_3_LIST_LENGTH_X(x) (((x) & GENMASK(18, 16)) >> 16) -#define ANA_SG_CONFIG_REG_3_GATE_ENABLE BIT(20) -#define ANA_SG_CONFIG_REG_3_INIT_IPS(x) (((x) << 24) & GENMASK(27, 24)) -#define 
ANA_SG_CONFIG_REG_3_INIT_IPS_M GENMASK(27, 24) -#define ANA_SG_CONFIG_REG_3_INIT_IPS_X(x) (((x) & GENMASK(27, 24)) >> 24) -#define ANA_SG_CONFIG_REG_3_INIT_GATE_STATE BIT(28) - -#define ANA_SG_GCL_GS_CONFIG_RSZ 0x4 - -#define ANA_SG_GCL_GS_CONFIG_IPS(x) ((x) & GENMASK(3, 0)) -#define ANA_SG_GCL_GS_CONFIG_IPS_M GENMASK(3, 0) -#define ANA_SG_GCL_GS_CONFIG_GATE_STATE BIT(4) - -#define ANA_SG_GCL_TI_CONFIG_RSZ 0x4 - -#define ANA_SG_STATUS_REG_3_CFG_CHG_TIME_SEC_MSB(x) ((x) & GENMASK(15, 0)) -#define ANA_SG_STATUS_REG_3_CFG_CHG_TIME_SEC_MSB_M GENMASK(15, 0) -#define ANA_SG_STATUS_REG_3_GATE_STATE BIT(16) -#define ANA_SG_STATUS_REG_3_IPS(x) (((x) << 20) & GENMASK(23, 20)) -#define ANA_SG_STATUS_REG_3_IPS_M GENMASK(23, 20) -#define ANA_SG_STATUS_REG_3_IPS_X(x) (((x) & GENMASK(23, 20)) >> 20) -#define ANA_SG_STATUS_REG_3_CONFIG_PENDING BIT(24) - -#define ANA_PORT_VLAN_CFG_GSZ 0x100 - -#define ANA_PORT_VLAN_CFG_VLAN_VID_AS_ISDX BIT(21) -#define ANA_PORT_VLAN_CFG_VLAN_AWARE_ENA BIT(20) -#define ANA_PORT_VLAN_CFG_VLAN_POP_CNT(x) (((x) << 18) & GENMASK(19, 18)) -#define ANA_PORT_VLAN_CFG_VLAN_POP_CNT_M GENMASK(19, 18) -#define ANA_PORT_VLAN_CFG_VLAN_POP_CNT_X(x) (((x) & GENMASK(19, 18)) >> 18) -#define ANA_PORT_VLAN_CFG_VLAN_INNER_TAG_ENA BIT(17) -#define ANA_PORT_VLAN_CFG_VLAN_TAG_TYPE BIT(16) -#define ANA_PORT_VLAN_CFG_VLAN_DEI BIT(15) -#define ANA_PORT_VLAN_CFG_VLAN_PCP(x) (((x) << 12) & GENMASK(14, 12)) -#define ANA_PORT_VLAN_CFG_VLAN_PCP_M GENMASK(14, 12) -#define ANA_PORT_VLAN_CFG_VLAN_PCP_X(x) (((x) & GENMASK(14, 12)) >> 12) -#define ANA_PORT_VLAN_CFG_VLAN_VID(x) ((x) & GENMASK(11, 0)) -#define ANA_PORT_VLAN_CFG_VLAN_VID_M GENMASK(11, 0) - -#define ANA_PORT_DROP_CFG_GSZ 0x100 - -#define ANA_PORT_DROP_CFG_DROP_UNTAGGED_ENA BIT(6) -#define ANA_PORT_DROP_CFG_DROP_S_TAGGED_ENA BIT(5) -#define ANA_PORT_DROP_CFG_DROP_C_TAGGED_ENA BIT(4) -#define ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA BIT(3) -#define ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA BIT(2) -#define ANA_PORT_DROP_CFG_DROP_NULL_MAC_ENA BIT(1) -#define ANA_PORT_DROP_CFG_DROP_MC_SMAC_ENA BIT(0) - -#define ANA_PORT_QOS_CFG_GSZ 0x100 - -#define ANA_PORT_QOS_CFG_DP_DEFAULT_VAL BIT(8) -#define ANA_PORT_QOS_CFG_QOS_DEFAULT_VAL(x) (((x) << 5) & GENMASK(7, 5)) -#define ANA_PORT_QOS_CFG_QOS_DEFAULT_VAL_M GENMASK(7, 5) -#define ANA_PORT_QOS_CFG_QOS_DEFAULT_VAL_X(x) (((x) & GENMASK(7, 5)) >> 5) -#define ANA_PORT_QOS_CFG_QOS_DSCP_ENA BIT(4) -#define ANA_PORT_QOS_CFG_QOS_PCP_ENA BIT(3) -#define ANA_PORT_QOS_CFG_DSCP_TRANSLATE_ENA BIT(2) -#define ANA_PORT_QOS_CFG_DSCP_REWR_CFG(x) ((x) & GENMASK(1, 0)) -#define ANA_PORT_QOS_CFG_DSCP_REWR_CFG_M GENMASK(1, 0) - -#define ANA_PORT_VCAP_CFG_GSZ 0x100 - -#define ANA_PORT_VCAP_CFG_S1_ENA BIT(14) -#define ANA_PORT_VCAP_CFG_S1_DMAC_DIP_ENA(x) (((x) << 11) & GENMASK(13, 11)) -#define ANA_PORT_VCAP_CFG_S1_DMAC_DIP_ENA_M GENMASK(13, 11) -#define ANA_PORT_VCAP_CFG_S1_DMAC_DIP_ENA_X(x) (((x) & GENMASK(13, 11)) >> 11) -#define ANA_PORT_VCAP_CFG_S1_VLAN_INNER_TAG_ENA(x) (((x) << 8) & GENMASK(10, 8)) -#define ANA_PORT_VCAP_CFG_S1_VLAN_INNER_TAG_ENA_M GENMASK(10, 8) -#define ANA_PORT_VCAP_CFG_S1_VLAN_INNER_TAG_ENA_X(x) (((x) & GENMASK(10, 8)) >> 8) -#define ANA_PORT_VCAP_CFG_PAG_VAL(x) ((x) & GENMASK(7, 0)) -#define ANA_PORT_VCAP_CFG_PAG_VAL_M GENMASK(7, 0) - -#define ANA_PORT_VCAP_S1_KEY_CFG_GSZ 0x100 -#define ANA_PORT_VCAP_S1_KEY_CFG_RSZ 0x4 - -#define ANA_PORT_VCAP_S1_KEY_CFG_S1_KEY_IP6_CFG(x) (((x) << 4) & GENMASK(6, 4)) -#define ANA_PORT_VCAP_S1_KEY_CFG_S1_KEY_IP6_CFG_M GENMASK(6, 4) -#define 
ANA_PORT_VCAP_S1_KEY_CFG_S1_KEY_IP6_CFG_X(x) (((x) & GENMASK(6, 4)) >> 4) -#define ANA_PORT_VCAP_S1_KEY_CFG_S1_KEY_IP4_CFG(x) (((x) << 2) & GENMASK(3, 2)) -#define ANA_PORT_VCAP_S1_KEY_CFG_S1_KEY_IP4_CFG_M GENMASK(3, 2) -#define ANA_PORT_VCAP_S1_KEY_CFG_S1_KEY_IP4_CFG_X(x) (((x) & GENMASK(3, 2)) >> 2) -#define ANA_PORT_VCAP_S1_KEY_CFG_S1_KEY_OTHER_CFG(x) ((x) & GENMASK(1, 0)) -#define ANA_PORT_VCAP_S1_KEY_CFG_S1_KEY_OTHER_CFG_M GENMASK(1, 0) - -#define ANA_PORT_VCAP_S2_CFG_GSZ 0x100 - -#define ANA_PORT_VCAP_S2_CFG_S2_UDP_PAYLOAD_ENA(x) (((x) << 17) & GENMASK(18, 17)) -#define ANA_PORT_VCAP_S2_CFG_S2_UDP_PAYLOAD_ENA_M GENMASK(18, 17) -#define ANA_PORT_VCAP_S2_CFG_S2_UDP_PAYLOAD_ENA_X(x) (((x) & GENMASK(18, 17)) >> 17) -#define ANA_PORT_VCAP_S2_CFG_S2_ETYPE_PAYLOAD_ENA(x) (((x) << 15) & GENMASK(16, 15)) -#define ANA_PORT_VCAP_S2_CFG_S2_ETYPE_PAYLOAD_ENA_M GENMASK(16, 15) -#define ANA_PORT_VCAP_S2_CFG_S2_ETYPE_PAYLOAD_ENA_X(x) (((x) & GENMASK(16, 15)) >> 15) -#define ANA_PORT_VCAP_S2_CFG_S2_ENA BIT(14) -#define ANA_PORT_VCAP_S2_CFG_S2_SNAP_DIS(x) (((x) << 12) & GENMASK(13, 12)) -#define ANA_PORT_VCAP_S2_CFG_S2_SNAP_DIS_M GENMASK(13, 12) -#define ANA_PORT_VCAP_S2_CFG_S2_SNAP_DIS_X(x) (((x) & GENMASK(13, 12)) >> 12) -#define ANA_PORT_VCAP_S2_CFG_S2_ARP_DIS(x) (((x) << 10) & GENMASK(11, 10)) -#define ANA_PORT_VCAP_S2_CFG_S2_ARP_DIS_M GENMASK(11, 10) -#define ANA_PORT_VCAP_S2_CFG_S2_ARP_DIS_X(x) (((x) & GENMASK(11, 10)) >> 10) -#define ANA_PORT_VCAP_S2_CFG_S2_IP_TCPUDP_DIS(x) (((x) << 8) & GENMASK(9, 8)) -#define ANA_PORT_VCAP_S2_CFG_S2_IP_TCPUDP_DIS_M GENMASK(9, 8) -#define ANA_PORT_VCAP_S2_CFG_S2_IP_TCPUDP_DIS_X(x) (((x) & GENMASK(9, 8)) >> 8) -#define ANA_PORT_VCAP_S2_CFG_S2_IP_OTHER_DIS(x) (((x) << 6) & GENMASK(7, 6)) -#define ANA_PORT_VCAP_S2_CFG_S2_IP_OTHER_DIS_M GENMASK(7, 6) -#define ANA_PORT_VCAP_S2_CFG_S2_IP_OTHER_DIS_X(x) (((x) & GENMASK(7, 6)) >> 6) -#define ANA_PORT_VCAP_S2_CFG_S2_IP6_CFG(x) (((x) << 2) & GENMASK(5, 2)) -#define ANA_PORT_VCAP_S2_CFG_S2_IP6_CFG_M GENMASK(5, 2) -#define ANA_PORT_VCAP_S2_CFG_S2_IP6_CFG_X(x) (((x) & GENMASK(5, 2)) >> 2) -#define ANA_PORT_VCAP_S2_CFG_S2_OAM_DIS(x) ((x) & GENMASK(1, 0)) -#define ANA_PORT_VCAP_S2_CFG_S2_OAM_DIS_M GENMASK(1, 0) - -#define ANA_PORT_PCP_DEI_MAP_GSZ 0x100 -#define ANA_PORT_PCP_DEI_MAP_RSZ 0x4 - -#define ANA_PORT_PCP_DEI_MAP_DP_PCP_DEI_VAL BIT(3) -#define ANA_PORT_PCP_DEI_MAP_QOS_PCP_DEI_VAL(x) ((x) & GENMASK(2, 0)) -#define ANA_PORT_PCP_DEI_MAP_QOS_PCP_DEI_VAL_M GENMASK(2, 0) - -#define ANA_PORT_CPU_FWD_CFG_GSZ 0x100 - -#define ANA_PORT_CPU_FWD_CFG_CPU_VRAP_REDIR_ENA BIT(7) -#define ANA_PORT_CPU_FWD_CFG_CPU_MLD_REDIR_ENA BIT(6) -#define ANA_PORT_CPU_FWD_CFG_CPU_IGMP_REDIR_ENA BIT(5) -#define ANA_PORT_CPU_FWD_CFG_CPU_IPMC_CTRL_COPY_ENA BIT(4) -#define ANA_PORT_CPU_FWD_CFG_CPU_SRC_COPY_ENA BIT(3) -#define ANA_PORT_CPU_FWD_CFG_CPU_ALLBRIDGE_DROP_ENA BIT(2) -#define ANA_PORT_CPU_FWD_CFG_CPU_ALLBRIDGE_REDIR_ENA BIT(1) -#define ANA_PORT_CPU_FWD_CFG_CPU_OAM_ENA BIT(0) - -#define ANA_PORT_CPU_FWD_BPDU_CFG_GSZ 0x100 - -#define ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_DROP_ENA(x) (((x) << 16) & GENMASK(31, 16)) -#define ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_DROP_ENA_M GENMASK(31, 16) -#define ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_DROP_ENA_X(x) (((x) & GENMASK(31, 16)) >> 16) -#define ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_REDIR_ENA(x) ((x) & GENMASK(15, 0)) -#define ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_REDIR_ENA_M GENMASK(15, 0) - -#define ANA_PORT_CPU_FWD_GARP_CFG_GSZ 0x100 - -#define ANA_PORT_CPU_FWD_GARP_CFG_GARP_DROP_ENA(x) (((x) << 16) & GENMASK(31, 16)) -#define 
ANA_PORT_CPU_FWD_GARP_CFG_GARP_DROP_ENA_M GENMASK(31, 16) -#define ANA_PORT_CPU_FWD_GARP_CFG_GARP_DROP_ENA_X(x) (((x) & GENMASK(31, 16)) >> 16) -#define ANA_PORT_CPU_FWD_GARP_CFG_GARP_REDIR_ENA(x) ((x) & GENMASK(15, 0)) -#define ANA_PORT_CPU_FWD_GARP_CFG_GARP_REDIR_ENA_M GENMASK(15, 0) - -#define ANA_PORT_CPU_FWD_CCM_CFG_GSZ 0x100 - -#define ANA_PORT_CPU_FWD_CCM_CFG_CCM_DROP_ENA(x) (((x) << 16) & GENMASK(31, 16)) -#define ANA_PORT_CPU_FWD_CCM_CFG_CCM_DROP_ENA_M GENMASK(31, 16) -#define ANA_PORT_CPU_FWD_CCM_CFG_CCM_DROP_ENA_X(x) (((x) & GENMASK(31, 16)) >> 16) -#define ANA_PORT_CPU_FWD_CCM_CFG_CCM_REDIR_ENA(x) ((x) & GENMASK(15, 0)) -#define ANA_PORT_CPU_FWD_CCM_CFG_CCM_REDIR_ENA_M GENMASK(15, 0) - -#define ANA_PORT_PORT_CFG_GSZ 0x100 - -#define ANA_PORT_PORT_CFG_SRC_MIRROR_ENA BIT(15) -#define ANA_PORT_PORT_CFG_LIMIT_DROP BIT(14) -#define ANA_PORT_PORT_CFG_LIMIT_CPU BIT(13) -#define ANA_PORT_PORT_CFG_LOCKED_PORTMOVE_DROP BIT(12) -#define ANA_PORT_PORT_CFG_LOCKED_PORTMOVE_CPU BIT(11) -#define ANA_PORT_PORT_CFG_LEARNDROP BIT(10) -#define ANA_PORT_PORT_CFG_LEARNCPU BIT(9) -#define ANA_PORT_PORT_CFG_LEARNAUTO BIT(8) -#define ANA_PORT_PORT_CFG_LEARN_ENA BIT(7) -#define ANA_PORT_PORT_CFG_RECV_ENA BIT(6) -#define ANA_PORT_PORT_CFG_PORTID_VAL(x) (((x) << 2) & GENMASK(5, 2)) -#define ANA_PORT_PORT_CFG_PORTID_VAL_M GENMASK(5, 2) -#define ANA_PORT_PORT_CFG_PORTID_VAL_X(x) (((x) & GENMASK(5, 2)) >> 2) -#define ANA_PORT_PORT_CFG_USE_B_DOM_TBL BIT(1) -#define ANA_PORT_PORT_CFG_LSR_MODE BIT(0) - -#define ANA_PORT_POL_CFG_GSZ 0x100 - -#define ANA_PORT_POL_CFG_POL_CPU_REDIR_8021 BIT(19) -#define ANA_PORT_POL_CFG_POL_CPU_REDIR_IP BIT(18) -#define ANA_PORT_POL_CFG_PORT_POL_ENA BIT(17) -#define ANA_PORT_POL_CFG_QUEUE_POL_ENA(x) (((x) << 9) & GENMASK(16, 9)) -#define ANA_PORT_POL_CFG_QUEUE_POL_ENA_M GENMASK(16, 9) -#define ANA_PORT_POL_CFG_QUEUE_POL_ENA_X(x) (((x) & GENMASK(16, 9)) >> 9) -#define ANA_PORT_POL_CFG_POL_ORDER(x) ((x) & GENMASK(8, 0)) -#define ANA_PORT_POL_CFG_POL_ORDER_M GENMASK(8, 0) - -#define ANA_PORT_PTP_CFG_GSZ 0x100 - -#define ANA_PORT_PTP_CFG_PTP_BACKPLANE_MODE BIT(0) - -#define ANA_PORT_PTP_DLY1_CFG_GSZ 0x100 - -#define ANA_PORT_PTP_DLY2_CFG_GSZ 0x100 - -#define ANA_PORT_SFID_CFG_GSZ 0x100 -#define ANA_PORT_SFID_CFG_RSZ 0x4 - -#define ANA_PORT_SFID_CFG_SFID_VALID BIT(8) -#define ANA_PORT_SFID_CFG_SFID(x) ((x) & GENMASK(7, 0)) -#define ANA_PORT_SFID_CFG_SFID_M GENMASK(7, 0) - -#define ANA_PFC_PFC_CFG_GSZ 0x40 - -#define ANA_PFC_PFC_CFG_RX_PFC_ENA(x) (((x) << 2) & GENMASK(9, 2)) -#define ANA_PFC_PFC_CFG_RX_PFC_ENA_M GENMASK(9, 2) -#define ANA_PFC_PFC_CFG_RX_PFC_ENA_X(x) (((x) & GENMASK(9, 2)) >> 2) -#define ANA_PFC_PFC_CFG_FC_LINK_SPEED(x) ((x) & GENMASK(1, 0)) -#define ANA_PFC_PFC_CFG_FC_LINK_SPEED_M GENMASK(1, 0) - -#define ANA_PFC_PFC_TIMER_GSZ 0x40 -#define ANA_PFC_PFC_TIMER_RSZ 0x4 - -#define ANA_IPT_OAM_MEP_CFG_GSZ 0x8 - -#define ANA_IPT_OAM_MEP_CFG_MEP_IDX_P(x) (((x) << 6) & GENMASK(10, 6)) -#define ANA_IPT_OAM_MEP_CFG_MEP_IDX_P_M GENMASK(10, 6) -#define ANA_IPT_OAM_MEP_CFG_MEP_IDX_P_X(x) (((x) & GENMASK(10, 6)) >> 6) -#define ANA_IPT_OAM_MEP_CFG_MEP_IDX(x) (((x) << 1) & GENMASK(5, 1)) -#define ANA_IPT_OAM_MEP_CFG_MEP_IDX_M GENMASK(5, 1) -#define ANA_IPT_OAM_MEP_CFG_MEP_IDX_X(x) (((x) & GENMASK(5, 1)) >> 1) -#define ANA_IPT_OAM_MEP_CFG_MEP_IDX_ENA BIT(0) - -#define ANA_IPT_IPT_GSZ 0x8 - -#define ANA_IPT_IPT_IPT_CFG(x) (((x) << 15) & GENMASK(16, 15)) -#define ANA_IPT_IPT_IPT_CFG_M GENMASK(16, 15) -#define ANA_IPT_IPT_IPT_CFG_X(x) (((x) & GENMASK(16, 15)) >> 15) -#define 
ANA_IPT_IPT_ISDX_P(x) (((x) << 7) & GENMASK(14, 7)) -#define ANA_IPT_IPT_ISDX_P_M GENMASK(14, 7) -#define ANA_IPT_IPT_ISDX_P_X(x) (((x) & GENMASK(14, 7)) >> 7) -#define ANA_IPT_IPT_PPT_IDX(x) ((x) & GENMASK(6, 0)) -#define ANA_IPT_IPT_PPT_IDX_M GENMASK(6, 0) - -#define ANA_PPT_PPT_RSZ 0x4 - -#define ANA_FID_MAP_FID_MAP_RSZ 0x4 - -#define ANA_FID_MAP_FID_MAP_FID_C_VAL(x) (((x) << 6) & GENMASK(11, 6)) -#define ANA_FID_MAP_FID_MAP_FID_C_VAL_M GENMASK(11, 6) -#define ANA_FID_MAP_FID_MAP_FID_C_VAL_X(x) (((x) & GENMASK(11, 6)) >> 6) -#define ANA_FID_MAP_FID_MAP_FID_B_VAL(x) ((x) & GENMASK(5, 0)) -#define ANA_FID_MAP_FID_MAP_FID_B_VAL_M GENMASK(5, 0) - -#define ANA_AGGR_CFG_AC_RND_ENA BIT(7) -#define ANA_AGGR_CFG_AC_DMAC_ENA BIT(6) -#define ANA_AGGR_CFG_AC_SMAC_ENA BIT(5) -#define ANA_AGGR_CFG_AC_IP6_FLOW_LBL_ENA BIT(4) -#define ANA_AGGR_CFG_AC_IP6_TCPUDP_ENA BIT(3) -#define ANA_AGGR_CFG_AC_IP4_SIPDIP_ENA BIT(2) -#define ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA BIT(1) -#define ANA_AGGR_CFG_AC_ISDX_ENA BIT(0) - -#define ANA_CPUQ_CFG_CPUQ_MLD(x) (((x) << 27) & GENMASK(29, 27)) -#define ANA_CPUQ_CFG_CPUQ_MLD_M GENMASK(29, 27) -#define ANA_CPUQ_CFG_CPUQ_MLD_X(x) (((x) & GENMASK(29, 27)) >> 27) -#define ANA_CPUQ_CFG_CPUQ_IGMP(x) (((x) << 24) & GENMASK(26, 24)) -#define ANA_CPUQ_CFG_CPUQ_IGMP_M GENMASK(26, 24) -#define ANA_CPUQ_CFG_CPUQ_IGMP_X(x) (((x) & GENMASK(26, 24)) >> 24) -#define ANA_CPUQ_CFG_CPUQ_IPMC_CTRL(x) (((x) << 21) & GENMASK(23, 21)) -#define ANA_CPUQ_CFG_CPUQ_IPMC_CTRL_M GENMASK(23, 21) -#define ANA_CPUQ_CFG_CPUQ_IPMC_CTRL_X(x) (((x) & GENMASK(23, 21)) >> 21) -#define ANA_CPUQ_CFG_CPUQ_ALLBRIDGE(x) (((x) << 18) & GENMASK(20, 18)) -#define ANA_CPUQ_CFG_CPUQ_ALLBRIDGE_M GENMASK(20, 18) -#define ANA_CPUQ_CFG_CPUQ_ALLBRIDGE_X(x) (((x) & GENMASK(20, 18)) >> 18) -#define ANA_CPUQ_CFG_CPUQ_LOCKED_PORTMOVE(x) (((x) << 15) & GENMASK(17, 15)) -#define ANA_CPUQ_CFG_CPUQ_LOCKED_PORTMOVE_M GENMASK(17, 15) -#define ANA_CPUQ_CFG_CPUQ_LOCKED_PORTMOVE_X(x) (((x) & GENMASK(17, 15)) >> 15) -#define ANA_CPUQ_CFG_CPUQ_SRC_COPY(x) (((x) << 12) & GENMASK(14, 12)) -#define ANA_CPUQ_CFG_CPUQ_SRC_COPY_M GENMASK(14, 12) -#define ANA_CPUQ_CFG_CPUQ_SRC_COPY_X(x) (((x) & GENMASK(14, 12)) >> 12) -#define ANA_CPUQ_CFG_CPUQ_MAC_COPY(x) (((x) << 9) & GENMASK(11, 9)) -#define ANA_CPUQ_CFG_CPUQ_MAC_COPY_M GENMASK(11, 9) -#define ANA_CPUQ_CFG_CPUQ_MAC_COPY_X(x) (((x) & GENMASK(11, 9)) >> 9) -#define ANA_CPUQ_CFG_CPUQ_LRN(x) (((x) << 6) & GENMASK(8, 6)) -#define ANA_CPUQ_CFG_CPUQ_LRN_M GENMASK(8, 6) -#define ANA_CPUQ_CFG_CPUQ_LRN_X(x) (((x) & GENMASK(8, 6)) >> 6) -#define ANA_CPUQ_CFG_CPUQ_MIRROR(x) (((x) << 3) & GENMASK(5, 3)) -#define ANA_CPUQ_CFG_CPUQ_MIRROR_M GENMASK(5, 3) -#define ANA_CPUQ_CFG_CPUQ_MIRROR_X(x) (((x) & GENMASK(5, 3)) >> 3) -#define ANA_CPUQ_CFG_CPUQ_SFLOW(x) ((x) & GENMASK(2, 0)) -#define ANA_CPUQ_CFG_CPUQ_SFLOW_M GENMASK(2, 0) - -#define ANA_CPUQ_8021_CFG_RSZ 0x4 - -#define ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(x) (((x) << 6) & GENMASK(8, 6)) -#define ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL_M GENMASK(8, 6) -#define ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL_X(x) (((x) & GENMASK(8, 6)) >> 6) -#define ANA_CPUQ_8021_CFG_CPUQ_GARP_VAL(x) (((x) << 3) & GENMASK(5, 3)) -#define ANA_CPUQ_8021_CFG_CPUQ_GARP_VAL_M GENMASK(5, 3) -#define ANA_CPUQ_8021_CFG_CPUQ_GARP_VAL_X(x) (((x) & GENMASK(5, 3)) >> 3) -#define ANA_CPUQ_8021_CFG_CPUQ_CCM_VAL(x) ((x) & GENMASK(2, 0)) -#define ANA_CPUQ_8021_CFG_CPUQ_CCM_VAL_M GENMASK(2, 0) - -#define ANA_DSCP_CFG_RSZ 0x4 - -#define ANA_DSCP_CFG_DP_DSCP_VAL BIT(11) -#define ANA_DSCP_CFG_QOS_DSCP_VAL(x) (((x) << 8) & 
GENMASK(10, 8)) -#define ANA_DSCP_CFG_QOS_DSCP_VAL_M GENMASK(10, 8) -#define ANA_DSCP_CFG_QOS_DSCP_VAL_X(x) (((x) & GENMASK(10, 8)) >> 8) -#define ANA_DSCP_CFG_DSCP_TRANSLATE_VAL(x) (((x) << 2) & GENMASK(7, 2)) -#define ANA_DSCP_CFG_DSCP_TRANSLATE_VAL_M GENMASK(7, 2) -#define ANA_DSCP_CFG_DSCP_TRANSLATE_VAL_X(x) (((x) & GENMASK(7, 2)) >> 2) -#define ANA_DSCP_CFG_DSCP_TRUST_ENA BIT(1) -#define ANA_DSCP_CFG_DSCP_REWR_ENA BIT(0) - -#define ANA_DSCP_REWR_CFG_RSZ 0x4 - -#define ANA_VCAP_RNG_TYPE_CFG_RSZ 0x4 - -#define ANA_VCAP_RNG_VAL_CFG_RSZ 0x4 - -#define ANA_VCAP_RNG_VAL_CFG_VCAP_RNG_MIN_VAL(x) (((x) << 16) & GENMASK(31, 16)) -#define ANA_VCAP_RNG_VAL_CFG_VCAP_RNG_MIN_VAL_M GENMASK(31, 16) -#define ANA_VCAP_RNG_VAL_CFG_VCAP_RNG_MIN_VAL_X(x) (((x) & GENMASK(31, 16)) >> 16) -#define ANA_VCAP_RNG_VAL_CFG_VCAP_RNG_MAX_VAL(x) ((x) & GENMASK(15, 0)) -#define ANA_VCAP_RNG_VAL_CFG_VCAP_RNG_MAX_VAL_M GENMASK(15, 0) - -#define ANA_VRAP_CFG_VRAP_VLAN_AWARE_ENA BIT(12) -#define ANA_VRAP_CFG_VRAP_VID(x) ((x) & GENMASK(11, 0)) -#define ANA_VRAP_CFG_VRAP_VID_M GENMASK(11, 0) - -#define ANA_DISCARD_CFG_DROP_TAGGING_ISDX0 BIT(3) -#define ANA_DISCARD_CFG_DROP_CTRLPROT_ISDX0 BIT(2) -#define ANA_DISCARD_CFG_DROP_TAGGING_S2_ENA BIT(1) -#define ANA_DISCARD_CFG_DROP_CTRLPROT_S2_ENA BIT(0) - -#define ANA_FID_CFG_VID_MC_ENA BIT(0) - -#define ANA_POL_PIR_CFG_GSZ 0x20 - -#define ANA_POL_PIR_CFG_PIR_RATE(x) (((x) << 6) & GENMASK(20, 6)) -#define ANA_POL_PIR_CFG_PIR_RATE_M GENMASK(20, 6) -#define ANA_POL_PIR_CFG_PIR_RATE_X(x) (((x) & GENMASK(20, 6)) >> 6) -#define ANA_POL_PIR_CFG_PIR_BURST(x) ((x) & GENMASK(5, 0)) -#define ANA_POL_PIR_CFG_PIR_BURST_M GENMASK(5, 0) - -#define ANA_POL_CIR_CFG_GSZ 0x20 - -#define ANA_POL_CIR_CFG_CIR_RATE(x) (((x) << 6) & GENMASK(20, 6)) -#define ANA_POL_CIR_CFG_CIR_RATE_M GENMASK(20, 6) -#define ANA_POL_CIR_CFG_CIR_RATE_X(x) (((x) & GENMASK(20, 6)) >> 6) -#define ANA_POL_CIR_CFG_CIR_BURST(x) ((x) & GENMASK(5, 0)) -#define ANA_POL_CIR_CFG_CIR_BURST_M GENMASK(5, 0) - -#define ANA_POL_MODE_CFG_GSZ 0x20 - -#define ANA_POL_MODE_CFG_IPG_SIZE(x) (((x) << 5) & GENMASK(9, 5)) -#define ANA_POL_MODE_CFG_IPG_SIZE_M GENMASK(9, 5) -#define ANA_POL_MODE_CFG_IPG_SIZE_X(x) (((x) & GENMASK(9, 5)) >> 5) -#define ANA_POL_MODE_CFG_FRM_MODE(x) (((x) << 3) & GENMASK(4, 3)) -#define ANA_POL_MODE_CFG_FRM_MODE_M GENMASK(4, 3) -#define ANA_POL_MODE_CFG_FRM_MODE_X(x) (((x) & GENMASK(4, 3)) >> 3) -#define ANA_POL_MODE_CFG_DLB_COUPLED BIT(2) -#define ANA_POL_MODE_CFG_CIR_ENA BIT(1) -#define ANA_POL_MODE_CFG_OVERSHOOT_ENA BIT(0) - -#define ANA_POL_PIR_STATE_GSZ 0x20 - -#define ANA_POL_CIR_STATE_GSZ 0x20 - -#define ANA_POL_STATE_GSZ 0x20 - -#define ANA_POL_FLOWC_RSZ 0x4 - -#define ANA_POL_FLOWC_POL_FLOWC BIT(0) - -#define ANA_POL_HYST_POL_FC_HYST(x) (((x) << 4) & GENMASK(9, 4)) -#define ANA_POL_HYST_POL_FC_HYST_M GENMASK(9, 4) -#define ANA_POL_HYST_POL_FC_HYST_X(x) (((x) & GENMASK(9, 4)) >> 4) -#define ANA_POL_HYST_POL_STOP_HYST(x) ((x) & GENMASK(3, 0)) -#define ANA_POL_HYST_POL_STOP_HYST_M GENMASK(3, 0) - -#define ANA_POL_MISC_CFG_POL_CLOSE_ALL BIT(1) -#define ANA_POL_MISC_CFG_POL_LEAK_DIS BIT(0) - -#endif diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c index 2da8eee27e98..b38820849faa 100644 --- a/drivers/net/ethernet/mscc/ocelot_board.c +++ b/drivers/net/ethernet/mscc/ocelot_board.c @@ -402,9 +402,9 @@ static int mscc_ocelot_probe(struct platform_device *pdev) of_get_phy_mode(portnp, &phy_mode); - priv->phy_mode = phy_mode; + ocelot_port->phy_mode = phy_mode; - switch 
(priv->phy_mode) { + switch (ocelot_port->phy_mode) { case PHY_INTERFACE_MODE_NA: continue; case PHY_INTERFACE_MODE_SGMII: diff --git a/drivers/net/ethernet/mscc/ocelot_dev.h b/drivers/net/ethernet/mscc/ocelot_dev.h deleted file mode 100644 index 0a50d53bbd3f..000000000000 --- a/drivers/net/ethernet/mscc/ocelot_dev.h +++ /dev/null @@ -1,275 +0,0 @@ -/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ -/* - * Microsemi Ocelot Switch driver - * - * Copyright (c) 2017 Microsemi Corporation - */ - -#ifndef _MSCC_OCELOT_DEV_H_ -#define _MSCC_OCELOT_DEV_H_ - -#define DEV_CLOCK_CFG 0x0 - -#define DEV_CLOCK_CFG_MAC_TX_RST BIT(7) -#define DEV_CLOCK_CFG_MAC_RX_RST BIT(6) -#define DEV_CLOCK_CFG_PCS_TX_RST BIT(5) -#define DEV_CLOCK_CFG_PCS_RX_RST BIT(4) -#define DEV_CLOCK_CFG_PORT_RST BIT(3) -#define DEV_CLOCK_CFG_PHY_RST BIT(2) -#define DEV_CLOCK_CFG_LINK_SPEED(x) ((x) & GENMASK(1, 0)) -#define DEV_CLOCK_CFG_LINK_SPEED_M GENMASK(1, 0) - -#define DEV_PORT_MISC 0x4 - -#define DEV_PORT_MISC_FWD_ERROR_ENA BIT(4) -#define DEV_PORT_MISC_FWD_PAUSE_ENA BIT(3) -#define DEV_PORT_MISC_FWD_CTRL_ENA BIT(2) -#define DEV_PORT_MISC_DEV_LOOP_ENA BIT(1) -#define DEV_PORT_MISC_HDX_FAST_DIS BIT(0) - -#define DEV_EVENTS 0x8 - -#define DEV_EEE_CFG 0xc - -#define DEV_EEE_CFG_EEE_ENA BIT(22) -#define DEV_EEE_CFG_EEE_TIMER_AGE(x) (((x) << 15) & GENMASK(21, 15)) -#define DEV_EEE_CFG_EEE_TIMER_AGE_M GENMASK(21, 15) -#define DEV_EEE_CFG_EEE_TIMER_AGE_X(x) (((x) & GENMASK(21, 15)) >> 15) -#define DEV_EEE_CFG_EEE_TIMER_WAKEUP(x) (((x) << 8) & GENMASK(14, 8)) -#define DEV_EEE_CFG_EEE_TIMER_WAKEUP_M GENMASK(14, 8) -#define DEV_EEE_CFG_EEE_TIMER_WAKEUP_X(x) (((x) & GENMASK(14, 8)) >> 8) -#define DEV_EEE_CFG_EEE_TIMER_HOLDOFF(x) (((x) << 1) & GENMASK(7, 1)) -#define DEV_EEE_CFG_EEE_TIMER_HOLDOFF_M GENMASK(7, 1) -#define DEV_EEE_CFG_EEE_TIMER_HOLDOFF_X(x) (((x) & GENMASK(7, 1)) >> 1) -#define DEV_EEE_CFG_PORT_LPI BIT(0) - -#define DEV_RX_PATH_DELAY 0x10 - -#define DEV_TX_PATH_DELAY 0x14 - -#define DEV_PTP_PREDICT_CFG 0x18 - -#define DEV_PTP_PREDICT_CFG_PTP_PHY_PREDICT_CFG(x) (((x) << 4) & GENMASK(11, 4)) -#define DEV_PTP_PREDICT_CFG_PTP_PHY_PREDICT_CFG_M GENMASK(11, 4) -#define DEV_PTP_PREDICT_CFG_PTP_PHY_PREDICT_CFG_X(x) (((x) & GENMASK(11, 4)) >> 4) -#define DEV_PTP_PREDICT_CFG_PTP_PHASE_PREDICT_CFG(x) ((x) & GENMASK(3, 0)) -#define DEV_PTP_PREDICT_CFG_PTP_PHASE_PREDICT_CFG_M GENMASK(3, 0) - -#define DEV_MAC_ENA_CFG 0x1c - -#define DEV_MAC_ENA_CFG_RX_ENA BIT(4) -#define DEV_MAC_ENA_CFG_TX_ENA BIT(0) - -#define DEV_MAC_MODE_CFG 0x20 - -#define DEV_MAC_MODE_CFG_FC_WORD_SYNC_ENA BIT(8) -#define DEV_MAC_MODE_CFG_GIGA_MODE_ENA BIT(4) -#define DEV_MAC_MODE_CFG_FDX_ENA BIT(0) - -#define DEV_MAC_MAXLEN_CFG 0x24 - -#define DEV_MAC_TAGS_CFG 0x28 - -#define DEV_MAC_TAGS_CFG_TAG_ID(x) (((x) << 16) & GENMASK(31, 16)) -#define DEV_MAC_TAGS_CFG_TAG_ID_M GENMASK(31, 16) -#define DEV_MAC_TAGS_CFG_TAG_ID_X(x) (((x) & GENMASK(31, 16)) >> 16) -#define DEV_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA BIT(2) -#define DEV_MAC_TAGS_CFG_PB_ENA BIT(1) -#define DEV_MAC_TAGS_CFG_VLAN_AWR_ENA BIT(0) - -#define DEV_MAC_ADV_CHK_CFG 0x2c - -#define DEV_MAC_ADV_CHK_CFG_LEN_DROP_ENA BIT(0) - -#define DEV_MAC_IFG_CFG 0x30 - -#define DEV_MAC_IFG_CFG_RESTORE_OLD_IPG_CHECK BIT(17) -#define DEV_MAC_IFG_CFG_REDUCED_TX_IFG BIT(16) -#define DEV_MAC_IFG_CFG_TX_IFG(x) (((x) << 8) & GENMASK(12, 8)) -#define DEV_MAC_IFG_CFG_TX_IFG_M GENMASK(12, 8) -#define DEV_MAC_IFG_CFG_TX_IFG_X(x) (((x) & GENMASK(12, 8)) >> 8) -#define DEV_MAC_IFG_CFG_RX_IFG2(x) (((x) << 4) & GENMASK(7, 4)) -#define 
DEV_MAC_IFG_CFG_RX_IFG2_M GENMASK(7, 4) -#define DEV_MAC_IFG_CFG_RX_IFG2_X(x) (((x) & GENMASK(7, 4)) >> 4) -#define DEV_MAC_IFG_CFG_RX_IFG1(x) ((x) & GENMASK(3, 0)) -#define DEV_MAC_IFG_CFG_RX_IFG1_M GENMASK(3, 0) - -#define DEV_MAC_HDX_CFG 0x34 - -#define DEV_MAC_HDX_CFG_BYPASS_COL_SYNC BIT(26) -#define DEV_MAC_HDX_CFG_OB_ENA BIT(25) -#define DEV_MAC_HDX_CFG_WEXC_DIS BIT(24) -#define DEV_MAC_HDX_CFG_SEED(x) (((x) << 16) & GENMASK(23, 16)) -#define DEV_MAC_HDX_CFG_SEED_M GENMASK(23, 16) -#define DEV_MAC_HDX_CFG_SEED_X(x) (((x) & GENMASK(23, 16)) >> 16) -#define DEV_MAC_HDX_CFG_SEED_LOAD BIT(12) -#define DEV_MAC_HDX_CFG_RETRY_AFTER_EXC_COL_ENA BIT(8) -#define DEV_MAC_HDX_CFG_LATE_COL_POS(x) ((x) & GENMASK(6, 0)) -#define DEV_MAC_HDX_CFG_LATE_COL_POS_M GENMASK(6, 0) - -#define DEV_MAC_DBG_CFG 0x38 - -#define DEV_MAC_DBG_CFG_TBI_MODE BIT(4) -#define DEV_MAC_DBG_CFG_IFG_CRS_EXT_CHK_ENA BIT(0) - -#define DEV_MAC_FC_MAC_LOW_CFG 0x3c - -#define DEV_MAC_FC_MAC_HIGH_CFG 0x40 - -#define DEV_MAC_STICKY 0x44 - -#define DEV_MAC_STICKY_RX_IPG_SHRINK_STICKY BIT(9) -#define DEV_MAC_STICKY_RX_PREAM_SHRINK_STICKY BIT(8) -#define DEV_MAC_STICKY_RX_CARRIER_EXT_STICKY BIT(7) -#define DEV_MAC_STICKY_RX_CARRIER_EXT_ERR_STICKY BIT(6) -#define DEV_MAC_STICKY_RX_JUNK_STICKY BIT(5) -#define DEV_MAC_STICKY_TX_RETRANSMIT_STICKY BIT(4) -#define DEV_MAC_STICKY_TX_JAM_STICKY BIT(3) -#define DEV_MAC_STICKY_TX_FIFO_OFLW_STICKY BIT(2) -#define DEV_MAC_STICKY_TX_FRM_LEN_OVR_STICKY BIT(1) -#define DEV_MAC_STICKY_TX_ABORT_STICKY BIT(0) - -#define PCS1G_CFG 0x48 - -#define PCS1G_CFG_LINK_STATUS_TYPE BIT(4) -#define PCS1G_CFG_AN_LINK_CTRL_ENA BIT(1) -#define PCS1G_CFG_PCS_ENA BIT(0) - -#define PCS1G_MODE_CFG 0x4c - -#define PCS1G_MODE_CFG_UNIDIR_MODE_ENA BIT(4) -#define PCS1G_MODE_CFG_SGMII_MODE_ENA BIT(0) - -#define PCS1G_SD_CFG 0x50 - -#define PCS1G_SD_CFG_SD_SEL BIT(8) -#define PCS1G_SD_CFG_SD_POL BIT(4) -#define PCS1G_SD_CFG_SD_ENA BIT(0) - -#define PCS1G_ANEG_CFG 0x54 - -#define PCS1G_ANEG_CFG_ADV_ABILITY(x) (((x) << 16) & GENMASK(31, 16)) -#define PCS1G_ANEG_CFG_ADV_ABILITY_M GENMASK(31, 16) -#define PCS1G_ANEG_CFG_ADV_ABILITY_X(x) (((x) & GENMASK(31, 16)) >> 16) -#define PCS1G_ANEG_CFG_SW_RESOLVE_ENA BIT(8) -#define PCS1G_ANEG_CFG_ANEG_RESTART_ONE_SHOT BIT(1) -#define PCS1G_ANEG_CFG_ANEG_ENA BIT(0) - -#define PCS1G_ANEG_NP_CFG 0x58 - -#define PCS1G_ANEG_NP_CFG_NP_TX(x) (((x) << 16) & GENMASK(31, 16)) -#define PCS1G_ANEG_NP_CFG_NP_TX_M GENMASK(31, 16) -#define PCS1G_ANEG_NP_CFG_NP_TX_X(x) (((x) & GENMASK(31, 16)) >> 16) -#define PCS1G_ANEG_NP_CFG_NP_LOADED_ONE_SHOT BIT(0) - -#define PCS1G_LB_CFG 0x5c - -#define PCS1G_LB_CFG_RA_ENA BIT(4) -#define PCS1G_LB_CFG_GMII_PHY_LB_ENA BIT(1) -#define PCS1G_LB_CFG_TBI_HOST_LB_ENA BIT(0) - -#define PCS1G_DBG_CFG 0x60 - -#define PCS1G_DBG_CFG_UDLT BIT(0) - -#define PCS1G_CDET_CFG 0x64 - -#define PCS1G_CDET_CFG_CDET_ENA BIT(0) - -#define PCS1G_ANEG_STATUS 0x68 - -#define PCS1G_ANEG_STATUS_LP_ADV_ABILITY(x) (((x) << 16) & GENMASK(31, 16)) -#define PCS1G_ANEG_STATUS_LP_ADV_ABILITY_M GENMASK(31, 16) -#define PCS1G_ANEG_STATUS_LP_ADV_ABILITY_X(x) (((x) & GENMASK(31, 16)) >> 16) -#define PCS1G_ANEG_STATUS_PR BIT(4) -#define PCS1G_ANEG_STATUS_PAGE_RX_STICKY BIT(3) -#define PCS1G_ANEG_STATUS_ANEG_COMPLETE BIT(0) - -#define PCS1G_ANEG_NP_STATUS 0x6c - -#define PCS1G_LINK_STATUS 0x70 - -#define PCS1G_LINK_STATUS_DELAY_VAR(x) (((x) << 12) & GENMASK(15, 12)) -#define PCS1G_LINK_STATUS_DELAY_VAR_M GENMASK(15, 12) -#define PCS1G_LINK_STATUS_DELAY_VAR_X(x) (((x) & GENMASK(15, 12)) >> 12) -#define 
PCS1G_LINK_STATUS_SIGNAL_DETECT BIT(8) -#define PCS1G_LINK_STATUS_LINK_STATUS BIT(4) -#define PCS1G_LINK_STATUS_SYNC_STATUS BIT(0) - -#define PCS1G_LINK_DOWN_CNT 0x74 - -#define PCS1G_STICKY 0x78 - -#define PCS1G_STICKY_LINK_DOWN_STICKY BIT(4) -#define PCS1G_STICKY_OUT_OF_SYNC_STICKY BIT(0) - -#define PCS1G_DEBUG_STATUS 0x7c - -#define PCS1G_LPI_CFG 0x80 - -#define PCS1G_LPI_CFG_QSGMII_MS_SEL BIT(20) -#define PCS1G_LPI_CFG_RX_LPI_OUT_DIS BIT(17) -#define PCS1G_LPI_CFG_LPI_TESTMODE BIT(16) -#define PCS1G_LPI_CFG_LPI_RX_WTIM(x) (((x) << 4) & GENMASK(5, 4)) -#define PCS1G_LPI_CFG_LPI_RX_WTIM_M GENMASK(5, 4) -#define PCS1G_LPI_CFG_LPI_RX_WTIM_X(x) (((x) & GENMASK(5, 4)) >> 4) -#define PCS1G_LPI_CFG_TX_ASSERT_LPIDLE BIT(0) - -#define PCS1G_LPI_WAKE_ERROR_CNT 0x84 - -#define PCS1G_LPI_STATUS 0x88 - -#define PCS1G_LPI_STATUS_RX_LPI_FAIL BIT(16) -#define PCS1G_LPI_STATUS_RX_LPI_EVENT_STICKY BIT(12) -#define PCS1G_LPI_STATUS_RX_QUIET BIT(9) -#define PCS1G_LPI_STATUS_RX_LPI_MODE BIT(8) -#define PCS1G_LPI_STATUS_TX_LPI_EVENT_STICKY BIT(4) -#define PCS1G_LPI_STATUS_TX_QUIET BIT(1) -#define PCS1G_LPI_STATUS_TX_LPI_MODE BIT(0) - -#define PCS1G_TSTPAT_MODE_CFG 0x8c - -#define PCS1G_TSTPAT_STATUS 0x90 - -#define PCS1G_TSTPAT_STATUS_JTP_ERR_CNT(x) (((x) << 8) & GENMASK(15, 8)) -#define PCS1G_TSTPAT_STATUS_JTP_ERR_CNT_M GENMASK(15, 8) -#define PCS1G_TSTPAT_STATUS_JTP_ERR_CNT_X(x) (((x) & GENMASK(15, 8)) >> 8) -#define PCS1G_TSTPAT_STATUS_JTP_ERR BIT(4) -#define PCS1G_TSTPAT_STATUS_JTP_LOCK BIT(0) - -#define DEV_PCS_FX100_CFG 0x94 - -#define DEV_PCS_FX100_CFG_SD_SEL BIT(26) -#define DEV_PCS_FX100_CFG_SD_POL BIT(25) -#define DEV_PCS_FX100_CFG_SD_ENA BIT(24) -#define DEV_PCS_FX100_CFG_LOOPBACK_ENA BIT(20) -#define DEV_PCS_FX100_CFG_SWAP_MII_ENA BIT(16) -#define DEV_PCS_FX100_CFG_RXBITSEL(x) (((x) << 12) & GENMASK(15, 12)) -#define DEV_PCS_FX100_CFG_RXBITSEL_M GENMASK(15, 12) -#define DEV_PCS_FX100_CFG_RXBITSEL_X(x) (((x) & GENMASK(15, 12)) >> 12) -#define DEV_PCS_FX100_CFG_SIGDET_CFG(x) (((x) << 9) & GENMASK(10, 9)) -#define DEV_PCS_FX100_CFG_SIGDET_CFG_M GENMASK(10, 9) -#define DEV_PCS_FX100_CFG_SIGDET_CFG_X(x) (((x) & GENMASK(10, 9)) >> 9) -#define DEV_PCS_FX100_CFG_LINKHYST_TM_ENA BIT(8) -#define DEV_PCS_FX100_CFG_LINKHYSTTIMER(x) (((x) << 4) & GENMASK(7, 4)) -#define DEV_PCS_FX100_CFG_LINKHYSTTIMER_M GENMASK(7, 4) -#define DEV_PCS_FX100_CFG_LINKHYSTTIMER_X(x) (((x) & GENMASK(7, 4)) >> 4) -#define DEV_PCS_FX100_CFG_UNIDIR_MODE_ENA BIT(3) -#define DEV_PCS_FX100_CFG_FEFCHK_ENA BIT(2) -#define DEV_PCS_FX100_CFG_FEFGEN_ENA BIT(1) -#define DEV_PCS_FX100_CFG_PCS_ENA BIT(0) - -#define DEV_PCS_FX100_STATUS 0x98 - -#define DEV_PCS_FX100_STATUS_EDGE_POS_PTP(x) (((x) << 8) & GENMASK(11, 8)) -#define DEV_PCS_FX100_STATUS_EDGE_POS_PTP_M GENMASK(11, 8) -#define DEV_PCS_FX100_STATUS_EDGE_POS_PTP_X(x) (((x) & GENMASK(11, 8)) >> 8) -#define DEV_PCS_FX100_STATUS_PCS_ERROR_STICKY BIT(7) -#define DEV_PCS_FX100_STATUS_FEF_FOUND_STICKY BIT(6) -#define DEV_PCS_FX100_STATUS_SSD_ERROR_STICKY BIT(5) -#define DEV_PCS_FX100_STATUS_SYNC_LOST_STICKY BIT(4) -#define DEV_PCS_FX100_STATUS_FEF_STATUS BIT(2) -#define DEV_PCS_FX100_STATUS_SIGNAL_DETECT BIT(1) -#define DEV_PCS_FX100_STATUS_SYNC_STATUS BIT(0) - -#endif diff --git a/drivers/net/ethernet/mscc/ocelot_qsys.h b/drivers/net/ethernet/mscc/ocelot_qsys.h deleted file mode 100644 index d8c63aa761be..000000000000 --- a/drivers/net/ethernet/mscc/ocelot_qsys.h +++ /dev/null @@ -1,270 +0,0 @@ -/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ -/* - * Microsemi Ocelot Switch driver - * - * 
Copyright (c) 2017 Microsemi Corporation - */ - -#ifndef _MSCC_OCELOT_QSYS_H_ -#define _MSCC_OCELOT_QSYS_H_ - -#define QSYS_PORT_MODE_RSZ 0x4 - -#define QSYS_PORT_MODE_DEQUEUE_DIS BIT(1) -#define QSYS_PORT_MODE_DEQUEUE_LATE BIT(0) - -#define QSYS_SWITCH_PORT_MODE_RSZ 0x4 - -#define QSYS_SWITCH_PORT_MODE_PORT_ENA BIT(14) -#define QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG(x) (((x) << 11) & GENMASK(13, 11)) -#define QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG_M GENMASK(13, 11) -#define QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG_X(x) (((x) & GENMASK(13, 11)) >> 11) -#define QSYS_SWITCH_PORT_MODE_YEL_RSRVD BIT(10) -#define QSYS_SWITCH_PORT_MODE_INGRESS_DROP_MODE BIT(9) -#define QSYS_SWITCH_PORT_MODE_TX_PFC_ENA(x) (((x) << 1) & GENMASK(8, 1)) -#define QSYS_SWITCH_PORT_MODE_TX_PFC_ENA_M GENMASK(8, 1) -#define QSYS_SWITCH_PORT_MODE_TX_PFC_ENA_X(x) (((x) & GENMASK(8, 1)) >> 1) -#define QSYS_SWITCH_PORT_MODE_TX_PFC_MODE BIT(0) - -#define QSYS_STAT_CNT_CFG_TX_GREEN_CNT_MODE BIT(5) -#define QSYS_STAT_CNT_CFG_TX_YELLOW_CNT_MODE BIT(4) -#define QSYS_STAT_CNT_CFG_DROP_GREEN_CNT_MODE BIT(3) -#define QSYS_STAT_CNT_CFG_DROP_YELLOW_CNT_MODE BIT(2) -#define QSYS_STAT_CNT_CFG_DROP_COUNT_ONCE BIT(1) -#define QSYS_STAT_CNT_CFG_DROP_COUNT_EGRESS BIT(0) - -#define QSYS_EEE_CFG_RSZ 0x4 - -#define QSYS_EEE_THRES_EEE_HIGH_BYTES(x) (((x) << 8) & GENMASK(15, 8)) -#define QSYS_EEE_THRES_EEE_HIGH_BYTES_M GENMASK(15, 8) -#define QSYS_EEE_THRES_EEE_HIGH_BYTES_X(x) (((x) & GENMASK(15, 8)) >> 8) -#define QSYS_EEE_THRES_EEE_HIGH_FRAMES(x) ((x) & GENMASK(7, 0)) -#define QSYS_EEE_THRES_EEE_HIGH_FRAMES_M GENMASK(7, 0) - -#define QSYS_SW_STATUS_RSZ 0x4 - -#define QSYS_EXT_CPU_CFG_EXT_CPU_PORT(x) (((x) << 8) & GENMASK(12, 8)) -#define QSYS_EXT_CPU_CFG_EXT_CPU_PORT_M GENMASK(12, 8) -#define QSYS_EXT_CPU_CFG_EXT_CPU_PORT_X(x) (((x) & GENMASK(12, 8)) >> 8) -#define QSYS_EXT_CPU_CFG_EXT_CPUQ_MSK(x) ((x) & GENMASK(7, 0)) -#define QSYS_EXT_CPU_CFG_EXT_CPUQ_MSK_M GENMASK(7, 0) - -#define QSYS_QMAP_GSZ 0x4 - -#define QSYS_QMAP_SE_BASE(x) (((x) << 5) & GENMASK(12, 5)) -#define QSYS_QMAP_SE_BASE_M GENMASK(12, 5) -#define QSYS_QMAP_SE_BASE_X(x) (((x) & GENMASK(12, 5)) >> 5) -#define QSYS_QMAP_SE_IDX_SEL(x) (((x) << 2) & GENMASK(4, 2)) -#define QSYS_QMAP_SE_IDX_SEL_M GENMASK(4, 2) -#define QSYS_QMAP_SE_IDX_SEL_X(x) (((x) & GENMASK(4, 2)) >> 2) -#define QSYS_QMAP_SE_INP_SEL(x) ((x) & GENMASK(1, 0)) -#define QSYS_QMAP_SE_INP_SEL_M GENMASK(1, 0) - -#define QSYS_ISDX_SGRP_GSZ 0x4 - -#define QSYS_TIMED_FRAME_ENTRY_GSZ 0x4 - -#define QSYS_TFRM_MISC_TIMED_CANCEL_SLOT(x) (((x) << 9) & GENMASK(18, 9)) -#define QSYS_TFRM_MISC_TIMED_CANCEL_SLOT_M GENMASK(18, 9) -#define QSYS_TFRM_MISC_TIMED_CANCEL_SLOT_X(x) (((x) & GENMASK(18, 9)) >> 9) -#define QSYS_TFRM_MISC_TIMED_CANCEL_1SHOT BIT(8) -#define QSYS_TFRM_MISC_TIMED_SLOT_MODE_MC BIT(7) -#define QSYS_TFRM_MISC_TIMED_ENTRY_FAST_CNT(x) ((x) & GENMASK(6, 0)) -#define QSYS_TFRM_MISC_TIMED_ENTRY_FAST_CNT_M GENMASK(6, 0) - -#define QSYS_RED_PROFILE_RSZ 0x4 - -#define QSYS_RED_PROFILE_WM_RED_LOW(x) (((x) << 8) & GENMASK(15, 8)) -#define QSYS_RED_PROFILE_WM_RED_LOW_M GENMASK(15, 8) -#define QSYS_RED_PROFILE_WM_RED_LOW_X(x) (((x) & GENMASK(15, 8)) >> 8) -#define QSYS_RED_PROFILE_WM_RED_HIGH(x) ((x) & GENMASK(7, 0)) -#define QSYS_RED_PROFILE_WM_RED_HIGH_M GENMASK(7, 0) - -#define QSYS_RES_CFG_GSZ 0x8 - -#define QSYS_RES_STAT_GSZ 0x8 - -#define QSYS_RES_STAT_INUSE(x) (((x) << 12) & GENMASK(23, 12)) -#define QSYS_RES_STAT_INUSE_M GENMASK(23, 12) -#define QSYS_RES_STAT_INUSE_X(x) (((x) & GENMASK(23, 12)) >> 12) -#define 
QSYS_RES_STAT_MAXUSE(x) ((x) & GENMASK(11, 0)) -#define QSYS_RES_STAT_MAXUSE_M GENMASK(11, 0) - -#define QSYS_EVENTS_CORE_EV_FDC(x) (((x) << 2) & GENMASK(4, 2)) -#define QSYS_EVENTS_CORE_EV_FDC_M GENMASK(4, 2) -#define QSYS_EVENTS_CORE_EV_FDC_X(x) (((x) & GENMASK(4, 2)) >> 2) -#define QSYS_EVENTS_CORE_EV_FRD(x) ((x) & GENMASK(1, 0)) -#define QSYS_EVENTS_CORE_EV_FRD_M GENMASK(1, 0) - -#define QSYS_QMAXSDU_CFG_0_RSZ 0x4 - -#define QSYS_QMAXSDU_CFG_1_RSZ 0x4 - -#define QSYS_QMAXSDU_CFG_2_RSZ 0x4 - -#define QSYS_QMAXSDU_CFG_3_RSZ 0x4 - -#define QSYS_QMAXSDU_CFG_4_RSZ 0x4 - -#define QSYS_QMAXSDU_CFG_5_RSZ 0x4 - -#define QSYS_QMAXSDU_CFG_6_RSZ 0x4 - -#define QSYS_QMAXSDU_CFG_7_RSZ 0x4 - -#define QSYS_PREEMPTION_CFG_RSZ 0x4 - -#define QSYS_PREEMPTION_CFG_P_QUEUES(x) ((x) & GENMASK(7, 0)) -#define QSYS_PREEMPTION_CFG_P_QUEUES_M GENMASK(7, 0) -#define QSYS_PREEMPTION_CFG_MM_ADD_FRAG_SIZE(x) (((x) << 8) & GENMASK(9, 8)) -#define QSYS_PREEMPTION_CFG_MM_ADD_FRAG_SIZE_M GENMASK(9, 8) -#define QSYS_PREEMPTION_CFG_MM_ADD_FRAG_SIZE_X(x) (((x) & GENMASK(9, 8)) >> 8) -#define QSYS_PREEMPTION_CFG_STRICT_IPG(x) (((x) << 12) & GENMASK(13, 12)) -#define QSYS_PREEMPTION_CFG_STRICT_IPG_M GENMASK(13, 12) -#define QSYS_PREEMPTION_CFG_STRICT_IPG_X(x) (((x) & GENMASK(13, 12)) >> 12) -#define QSYS_PREEMPTION_CFG_HOLD_ADVANCE(x) (((x) << 16) & GENMASK(31, 16)) -#define QSYS_PREEMPTION_CFG_HOLD_ADVANCE_M GENMASK(31, 16) -#define QSYS_PREEMPTION_CFG_HOLD_ADVANCE_X(x) (((x) & GENMASK(31, 16)) >> 16) - -#define QSYS_CIR_CFG_GSZ 0x80 - -#define QSYS_CIR_CFG_CIR_RATE(x) (((x) << 6) & GENMASK(20, 6)) -#define QSYS_CIR_CFG_CIR_RATE_M GENMASK(20, 6) -#define QSYS_CIR_CFG_CIR_RATE_X(x) (((x) & GENMASK(20, 6)) >> 6) -#define QSYS_CIR_CFG_CIR_BURST(x) ((x) & GENMASK(5, 0)) -#define QSYS_CIR_CFG_CIR_BURST_M GENMASK(5, 0) - -#define QSYS_EIR_CFG_GSZ 0x80 - -#define QSYS_EIR_CFG_EIR_RATE(x) (((x) << 7) & GENMASK(21, 7)) -#define QSYS_EIR_CFG_EIR_RATE_M GENMASK(21, 7) -#define QSYS_EIR_CFG_EIR_RATE_X(x) (((x) & GENMASK(21, 7)) >> 7) -#define QSYS_EIR_CFG_EIR_BURST(x) (((x) << 1) & GENMASK(6, 1)) -#define QSYS_EIR_CFG_EIR_BURST_M GENMASK(6, 1) -#define QSYS_EIR_CFG_EIR_BURST_X(x) (((x) & GENMASK(6, 1)) >> 1) -#define QSYS_EIR_CFG_EIR_MARK_ENA BIT(0) - -#define QSYS_SE_CFG_GSZ 0x80 - -#define QSYS_SE_CFG_SE_DWRR_CNT(x) (((x) << 6) & GENMASK(9, 6)) -#define QSYS_SE_CFG_SE_DWRR_CNT_M GENMASK(9, 6) -#define QSYS_SE_CFG_SE_DWRR_CNT_X(x) (((x) & GENMASK(9, 6)) >> 6) -#define QSYS_SE_CFG_SE_RR_ENA BIT(5) -#define QSYS_SE_CFG_SE_AVB_ENA BIT(4) -#define QSYS_SE_CFG_SE_FRM_MODE(x) (((x) << 2) & GENMASK(3, 2)) -#define QSYS_SE_CFG_SE_FRM_MODE_M GENMASK(3, 2) -#define QSYS_SE_CFG_SE_FRM_MODE_X(x) (((x) & GENMASK(3, 2)) >> 2) -#define QSYS_SE_CFG_SE_EXC_ENA BIT(1) -#define QSYS_SE_CFG_SE_EXC_FWD BIT(0) - -#define QSYS_SE_DWRR_CFG_GSZ 0x80 -#define QSYS_SE_DWRR_CFG_RSZ 0x4 - -#define QSYS_SE_CONNECT_GSZ 0x80 - -#define QSYS_SE_CONNECT_SE_OUTP_IDX(x) (((x) << 17) & GENMASK(24, 17)) -#define QSYS_SE_CONNECT_SE_OUTP_IDX_M GENMASK(24, 17) -#define QSYS_SE_CONNECT_SE_OUTP_IDX_X(x) (((x) & GENMASK(24, 17)) >> 17) -#define QSYS_SE_CONNECT_SE_INP_IDX(x) (((x) << 9) & GENMASK(16, 9)) -#define QSYS_SE_CONNECT_SE_INP_IDX_M GENMASK(16, 9) -#define QSYS_SE_CONNECT_SE_INP_IDX_X(x) (((x) & GENMASK(16, 9)) >> 9) -#define QSYS_SE_CONNECT_SE_OUTP_CON(x) (((x) << 5) & GENMASK(8, 5)) -#define QSYS_SE_CONNECT_SE_OUTP_CON_M GENMASK(8, 5) -#define QSYS_SE_CONNECT_SE_OUTP_CON_X(x) (((x) & GENMASK(8, 5)) >> 5) -#define QSYS_SE_CONNECT_SE_INP_CNT(x) (((x) << 1) & GENMASK(4, 
1)) -#define QSYS_SE_CONNECT_SE_INP_CNT_M GENMASK(4, 1) -#define QSYS_SE_CONNECT_SE_INP_CNT_X(x) (((x) & GENMASK(4, 1)) >> 1) -#define QSYS_SE_CONNECT_SE_TERMINAL BIT(0) - -#define QSYS_SE_DLB_SENSE_GSZ 0x80 - -#define QSYS_SE_DLB_SENSE_SE_DLB_PRIO(x) (((x) << 11) & GENMASK(13, 11)) -#define QSYS_SE_DLB_SENSE_SE_DLB_PRIO_M GENMASK(13, 11) -#define QSYS_SE_DLB_SENSE_SE_DLB_PRIO_X(x) (((x) & GENMASK(13, 11)) >> 11) -#define QSYS_SE_DLB_SENSE_SE_DLB_SPORT(x) (((x) << 7) & GENMASK(10, 7)) -#define QSYS_SE_DLB_SENSE_SE_DLB_SPORT_M GENMASK(10, 7) -#define QSYS_SE_DLB_SENSE_SE_DLB_SPORT_X(x) (((x) & GENMASK(10, 7)) >> 7) -#define QSYS_SE_DLB_SENSE_SE_DLB_DPORT(x) (((x) << 3) & GENMASK(6, 3)) -#define QSYS_SE_DLB_SENSE_SE_DLB_DPORT_M GENMASK(6, 3) -#define QSYS_SE_DLB_SENSE_SE_DLB_DPORT_X(x) (((x) & GENMASK(6, 3)) >> 3) -#define QSYS_SE_DLB_SENSE_SE_DLB_PRIO_ENA BIT(2) -#define QSYS_SE_DLB_SENSE_SE_DLB_SPORT_ENA BIT(1) -#define QSYS_SE_DLB_SENSE_SE_DLB_DPORT_ENA BIT(0) - -#define QSYS_CIR_STATE_GSZ 0x80 - -#define QSYS_CIR_STATE_CIR_LVL(x) (((x) << 4) & GENMASK(25, 4)) -#define QSYS_CIR_STATE_CIR_LVL_M GENMASK(25, 4) -#define QSYS_CIR_STATE_CIR_LVL_X(x) (((x) & GENMASK(25, 4)) >> 4) -#define QSYS_CIR_STATE_SHP_TIME(x) ((x) & GENMASK(3, 0)) -#define QSYS_CIR_STATE_SHP_TIME_M GENMASK(3, 0) - -#define QSYS_EIR_STATE_GSZ 0x80 - -#define QSYS_SE_STATE_GSZ 0x80 - -#define QSYS_SE_STATE_SE_OUTP_LVL(x) (((x) << 1) & GENMASK(2, 1)) -#define QSYS_SE_STATE_SE_OUTP_LVL_M GENMASK(2, 1) -#define QSYS_SE_STATE_SE_OUTP_LVL_X(x) (((x) & GENMASK(2, 1)) >> 1) -#define QSYS_SE_STATE_SE_WAS_YEL BIT(0) - -#define QSYS_HSCH_MISC_CFG_SE_CONNECT_VLD BIT(8) -#define QSYS_HSCH_MISC_CFG_FRM_ADJ(x) (((x) << 3) & GENMASK(7, 3)) -#define QSYS_HSCH_MISC_CFG_FRM_ADJ_M GENMASK(7, 3) -#define QSYS_HSCH_MISC_CFG_FRM_ADJ_X(x) (((x) & GENMASK(7, 3)) >> 3) -#define QSYS_HSCH_MISC_CFG_LEAK_DIS BIT(2) -#define QSYS_HSCH_MISC_CFG_QSHP_EXC_ENA BIT(1) -#define QSYS_HSCH_MISC_CFG_PFC_BYP_UPD BIT(0) - -#define QSYS_TAG_CONFIG_RSZ 0x4 - -#define QSYS_TAG_CONFIG_ENABLE BIT(0) -#define QSYS_TAG_CONFIG_LINK_SPEED(x) (((x) << 4) & GENMASK(5, 4)) -#define QSYS_TAG_CONFIG_LINK_SPEED_M GENMASK(5, 4) -#define QSYS_TAG_CONFIG_LINK_SPEED_X(x) (((x) & GENMASK(5, 4)) >> 4) -#define QSYS_TAG_CONFIG_INIT_GATE_STATE(x) (((x) << 8) & GENMASK(15, 8)) -#define QSYS_TAG_CONFIG_INIT_GATE_STATE_M GENMASK(15, 8) -#define QSYS_TAG_CONFIG_INIT_GATE_STATE_X(x) (((x) & GENMASK(15, 8)) >> 8) -#define QSYS_TAG_CONFIG_SCH_TRAFFIC_QUEUES(x) (((x) << 16) & GENMASK(23, 16)) -#define QSYS_TAG_CONFIG_SCH_TRAFFIC_QUEUES_M GENMASK(23, 16) -#define QSYS_TAG_CONFIG_SCH_TRAFFIC_QUEUES_X(x) (((x) & GENMASK(23, 16)) >> 16) - -#define QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM(x) ((x) & GENMASK(7, 0)) -#define QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM_M GENMASK(7, 0) -#define QSYS_TAS_PARAM_CFG_CTRL_ALWAYS_GUARD_BAND_SCH_Q BIT(8) -#define QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE BIT(16) - -#define QSYS_PORT_MAX_SDU_RSZ 0x4 - -#define QSYS_PARAM_CFG_REG_3_BASE_TIME_SEC_MSB(x) ((x) & GENMASK(15, 0)) -#define QSYS_PARAM_CFG_REG_3_BASE_TIME_SEC_MSB_M GENMASK(15, 0) -#define QSYS_PARAM_CFG_REG_3_LIST_LENGTH(x) (((x) << 16) & GENMASK(31, 16)) -#define QSYS_PARAM_CFG_REG_3_LIST_LENGTH_M GENMASK(31, 16) -#define QSYS_PARAM_CFG_REG_3_LIST_LENGTH_X(x) (((x) & GENMASK(31, 16)) >> 16) - -#define QSYS_GCL_CFG_REG_1_GCL_ENTRY_NUM(x) ((x) & GENMASK(5, 0)) -#define QSYS_GCL_CFG_REG_1_GCL_ENTRY_NUM_M GENMASK(5, 0) -#define QSYS_GCL_CFG_REG_1_GATE_STATE(x) (((x) << 8) & GENMASK(15, 8)) -#define 
QSYS_GCL_CFG_REG_1_GATE_STATE_M GENMASK(15, 8) -#define QSYS_GCL_CFG_REG_1_GATE_STATE_X(x) (((x) & GENMASK(15, 8)) >> 8) - -#define QSYS_PARAM_STATUS_REG_3_BASE_TIME_SEC_MSB(x) ((x) & GENMASK(15, 0)) -#define QSYS_PARAM_STATUS_REG_3_BASE_TIME_SEC_MSB_M GENMASK(15, 0) -#define QSYS_PARAM_STATUS_REG_3_LIST_LENGTH(x) (((x) << 16) & GENMASK(31, 16)) -#define QSYS_PARAM_STATUS_REG_3_LIST_LENGTH_M GENMASK(31, 16) -#define QSYS_PARAM_STATUS_REG_3_LIST_LENGTH_X(x) (((x) & GENMASK(31, 16)) >> 16) - -#define QSYS_PARAM_STATUS_REG_8_CFG_CHG_TIME_SEC_MSB(x) ((x) & GENMASK(15, 0)) -#define QSYS_PARAM_STATUS_REG_8_CFG_CHG_TIME_SEC_MSB_M GENMASK(15, 0) -#define QSYS_PARAM_STATUS_REG_8_OPER_GATE_STATE(x) (((x) << 16) & GENMASK(23, 16)) -#define QSYS_PARAM_STATUS_REG_8_OPER_GATE_STATE_M GENMASK(23, 16) -#define QSYS_PARAM_STATUS_REG_8_OPER_GATE_STATE_X(x) (((x) & GENMASK(23, 16)) >> 16) -#define QSYS_PARAM_STATUS_REG_8_CONFIG_PENDING BIT(24) - -#define QSYS_GCL_STATUS_REG_1_GCL_ENTRY_NUM(x) ((x) & GENMASK(5, 0)) -#define QSYS_GCL_STATUS_REG_1_GCL_ENTRY_NUM_M GENMASK(5, 0) -#define QSYS_GCL_STATUS_REG_1_GATE_STATE(x) (((x) << 8) & GENMASK(15, 8)) -#define QSYS_GCL_STATUS_REG_1_GATE_STATE_M GENMASK(15, 8) -#define QSYS_GCL_STATUS_REG_1_GATE_STATE_X(x) (((x) & GENMASK(15, 8)) >> 8) - -#endif diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index c979f38a2e0c..2ee0d0be113a 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -2892,7 +2892,7 @@ drop: static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb, struct net_device *dev) { - struct sk_buff *segs, *curr; + struct sk_buff *segs, *curr, *next; struct myri10ge_priv *mgp = netdev_priv(dev); struct myri10ge_slice_state *ss; netdev_tx_t status; @@ -2901,10 +2901,8 @@ static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb, if (IS_ERR(segs)) goto drop; - while (segs) { - curr = segs; - segs = segs->next; - curr->next = NULL; + skb_list_walk_safe(segs, curr, next) { + skb_mark_not_on_list(curr); status = myri10ge_xmit(curr, dev); if (status != 0) { dev_kfree_skb_any(curr); diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index 1a2634cbbb69..d21d706b83a7 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -612,7 +612,7 @@ static void undo_cable_magic(struct net_device *dev); static void check_link(struct net_device *dev); static void netdev_timer(struct timer_list *t); static void dump_ring(struct net_device *dev); -static void ns_tx_timeout(struct net_device *dev); +static void ns_tx_timeout(struct net_device *dev, unsigned int txqueue); static int alloc_ring(struct net_device *dev); static void refill_rx(struct net_device *dev); static void init_ring(struct net_device *dev); @@ -1881,7 +1881,7 @@ static void dump_ring(struct net_device *dev) } } -static void ns_tx_timeout(struct net_device *dev) +static void ns_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct netdev_private *np = netdev_priv(dev); void __iomem * ioaddr = ns_ioaddr(dev); diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index 6af9a7eee114..8e24c7acf79b 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -1549,7 +1549,7 @@ static int ns83820_stop(struct net_device *ndev) return 0; } -static void ns83820_tx_timeout(struct net_device *ndev) +static void 
ns83820_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct ns83820 *dev = PRIV(ndev); u32 tx_done_idx; @@ -1603,7 +1603,7 @@ static void ns83820_tx_watch(struct timer_list *t) ndev->name, dev->tx_done_idx, dev->tx_free_idx, atomic_read(&dev->nr_tx_skbs)); - ns83820_tx_timeout(ndev); + ns83820_tx_timeout(ndev, UINT_MAX); } mod_timer(&dev->tx_watchdog, jiffies + 2*HZ); @@ -1937,7 +1937,7 @@ static int ns83820_init_one(struct pci_dev *pci_dev, pci_set_master(pci_dev); addr = pci_resource_start(pci_dev, 1); - dev->base = ioremap_nocache(addr, PAGE_SIZE); + dev->base = ioremap(addr, PAGE_SIZE); dev->tx_descs = pci_alloc_consistent(pci_dev, 4 * DESC_SIZE * NR_TX_DESC, &dev->tx_phy_descs); dev->rx_info.descs = pci_alloc_consistent(pci_dev, diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index b339125b2f09..31be3ba66877 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -64,6 +64,8 @@ static int sonic_open(struct net_device *dev) netif_dbg(lp, ifup, dev, "%s: initializing sonic driver\n", __func__); + spin_lock_init(&lp->lock); + for (i = 0; i < SONIC_NUM_RRS; i++) { struct sk_buff *skb = netdev_alloc_skb(dev, SONIC_RBSIZE + 2); if (skb == NULL) { @@ -114,6 +116,24 @@ static int sonic_open(struct net_device *dev) return 0; } +/* Wait for the SONIC to become idle. */ +static void sonic_quiesce(struct net_device *dev, u16 mask) +{ + struct sonic_local * __maybe_unused lp = netdev_priv(dev); + int i; + u16 bits; + + for (i = 0; i < 1000; ++i) { + bits = SONIC_READ(SONIC_CMD) & mask; + if (!bits) + return; + if (irqs_disabled() || in_interrupt()) + udelay(20); + else + usleep_range(100, 200); + } + WARN_ONCE(1, "command deadline expired! 0x%04x\n", bits); +} /* * Close the SONIC device @@ -130,6 +150,9 @@ static int sonic_close(struct net_device *dev) /* * stop the SONIC, disable interrupts */ + SONIC_WRITE(SONIC_CMD, SONIC_CR_RXDIS); + sonic_quiesce(dev, SONIC_CR_ALL); + SONIC_WRITE(SONIC_IMR, 0); SONIC_WRITE(SONIC_ISR, 0x7fff); SONIC_WRITE(SONIC_CMD, SONIC_CR_RST); @@ -161,7 +184,7 @@ static int sonic_close(struct net_device *dev) return 0; } -static void sonic_tx_timeout(struct net_device *dev) +static void sonic_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct sonic_local *lp = netdev_priv(dev); int i; @@ -169,6 +192,9 @@ static void sonic_tx_timeout(struct net_device *dev) * put the Sonic into software-reset mode and * disable all interrupts before releasing DMA buffers */ + SONIC_WRITE(SONIC_CMD, SONIC_CR_RXDIS); + sonic_quiesce(dev, SONIC_CR_ALL); + SONIC_WRITE(SONIC_IMR, 0); SONIC_WRITE(SONIC_ISR, 0x7fff); SONIC_WRITE(SONIC_CMD, SONIC_CR_RST); @@ -206,8 +232,6 @@ static void sonic_tx_timeout(struct net_device *dev) * wake the tx queue * Concurrently with all of this, the SONIC is potentially writing to * the status flags of the TDs. - * Until some mutual exclusion is added, this code will not work with SMP. However, - * MIPS Jazz machines and m68k Macs were all uni-processor machines. 
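The comment removed just above conceded that the SONIC transmit path had no mutual exclusion and only worked because the MIPS Jazz and m68k Mac targets were uniprocessor; the hunks that follow replace that caveat with a per-device spinlock (lp->lock) shared by the xmit path and the interrupt handler. A minimal sketch of the locking pattern being introduced, simplified from the driver and not its exact code:

static netdev_tx_t example_sonic_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct sonic_local *lp = netdev_priv(dev);
	unsigned long flags;
	int entry;

	spin_lock_irqsave(&lp->lock, flags);	/* excludes sonic_interrupt() */

	entry = lp->next_tx;
	/* ... fill the transmit descriptor for 'entry' ... */
	lp->tx_skb[entry] = skb;
	lp->next_tx = (entry + 1) & SONIC_TDS_MASK;
	if (lp->tx_skb[lp->next_tx])		/* ring full, throttle the stack */
		netif_stop_queue(dev);

	SONIC_WRITE(SONIC_CMD, SONIC_CR_TXP);	/* kick the transmitter */

	spin_unlock_irqrestore(&lp->lock, flags);
	return NETDEV_TX_OK;
}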
*/ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) @@ -215,7 +239,8 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) struct sonic_local *lp = netdev_priv(dev); dma_addr_t laddr; int length; - int entry = lp->next_tx; + int entry; + unsigned long flags; netif_dbg(lp, tx_queued, dev, "%s: skb=%p\n", __func__, skb); @@ -237,6 +262,10 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } + spin_lock_irqsave(&lp->lock, flags); + + entry = lp->next_tx; + sonic_tda_put(dev, entry, SONIC_TD_STATUS, 0); /* clear status */ sonic_tda_put(dev, entry, SONIC_TD_FRAG_COUNT, 1); /* single fragment */ sonic_tda_put(dev, entry, SONIC_TD_PKTSIZE, length); /* length of packet */ @@ -246,10 +275,6 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) sonic_tda_put(dev, entry, SONIC_TD_LINK, sonic_tda_get(dev, entry, SONIC_TD_LINK) | SONIC_EOL); - /* - * Must set tx_skb[entry] only after clearing status, and - * before clearing EOL and before stopping queue - */ wmb(); lp->tx_len[entry] = length; lp->tx_laddr[entry] = laddr; @@ -272,6 +297,8 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) SONIC_WRITE(SONIC_CMD, SONIC_CR_TXP); + spin_unlock_irqrestore(&lp->lock, flags); + return NETDEV_TX_OK; } @@ -284,15 +311,28 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) struct net_device *dev = dev_id; struct sonic_local *lp = netdev_priv(dev); int status; + unsigned long flags; + + /* The lock has two purposes. Firstly, it synchronizes sonic_interrupt() + * with sonic_send_packet() so that the two functions can share state. + * Secondly, it makes sonic_interrupt() re-entrant, as that is required + * by macsonic which must use two IRQs with different priority levels. + */ + spin_lock_irqsave(&lp->lock, flags); + + status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT; + if (!status) { + spin_unlock_irqrestore(&lp->lock, flags); - if (!(status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT)) return IRQ_NONE; + } do { + SONIC_WRITE(SONIC_ISR, status); /* clear the interrupt(s) */ + if (status & SONIC_INT_PKTRX) { netif_dbg(lp, intr, dev, "%s: packet rx\n", __func__); sonic_rx(dev); /* got packet(s) */ - SONIC_WRITE(SONIC_ISR, SONIC_INT_PKTRX); /* clear the interrupt */ } if (status & SONIC_INT_TXDN) { @@ -300,11 +340,12 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) int td_status; int freed_some = 0; - /* At this point, cur_tx is the index of a TD that is one of: - * unallocated/freed (status set & tx_skb[entry] clear) - * allocated and sent (status set & tx_skb[entry] set ) - * allocated and not yet sent (status clear & tx_skb[entry] set ) - * still being allocated by sonic_send_packet (status clear & tx_skb[entry] clear) + /* The state of a Transmit Descriptor may be inferred + * from { tx_skb[entry], td_status } as follows. 
+ * { clear, clear } => the TD has never been used + * { set, clear } => the TD was handed to SONIC + * { set, set } => the TD was handed back + * { clear, set } => the TD is available for re-use */ netif_dbg(lp, intr, dev, "%s: tx done\n", __func__); @@ -313,18 +354,19 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) if ((td_status = sonic_tda_get(dev, entry, SONIC_TD_STATUS)) == 0) break; - if (td_status & 0x0001) { + if (td_status & SONIC_TCR_PTX) { lp->stats.tx_packets++; lp->stats.tx_bytes += sonic_tda_get(dev, entry, SONIC_TD_PKTSIZE); } else { - lp->stats.tx_errors++; - if (td_status & 0x0642) + if (td_status & (SONIC_TCR_EXD | + SONIC_TCR_EXC | SONIC_TCR_BCM)) lp->stats.tx_aborted_errors++; - if (td_status & 0x0180) + if (td_status & + (SONIC_TCR_NCRS | SONIC_TCR_CRLS)) lp->stats.tx_carrier_errors++; - if (td_status & 0x0020) + if (td_status & SONIC_TCR_OWC) lp->stats.tx_window_errors++; - if (td_status & 0x0004) + if (td_status & SONIC_TCR_FU) lp->stats.tx_fifo_errors++; } @@ -346,7 +388,6 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) if (freed_some || lp->tx_skb[entry] == NULL) netif_wake_queue(dev); /* The ring is no longer full */ lp->cur_tx = entry; - SONIC_WRITE(SONIC_ISR, SONIC_INT_TXDN); /* clear the interrupt */ } /* @@ -355,42 +396,37 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) if (status & SONIC_INT_RFO) { netif_dbg(lp, rx_err, dev, "%s: rx fifo overrun\n", __func__); - lp->stats.rx_fifo_errors++; - SONIC_WRITE(SONIC_ISR, SONIC_INT_RFO); /* clear the interrupt */ } if (status & SONIC_INT_RDE) { netif_dbg(lp, rx_err, dev, "%s: rx descriptors exhausted\n", __func__); - lp->stats.rx_dropped++; - SONIC_WRITE(SONIC_ISR, SONIC_INT_RDE); /* clear the interrupt */ } if (status & SONIC_INT_RBAE) { netif_dbg(lp, rx_err, dev, "%s: rx buffer area exceeded\n", __func__); - lp->stats.rx_dropped++; - SONIC_WRITE(SONIC_ISR, SONIC_INT_RBAE); /* clear the interrupt */ } /* counter overruns; all counters are 16bit wide */ - if (status & SONIC_INT_FAE) { + if (status & SONIC_INT_FAE) lp->stats.rx_frame_errors += 65536; - SONIC_WRITE(SONIC_ISR, SONIC_INT_FAE); /* clear the interrupt */ - } - if (status & SONIC_INT_CRC) { + if (status & SONIC_INT_CRC) lp->stats.rx_crc_errors += 65536; - SONIC_WRITE(SONIC_ISR, SONIC_INT_CRC); /* clear the interrupt */ - } - if (status & SONIC_INT_MP) { + if (status & SONIC_INT_MP) lp->stats.rx_missed_errors += 65536; - SONIC_WRITE(SONIC_ISR, SONIC_INT_MP); /* clear the interrupt */ - } /* transmit error */ if (status & SONIC_INT_TXER) { - if (SONIC_READ(SONIC_TCR) & SONIC_TCR_FU) - netif_dbg(lp, tx_err, dev, "%s: tx fifo underrun\n", - __func__); - SONIC_WRITE(SONIC_ISR, SONIC_INT_TXER); /* clear the interrupt */ + u16 tcr = SONIC_READ(SONIC_TCR); + + netif_dbg(lp, tx_err, dev, "%s: TXER intr, TCR %04x\n", + __func__, tcr); + + if (tcr & (SONIC_TCR_EXD | SONIC_TCR_EXC | + SONIC_TCR_FU | SONIC_TCR_BCM)) { + /* Aborted transmission. Try again. */ + netif_stop_queue(dev); + SONIC_WRITE(SONIC_CMD, SONIC_CR_TXP); + } } /* bus retry */ @@ -400,107 +436,164 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) /* ... to help debug DMA problems causing endless interrupts. */ /* Bounce the eth interface to turn on the interrupt again. 
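The interrupt-handler hunks around here acknowledge all pending ISR bits once at the top of the service loop and re-read the register at the bottom, instead of clearing each source individually after handling it. A condensed sketch of that control flow, with the per-event handling and statistics elided:

static irqreturn_t example_sonic_interrupt(int irq, void *dev_id)
{
	struct net_device *dev = dev_id;
	struct sonic_local *lp = netdev_priv(dev);
	unsigned long flags;
	int status;

	spin_lock_irqsave(&lp->lock, flags);	/* shared with the xmit path */

	status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT;
	if (!status) {
		spin_unlock_irqrestore(&lp->lock, flags);
		return IRQ_NONE;
	}

	do {
		SONIC_WRITE(SONIC_ISR, status);	/* ack everything seen so far */

		if (status & SONIC_INT_PKTRX)
			sonic_rx(dev);
		/* ... TXDN reclaim, error counters, TXER retry ... */

		status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT;
	} while (status);

	spin_unlock_irqrestore(&lp->lock, flags);
	return IRQ_HANDLED;
}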
*/ SONIC_WRITE(SONIC_IMR, 0); - SONIC_WRITE(SONIC_ISR, SONIC_INT_BR); /* clear the interrupt */ } - /* load CAM done */ - if (status & SONIC_INT_LCD) - SONIC_WRITE(SONIC_ISR, SONIC_INT_LCD); /* clear the interrupt */ - } while((status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT)); + status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT; + } while (status); + + spin_unlock_irqrestore(&lp->lock, flags); + return IRQ_HANDLED; } +/* Return the array index corresponding to a given Receive Buffer pointer. */ +static int index_from_addr(struct sonic_local *lp, dma_addr_t addr, + unsigned int last) +{ + unsigned int i = last; + + do { + i = (i + 1) & SONIC_RRS_MASK; + if (addr == lp->rx_laddr[i]) + return i; + } while (i != last); + + return -ENOENT; +} + +/* Allocate and map a new skb to be used as a receive buffer. */ +static bool sonic_alloc_rb(struct net_device *dev, struct sonic_local *lp, + struct sk_buff **new_skb, dma_addr_t *new_addr) +{ + *new_skb = netdev_alloc_skb(dev, SONIC_RBSIZE + 2); + if (!*new_skb) + return false; + + if (SONIC_BUS_SCALE(lp->dma_bitmode) == 2) + skb_reserve(*new_skb, 2); + + *new_addr = dma_map_single(lp->device, skb_put(*new_skb, SONIC_RBSIZE), + SONIC_RBSIZE, DMA_FROM_DEVICE); + if (!*new_addr) { + dev_kfree_skb(*new_skb); + *new_skb = NULL; + return false; + } + + return true; +} + +/* Place a new receive resource in the Receive Resource Area and update RWP. */ +static void sonic_update_rra(struct net_device *dev, struct sonic_local *lp, + dma_addr_t old_addr, dma_addr_t new_addr) +{ + unsigned int entry = sonic_rr_entry(dev, SONIC_READ(SONIC_RWP)); + unsigned int end = sonic_rr_entry(dev, SONIC_READ(SONIC_RRP)); + u32 buf; + + /* The resources in the range [RRP, RWP) belong to the SONIC. This loop + * scans the other resources in the RRA, those in the range [RWP, RRP). + */ + do { + buf = (sonic_rra_get(dev, entry, SONIC_RR_BUFADR_H) << 16) | + sonic_rra_get(dev, entry, SONIC_RR_BUFADR_L); + + if (buf == old_addr) + break; + + entry = (entry + 1) & SONIC_RRS_MASK; + } while (entry != end); + + WARN_ONCE(buf != old_addr, "failed to find resource!\n"); + + sonic_rra_put(dev, entry, SONIC_RR_BUFADR_H, new_addr >> 16); + sonic_rra_put(dev, entry, SONIC_RR_BUFADR_L, new_addr & 0xffff); + + entry = (entry + 1) & SONIC_RRS_MASK; + + SONIC_WRITE(SONIC_RWP, sonic_rr_addr(dev, entry)); +} + /* * We have a good packet(s), pass it/them up the network stack. */ static void sonic_rx(struct net_device *dev) { struct sonic_local *lp = netdev_priv(dev); - int status; int entry = lp->cur_rx; + int prev_entry = lp->eol_rx; + bool rbe = false; while (sonic_rda_get(dev, entry, SONIC_RD_IN_USE) == 0) { - struct sk_buff *used_skb; - struct sk_buff *new_skb; - dma_addr_t new_laddr; - u16 bufadr_l; - u16 bufadr_h; - int pkt_len; - - status = sonic_rda_get(dev, entry, SONIC_RD_STATUS); - if (status & SONIC_RCR_PRX) { - /* Malloc up new buffer. 
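The helpers added above change how the receive ring is serviced: index_from_addr() maps the buffer address reported in a receive descriptor back to its rx_skb[]/rx_laddr[] slot, sonic_alloc_rb() maps a replacement skb, and sonic_update_rra() hands the slot back to the chip while honouring the [RRP, RWP) ownership window. A condensed sketch of the per-packet swap performed by the rewritten sonic_rx() below (statistics and some error handling trimmed):

static void example_rx_one(struct net_device *dev, struct sonic_local *lp,
			   int entry)
{
	u32 addr = (sonic_rda_get(dev, entry, SONIC_RD_PKTPTR_H) << 16) |
		   sonic_rda_get(dev, entry, SONIC_RD_PKTPTR_L);
	int i = index_from_addr(lp, addr, entry);
	struct sk_buff *new_skb;
	dma_addr_t new_laddr;

	if (WARN_ON_ONCE(i < 0))
		return;				/* no matching buffer slot */

	if (sonic_alloc_rb(dev, lp, &new_skb, &new_laddr)) {
		struct sk_buff *used_skb = lp->rx_skb[i];

		/* hand the filled buffer to the stack ... */
		dma_unmap_single(lp->device, addr, SONIC_RBSIZE, DMA_FROM_DEVICE);
		skb_trim(used_skb, sonic_rda_get(dev, entry, SONIC_RD_PKTLEN));
		used_skb->protocol = eth_type_trans(used_skb, dev);
		netif_rx(used_skb);

		/* ... and remember its freshly mapped replacement */
		lp->rx_skb[i] = new_skb;
		lp->rx_laddr[i] = new_laddr;
	} else {
		new_laddr = addr;	/* allocation failed: recycle the old buffer */
	}

	/* return the (old or new) buffer to the chip's Receive Resource Area */
	sonic_update_rra(dev, lp, addr, new_laddr);
}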
*/ - new_skb = netdev_alloc_skb(dev, SONIC_RBSIZE + 2); - if (new_skb == NULL) { - lp->stats.rx_dropped++; + u16 status = sonic_rda_get(dev, entry, SONIC_RD_STATUS); + + /* If the RD has LPKT set, the chip has finished with the RB */ + if ((status & SONIC_RCR_PRX) && (status & SONIC_RCR_LPKT)) { + struct sk_buff *new_skb; + dma_addr_t new_laddr; + u32 addr = (sonic_rda_get(dev, entry, + SONIC_RD_PKTPTR_H) << 16) | + sonic_rda_get(dev, entry, SONIC_RD_PKTPTR_L); + int i = index_from_addr(lp, addr, entry); + + if (i < 0) { + WARN_ONCE(1, "failed to find buffer!\n"); break; } - /* provide 16 byte IP header alignment unless DMA requires otherwise */ - if(SONIC_BUS_SCALE(lp->dma_bitmode) == 2) - skb_reserve(new_skb, 2); - - new_laddr = dma_map_single(lp->device, skb_put(new_skb, SONIC_RBSIZE), - SONIC_RBSIZE, DMA_FROM_DEVICE); - if (!new_laddr) { - dev_kfree_skb(new_skb); - printk(KERN_ERR "%s: Failed to map rx buffer, dropping packet.\n", dev->name); + + if (sonic_alloc_rb(dev, lp, &new_skb, &new_laddr)) { + struct sk_buff *used_skb = lp->rx_skb[i]; + int pkt_len; + + /* Pass the used buffer up the stack */ + dma_unmap_single(lp->device, addr, SONIC_RBSIZE, + DMA_FROM_DEVICE); + + pkt_len = sonic_rda_get(dev, entry, + SONIC_RD_PKTLEN); + skb_trim(used_skb, pkt_len); + used_skb->protocol = eth_type_trans(used_skb, + dev); + netif_rx(used_skb); + lp->stats.rx_packets++; + lp->stats.rx_bytes += pkt_len; + + lp->rx_skb[i] = new_skb; + lp->rx_laddr[i] = new_laddr; + } else { + /* Failed to obtain a new buffer so re-use it */ + new_laddr = addr; lp->stats.rx_dropped++; - break; } - - /* now we have a new skb to replace it, pass the used one up the stack */ - dma_unmap_single(lp->device, lp->rx_laddr[entry], SONIC_RBSIZE, DMA_FROM_DEVICE); - used_skb = lp->rx_skb[entry]; - pkt_len = sonic_rda_get(dev, entry, SONIC_RD_PKTLEN); - skb_trim(used_skb, pkt_len); - used_skb->protocol = eth_type_trans(used_skb, dev); - netif_rx(used_skb); - lp->stats.rx_packets++; - lp->stats.rx_bytes += pkt_len; - - /* and insert the new skb */ - lp->rx_laddr[entry] = new_laddr; - lp->rx_skb[entry] = new_skb; - - bufadr_l = (unsigned long)new_laddr & 0xffff; - bufadr_h = (unsigned long)new_laddr >> 16; - sonic_rra_put(dev, entry, SONIC_RR_BUFADR_L, bufadr_l); - sonic_rra_put(dev, entry, SONIC_RR_BUFADR_H, bufadr_h); - } else { - /* This should only happen, if we enable accepting broken packets. */ - lp->stats.rx_errors++; - if (status & SONIC_RCR_FAER) - lp->stats.rx_frame_errors++; - if (status & SONIC_RCR_CRCR) - lp->stats.rx_crc_errors++; - } - if (status & SONIC_RCR_LPKT) { - /* - * this was the last packet out of the current receive buffer - * give the buffer back to the SONIC + /* If RBE is already asserted when RWP advances then + * it's safe to clear RBE after processing this packet. */ - lp->cur_rwp += SIZEOF_SONIC_RR * SONIC_BUS_SCALE(lp->dma_bitmode); - if (lp->cur_rwp >= lp->rra_end) lp->cur_rwp = lp->rra_laddr & 0xffff; - SONIC_WRITE(SONIC_RWP, lp->cur_rwp); - if (SONIC_READ(SONIC_ISR) & SONIC_INT_RBE) { - netif_dbg(lp, rx_err, dev, "%s: rx buffer exhausted\n", - __func__); - SONIC_WRITE(SONIC_ISR, SONIC_INT_RBE); /* clear the flag */ - } - } else - printk(KERN_ERR "%s: rx desc without RCR_LPKT. 
Shouldn't happen !?\n", - dev->name); + rbe = rbe || SONIC_READ(SONIC_ISR) & SONIC_INT_RBE; + sonic_update_rra(dev, lp, addr, new_laddr); + } /* * give back the descriptor */ - sonic_rda_put(dev, entry, SONIC_RD_LINK, - sonic_rda_get(dev, entry, SONIC_RD_LINK) | SONIC_EOL); + sonic_rda_put(dev, entry, SONIC_RD_STATUS, 0); sonic_rda_put(dev, entry, SONIC_RD_IN_USE, 1); - sonic_rda_put(dev, lp->eol_rx, SONIC_RD_LINK, - sonic_rda_get(dev, lp->eol_rx, SONIC_RD_LINK) & ~SONIC_EOL); - lp->eol_rx = entry; - lp->cur_rx = entry = (entry + 1) & SONIC_RDS_MASK; + + prev_entry = entry; + entry = (entry + 1) & SONIC_RDS_MASK; + } + + lp->cur_rx = entry; + + if (prev_entry != lp->eol_rx) { + /* Advance the EOL flag to put descriptors back into service */ + sonic_rda_put(dev, prev_entry, SONIC_RD_LINK, SONIC_EOL | + sonic_rda_get(dev, prev_entry, SONIC_RD_LINK)); + sonic_rda_put(dev, lp->eol_rx, SONIC_RD_LINK, ~SONIC_EOL & + sonic_rda_get(dev, lp->eol_rx, SONIC_RD_LINK)); + lp->eol_rx = prev_entry; } + + if (rbe) + SONIC_WRITE(SONIC_ISR, SONIC_INT_RBE); /* * If any worth-while packets have been received, netif_rx() * has done a mark_bh(NET_BH) for us and will work on them @@ -550,6 +643,8 @@ static void sonic_multicast_list(struct net_device *dev) (netdev_mc_count(dev) > 15)) { rcr |= SONIC_RCR_AMC; } else { + unsigned long flags; + netif_dbg(lp, ifup, dev, "%s: mc_count %d\n", __func__, netdev_mc_count(dev)); sonic_set_cam_enable(dev, 1); /* always enable our own address */ @@ -563,9 +658,14 @@ static void sonic_multicast_list(struct net_device *dev) i++; } SONIC_WRITE(SONIC_CDC, 16); - /* issue Load CAM command */ SONIC_WRITE(SONIC_CDP, lp->cda_laddr & 0xffff); + + /* LCAM and TXP commands can't be used simultaneously */ + spin_lock_irqsave(&lp->lock, flags); + sonic_quiesce(dev, SONIC_CR_TXP); SONIC_WRITE(SONIC_CMD, SONIC_CR_LCAM); + sonic_quiesce(dev, SONIC_CR_LCAM); + spin_unlock_irqrestore(&lp->lock, flags); } } @@ -580,7 +680,6 @@ static void sonic_multicast_list(struct net_device *dev) */ static int sonic_init(struct net_device *dev) { - unsigned int cmd; struct sonic_local *lp = netdev_priv(dev); int i; @@ -592,12 +691,16 @@ static int sonic_init(struct net_device *dev) SONIC_WRITE(SONIC_ISR, 0x7fff); SONIC_WRITE(SONIC_CMD, SONIC_CR_RST); + /* While in reset mode, clear CAM Enable register */ + SONIC_WRITE(SONIC_CE, 0); + /* * clear software reset flag, disable receiver, clear and * enable interrupts, then completely initialize the SONIC */ SONIC_WRITE(SONIC_CMD, 0); - SONIC_WRITE(SONIC_CMD, SONIC_CR_RXDIS); + SONIC_WRITE(SONIC_CMD, SONIC_CR_RXDIS | SONIC_CR_STP); + sonic_quiesce(dev, SONIC_CR_ALL); /* * initialize the receive resource area @@ -615,15 +718,10 @@ static int sonic_init(struct net_device *dev) } /* initialize all RRA registers */ - lp->rra_end = (lp->rra_laddr + SONIC_NUM_RRS * SIZEOF_SONIC_RR * - SONIC_BUS_SCALE(lp->dma_bitmode)) & 0xffff; - lp->cur_rwp = (lp->rra_laddr + (SONIC_NUM_RRS - 1) * SIZEOF_SONIC_RR * - SONIC_BUS_SCALE(lp->dma_bitmode)) & 0xffff; - - SONIC_WRITE(SONIC_RSA, lp->rra_laddr & 0xffff); - SONIC_WRITE(SONIC_REA, lp->rra_end); - SONIC_WRITE(SONIC_RRP, lp->rra_laddr & 0xffff); - SONIC_WRITE(SONIC_RWP, lp->cur_rwp); + SONIC_WRITE(SONIC_RSA, sonic_rr_addr(dev, 0)); + SONIC_WRITE(SONIC_REA, sonic_rr_addr(dev, SONIC_NUM_RRS)); + SONIC_WRITE(SONIC_RRP, sonic_rr_addr(dev, 0)); + SONIC_WRITE(SONIC_RWP, sonic_rr_addr(dev, SONIC_NUM_RRS - 1)); SONIC_WRITE(SONIC_URRA, lp->rra_laddr >> 16); SONIC_WRITE(SONIC_EOBC, (SONIC_RBSIZE >> 1) - (lp->dma_bitmode ? 
2 : 1)); @@ -631,14 +729,7 @@ static int sonic_init(struct net_device *dev) netif_dbg(lp, ifup, dev, "%s: issuing RRRA command\n", __func__); SONIC_WRITE(SONIC_CMD, SONIC_CR_RRRA); - i = 0; - while (i++ < 100) { - if (SONIC_READ(SONIC_CMD) & SONIC_CR_RRRA) - break; - } - - netif_dbg(lp, ifup, dev, "%s: status=%x, i=%d\n", __func__, - SONIC_READ(SONIC_CMD), i); + sonic_quiesce(dev, SONIC_CR_RRRA); /* * Initialize the receive descriptors so that they @@ -713,28 +804,17 @@ static int sonic_init(struct net_device *dev) * load the CAM */ SONIC_WRITE(SONIC_CMD, SONIC_CR_LCAM); - - i = 0; - while (i++ < 100) { - if (SONIC_READ(SONIC_ISR) & SONIC_INT_LCD) - break; - } - netif_dbg(lp, ifup, dev, "%s: CMD=%x, ISR=%x, i=%d\n", __func__, - SONIC_READ(SONIC_CMD), SONIC_READ(SONIC_ISR), i); + sonic_quiesce(dev, SONIC_CR_LCAM); /* * enable receiver, disable loopback * and enable all interrupts */ - SONIC_WRITE(SONIC_CMD, SONIC_CR_RXEN | SONIC_CR_STP); SONIC_WRITE(SONIC_RCR, SONIC_RCR_DEFAULT); SONIC_WRITE(SONIC_TCR, SONIC_TCR_DEFAULT); SONIC_WRITE(SONIC_ISR, 0x7fff); SONIC_WRITE(SONIC_IMR, SONIC_IMR_DEFAULT); - - cmd = SONIC_READ(SONIC_CMD); - if ((cmd & SONIC_CR_RXEN) == 0 || (cmd & SONIC_CR_STP) == 0) - printk(KERN_ERR "sonic_init: failed, status=%x\n", cmd); + SONIC_WRITE(SONIC_CMD, SONIC_CR_RXEN); netif_dbg(lp, ifup, dev, "%s: new status=%x\n", __func__, SONIC_READ(SONIC_CMD)); diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h index 2b27f7049acb..e0e4cba6f6f6 100644 --- a/drivers/net/ethernet/natsemi/sonic.h +++ b/drivers/net/ethernet/natsemi/sonic.h @@ -110,6 +110,9 @@ #define SONIC_CR_TXP 0x0002 #define SONIC_CR_HTX 0x0001 +#define SONIC_CR_ALL (SONIC_CR_LCAM | SONIC_CR_RRRA | \ + SONIC_CR_RXEN | SONIC_CR_TXP) + /* * SONIC data configuration bits */ @@ -175,6 +178,7 @@ #define SONIC_TCR_NCRS 0x0100 #define SONIC_TCR_CRLS 0x0080 #define SONIC_TCR_EXC 0x0040 +#define SONIC_TCR_OWC 0x0020 #define SONIC_TCR_PMB 0x0008 #define SONIC_TCR_FU 0x0004 #define SONIC_TCR_BCM 0x0002 @@ -274,8 +278,9 @@ #define SONIC_NUM_RDS SONIC_NUM_RRS /* number of receive descriptors */ #define SONIC_NUM_TDS 16 /* number of transmit descriptors */ -#define SONIC_RDS_MASK (SONIC_NUM_RDS-1) -#define SONIC_TDS_MASK (SONIC_NUM_TDS-1) +#define SONIC_RRS_MASK (SONIC_NUM_RRS - 1) +#define SONIC_RDS_MASK (SONIC_NUM_RDS - 1) +#define SONIC_TDS_MASK (SONIC_NUM_TDS - 1) #define SONIC_RBSIZE 1520 /* size of one resource buffer */ @@ -312,8 +317,6 @@ struct sonic_local { u32 rda_laddr; /* logical DMA address of RDA */ dma_addr_t rx_laddr[SONIC_NUM_RRS]; /* logical DMA addresses of rx skbuffs */ dma_addr_t tx_laddr[SONIC_NUM_TDS]; /* logical DMA addresses of tx skbuffs */ - unsigned int rra_end; - unsigned int cur_rwp; unsigned int cur_rx; unsigned int cur_tx; /* first unacked transmit packet */ unsigned int eol_rx; @@ -322,6 +325,7 @@ struct sonic_local { int msg_enable; struct device *device; /* generic device */ struct net_device_stats stats; + spinlock_t lock; }; #define TX_TIMEOUT (3 * HZ) @@ -336,7 +340,7 @@ static int sonic_close(struct net_device *dev); static struct net_device_stats *sonic_get_stats(struct net_device *dev); static void sonic_multicast_list(struct net_device *dev); static int sonic_init(struct net_device *dev); -static void sonic_tx_timeout(struct net_device *dev); +static void sonic_tx_timeout(struct net_device *dev, unsigned int txqueue); static void sonic_msg_init(struct net_device *dev); /* Internal inlines for reading/writing DMA buffers. 
Note that bus @@ -344,30 +348,30 @@ static void sonic_msg_init(struct net_device *dev); as far as we can tell. */ /* OpenBSD calls this "SWO". I'd like to think that sonic_buf_put() is a much better name. */ -static inline void sonic_buf_put(void* base, int bitmode, +static inline void sonic_buf_put(u16 *base, int bitmode, int offset, __u16 val) { if (bitmode) #ifdef __BIG_ENDIAN - ((__u16 *) base + (offset*2))[1] = val; + __raw_writew(val, base + (offset * 2) + 1); #else - ((__u16 *) base + (offset*2))[0] = val; + __raw_writew(val, base + (offset * 2) + 0); #endif else - ((__u16 *) base)[offset] = val; + __raw_writew(val, base + (offset * 1) + 0); } -static inline __u16 sonic_buf_get(void* base, int bitmode, +static inline __u16 sonic_buf_get(u16 *base, int bitmode, int offset) { if (bitmode) #ifdef __BIG_ENDIAN - return ((volatile __u16 *) base + (offset*2))[1]; + return __raw_readw(base + (offset * 2) + 1); #else - return ((volatile __u16 *) base + (offset*2))[0]; + return __raw_readw(base + (offset * 2) + 0); #endif else - return ((volatile __u16 *) base)[offset]; + return __raw_readw(base + (offset * 1) + 0); } /* Inlines that you should actually use for reading/writing DMA buffers */ @@ -447,6 +451,22 @@ static inline __u16 sonic_rra_get(struct net_device* dev, int entry, (entry * SIZEOF_SONIC_RR) + offset); } +static inline u16 sonic_rr_addr(struct net_device *dev, int entry) +{ + struct sonic_local *lp = netdev_priv(dev); + + return lp->rra_laddr + + entry * SIZEOF_SONIC_RR * SONIC_BUS_SCALE(lp->dma_bitmode); +} + +static inline u16 sonic_rr_entry(struct net_device *dev, u16 addr) +{ + struct sonic_local *lp = netdev_priv(dev); + + return (addr - (u16)lp->rra_laddr) / (SIZEOF_SONIC_RR * + SONIC_BUS_SCALE(lp->dma_bitmode)); +} + static const char version[] = "sonic.c:v0.92 20.9.98 tsbogend@alpha.franken.de\n"; diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index e0b2bf327905..0ec6b8e8b549 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -7238,7 +7238,7 @@ out_unlock: * void */ -static void s2io_tx_watchdog(struct net_device *dev) +static void s2io_tx_watchdog(struct net_device *dev, unsigned int txqueue) { struct s2io_nic *sp = netdev_priv(dev); struct swStat *swstats = &sp->mac_control.stats_info->sw_stat; diff --git a/drivers/net/ethernet/neterion/s2io.h b/drivers/net/ethernet/neterion/s2io.h index 0a921f30f98f..6fa3159a977f 100644 --- a/drivers/net/ethernet/neterion/s2io.h +++ b/drivers/net/ethernet/neterion/s2io.h @@ -1065,7 +1065,7 @@ static void s2io_txpic_intr_handle(struct s2io_nic *sp); static void tx_intr_handler(struct fifo_info *fifo_data); static void s2io_handle_errors(void * dev_id); -static void s2io_tx_watchdog(struct net_device *dev); +static void s2io_tx_watchdog(struct net_device *dev, unsigned int txqueue); static void s2io_set_multicast(struct net_device *dev); static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp); static void s2io_link(struct s2io_nic * sp, int link); diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 1d334f2e0a56..9b63574b6202 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -3273,7 +3273,7 @@ static int vxge_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) * This function is triggered if the Tx Queue is stopped * for a pre-defined amount of time when the Interface is still up. 
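The vxge hunk below is another instance of the change that runs through this whole series: the .ndo_tx_timeout callback now also receives the index of the stalled TX queue. Drivers that invoke their handler directly, such as the ns83820 watchdog earlier in this patch, pass UINT_MAX when no particular queue is implicated. A sketch of the new hook shape, with illustrative names that do not belong to any driver here:

static void example_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
	if (txqueue != UINT_MAX)
		netdev_warn(dev, "transmit timeout on queue %u\n", txqueue);
	else
		netdev_warn(dev, "transmit timeout\n");

	/* ... reset the affected ring / schedule recovery ... */
}

static const struct net_device_ops example_netdev_ops = {
	.ndo_tx_timeout	= example_tx_timeout,
	/* other callbacks elided */
};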
*/ -static void vxge_tx_watchdog(struct net_device *dev) +static void vxge_tx_watchdog(struct net_device *dev, unsigned int txqueue) { struct vxgedev *vdev; diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig index bac5be4d4f43..a3f68a718813 100644 --- a/drivers/net/ethernet/netronome/Kconfig +++ b/drivers/net/ethernet/netronome/Kconfig @@ -31,6 +31,7 @@ config NFP_APP_FLOWER bool "NFP4000/NFP6000 TC Flower offload support" depends on NFP depends on NET_SWITCHDEV + depends on IPV6!=m || NFP=m default y ---help--- Enable driver support for TC Flower offload on NFP4000 and NFP6000. diff --git a/drivers/net/ethernet/netronome/nfp/abm/cls.c b/drivers/net/ethernet/netronome/nfp/abm/cls.c index 9f8a1f69c0c4..23ebddfb9532 100644 --- a/drivers/net/ethernet/netronome/nfp/abm/cls.c +++ b/drivers/net/ethernet/netronome/nfp/abm/cls.c @@ -176,10 +176,8 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink, u8 mask, val; int err; - if (!nfp_abm_u32_check_knode(alink->abm, knode, proto, extack)) { - err = -EOPNOTSUPP; + if (!nfp_abm_u32_check_knode(alink->abm, knode, proto, extack)) goto err_delete; - } tos_off = proto == htons(ETH_P_IP) ? 16 : 20; @@ -200,18 +198,14 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink, if ((iter->val & cmask) == (val & cmask) && iter->band != knode->res->classid) { NL_SET_ERR_MSG_MOD(extack, "conflict with already offloaded filter"); - err = -EOPNOTSUPP; goto err_delete; } } if (!match) { match = kzalloc(sizeof(*match), GFP_KERNEL); - if (!match) { - err = -ENOMEM; - goto err_delete; - } - + if (!match) + return -ENOMEM; list_add(&match->list, &alink->dscp_map); } match->handle = knode->handle; @@ -227,7 +221,7 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink, err_delete: nfp_abm_u32_knode_delete(alink, knode); - return err; + return -EOPNOTSUPP; } static int nfp_abm_setup_tc_block_cb(enum tc_setup_type type, diff --git a/drivers/net/ethernet/netronome/nfp/ccm.h b/drivers/net/ethernet/netronome/nfp/ccm.h index a460c75522be..d81d450be50e 100644 --- a/drivers/net/ethernet/netronome/nfp/ccm.h +++ b/drivers/net/ethernet/netronome/nfp/ccm.h @@ -26,6 +26,7 @@ enum nfp_ccm_type { NFP_CCM_TYPE_CRYPTO_ADD = 10, NFP_CCM_TYPE_CRYPTO_DEL = 11, NFP_CCM_TYPE_CRYPTO_UPDATE = 12, + NFP_CCM_TYPE_CRYPTO_RESYNC = 13, __NFP_CCM_TYPE_MAX, }; diff --git a/drivers/net/ethernet/netronome/nfp/crypto/crypto.h b/drivers/net/ethernet/netronome/nfp/crypto/crypto.h index 60372ddf69f0..bffe58bb2f27 100644 --- a/drivers/net/ethernet/netronome/nfp/crypto/crypto.h +++ b/drivers/net/ethernet/netronome/nfp/crypto/crypto.h @@ -4,6 +4,10 @@ #ifndef NFP_CRYPTO_H #define NFP_CRYPTO_H 1 +struct net_device; +struct nfp_net; +struct nfp_net_tls_resync_req; + struct nfp_net_tls_offload_ctx { __be32 fw_handle[2]; @@ -17,11 +21,22 @@ struct nfp_net_tls_offload_ctx { #ifdef CONFIG_TLS_DEVICE int nfp_net_tls_init(struct nfp_net *nn); +int nfp_net_tls_rx_resync_req(struct net_device *netdev, + struct nfp_net_tls_resync_req *req, + void *pkt, unsigned int pkt_len); #else static inline int nfp_net_tls_init(struct nfp_net *nn) { return 0; } + +static inline int +nfp_net_tls_rx_resync_req(struct net_device *netdev, + struct nfp_net_tls_resync_req *req, + void *pkt, unsigned int pkt_len) +{ + return -EOPNOTSUPP; +} #endif #endif diff --git a/drivers/net/ethernet/netronome/nfp/crypto/fw.h b/drivers/net/ethernet/netronome/nfp/crypto/fw.h index 67413d946c4a..8d1458896bcb 100644 --- a/drivers/net/ethernet/netronome/nfp/crypto/fw.h +++ 
b/drivers/net/ethernet/netronome/nfp/crypto/fw.h @@ -9,6 +9,14 @@ #define NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC 0 #define NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC 1 +struct nfp_net_tls_resync_req { + __be32 fw_handle[2]; + __be32 tcp_seq; + u8 l3_offset; + u8 l4_offset; + u8 resv[2]; +}; + struct nfp_crypto_reply_simple { struct nfp_ccm_hdr hdr; __be32 error; diff --git a/drivers/net/ethernet/netronome/nfp/crypto/tls.c b/drivers/net/ethernet/netronome/nfp/crypto/tls.c index 96a96b35c0ca..7c50e3dfb9d5 100644 --- a/drivers/net/ethernet/netronome/nfp/crypto/tls.c +++ b/drivers/net/ethernet/netronome/nfp/crypto/tls.c @@ -5,6 +5,7 @@ #include <linux/ipv6.h> #include <linux/skbuff.h> #include <linux/string.h> +#include <net/inet6_hashtables.h> #include <net/tls.h> #include "../ccm.h" @@ -391,8 +392,9 @@ nfp_net_tls_add(struct net_device *netdev, struct sock *sk, if (direction == TLS_OFFLOAD_CTX_DIR_TX) return 0; - tls_offload_rx_resync_set_type(sk, - TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT); + if (!nn->tlv_caps.tls_resync_ss) + tls_offload_rx_resync_set_type(sk, TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT); + return 0; err_fw_remove: @@ -424,6 +426,7 @@ nfp_net_tls_resync(struct net_device *netdev, struct sock *sk, u32 seq, struct nfp_net *nn = netdev_priv(netdev); struct nfp_net_tls_offload_ctx *ntls; struct nfp_crypto_req_update *req; + enum nfp_ccm_type type; struct sk_buff *skb; gfp_t flags; int err; @@ -442,15 +445,18 @@ nfp_net_tls_resync(struct net_device *netdev, struct sock *sk, u32 seq, req->tcp_seq = cpu_to_be32(seq); memcpy(req->rec_no, rcd_sn, sizeof(req->rec_no)); + type = NFP_CCM_TYPE_CRYPTO_UPDATE; if (direction == TLS_OFFLOAD_CTX_DIR_TX) { - err = nfp_net_tls_communicate_simple(nn, skb, "sync", - NFP_CCM_TYPE_CRYPTO_UPDATE); + err = nfp_net_tls_communicate_simple(nn, skb, "sync", type); if (err) return err; ntls->next_seq = seq; } else { - nfp_ccm_mbox_post(nn, skb, NFP_CCM_TYPE_CRYPTO_UPDATE, + if (nn->tlv_caps.tls_resync_ss) + type = NFP_CCM_TYPE_CRYPTO_RESYNC; + nfp_ccm_mbox_post(nn, skb, type, sizeof(struct nfp_crypto_reply_simple)); + atomic_inc(&nn->ktls_rx_resync_sent); } return 0; @@ -462,6 +468,79 @@ static const struct tlsdev_ops nfp_net_tls_ops = { .tls_dev_resync = nfp_net_tls_resync, }; +int nfp_net_tls_rx_resync_req(struct net_device *netdev, + struct nfp_net_tls_resync_req *req, + void *pkt, unsigned int pkt_len) +{ + struct nfp_net *nn = netdev_priv(netdev); + struct nfp_net_tls_offload_ctx *ntls; + struct ipv6hdr *ipv6h; + struct tcphdr *th; + struct iphdr *iph; + struct sock *sk; + __be32 tcp_seq; + int err; + + iph = pkt + req->l3_offset; + ipv6h = pkt + req->l3_offset; + th = pkt + req->l4_offset; + + if ((u8 *)&th[1] > (u8 *)pkt + pkt_len) { + netdev_warn_once(netdev, "invalid TLS RX resync request (l3_off: %hhu l4_off: %hhu pkt_len: %u)\n", + req->l3_offset, req->l4_offset, pkt_len); + err = -EINVAL; + goto err_cnt_ign; + } + + switch (iph->version) { + case 4: + sk = inet_lookup_established(dev_net(netdev), &tcp_hashinfo, + iph->saddr, th->source, iph->daddr, + th->dest, netdev->ifindex); + break; +#if IS_ENABLED(CONFIG_IPV6) + case 6: + sk = __inet6_lookup_established(dev_net(netdev), &tcp_hashinfo, + &ipv6h->saddr, th->source, + &ipv6h->daddr, ntohs(th->dest), + netdev->ifindex, 0); + break; +#endif + default: + netdev_warn_once(netdev, "invalid TLS RX resync request (l3_off: %hhu l4_off: %hhu ipver: %u)\n", + req->l3_offset, req->l4_offset, iph->version); + err = -EINVAL; + goto err_cnt_ign; + } + + err = 0; + if (!sk) + goto err_cnt_ign; + if 
(!tls_is_sk_rx_device_offloaded(sk) || + sk->sk_shutdown & RCV_SHUTDOWN) + goto err_put_sock; + + ntls = tls_driver_ctx(sk, TLS_OFFLOAD_CTX_DIR_RX); + /* some FW versions can't report the handle and report 0s */ + if (memchr_inv(&req->fw_handle, 0, sizeof(req->fw_handle)) && + memcmp(&req->fw_handle, &ntls->fw_handle, sizeof(ntls->fw_handle))) + goto err_put_sock; + + /* copy to ensure alignment */ + memcpy(&tcp_seq, &req->tcp_seq, sizeof(tcp_seq)); + tls_offload_rx_resync_request(sk, tcp_seq); + atomic_inc(&nn->ktls_rx_resync_req); + + sock_gen_put(sk); + return 0; + +err_put_sock: + sock_gen_put(sk); +err_cnt_ign: + atomic_inc(&nn->ktls_rx_resync_ign); + return err; +} + static int nfp_net_tls_reset(struct nfp_net *nn) { struct nfp_crypto_req_reset *req; diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 1b019fdfcd97..c06600fb47ff 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -22,8 +22,9 @@ #define NFP_FL_TUNNEL_CSUM cpu_to_be16(0x01) #define NFP_FL_TUNNEL_KEY cpu_to_be16(0x04) #define NFP_FL_TUNNEL_GENEVE_OPT cpu_to_be16(0x0800) -#define NFP_FL_SUPPORTED_TUNNEL_INFO_FLAGS IP_TUNNEL_INFO_TX -#define NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS (NFP_FL_TUNNEL_CSUM | \ +#define NFP_FL_SUPPORTED_TUNNEL_INFO_FLAGS (IP_TUNNEL_INFO_TX | \ + IP_TUNNEL_INFO_IPV6) +#define NFP_FL_SUPPORTED_UDP_TUN_FLAGS (NFP_FL_TUNNEL_CSUM | \ NFP_FL_TUNNEL_KEY | \ NFP_FL_TUNNEL_GENEVE_OPT) @@ -394,19 +395,26 @@ nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len, } static int -nfp_fl_set_ipv4_tun(struct nfp_app *app, struct nfp_fl_set_ipv4_tun *set_tun, - const struct flow_action_entry *act, - struct nfp_fl_pre_tunnel *pre_tun, - enum nfp_flower_tun_type tun_type, - struct net_device *netdev, struct netlink_ext_ack *extack) +nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, + const struct flow_action_entry *act, + struct nfp_fl_pre_tunnel *pre_tun, + enum nfp_flower_tun_type tun_type, + struct net_device *netdev, struct netlink_ext_ack *extack) { - size_t act_size = sizeof(struct nfp_fl_set_ipv4_tun); const struct ip_tunnel_info *ip_tun = act->tunnel; + bool ipv6 = ip_tunnel_info_af(ip_tun) == AF_INET6; + size_t act_size = sizeof(struct nfp_fl_set_tun); struct nfp_flower_priv *priv = app->priv; u32 tmp_set_ip_tun_type_index = 0; /* Currently support one pre-tunnel so index is always 0. */ int pretun_idx = 0; + if (!IS_ENABLED(CONFIG_IPV6) && ipv6) + return -EOPNOTSUPP; + + if (ipv6 && !(priv->flower_ext_feats & NFP_FL_FEATS_IPV6_TUN)) + return -EOPNOTSUPP; + BUILD_BUG_ON(NFP_FL_TUNNEL_CSUM != TUNNEL_CSUM || NFP_FL_TUNNEL_KEY != TUNNEL_KEY || NFP_FL_TUNNEL_GENEVE_OPT != TUNNEL_GENEVE_OPT); @@ -417,19 +425,35 @@ nfp_fl_set_ipv4_tun(struct nfp_app *app, struct nfp_fl_set_ipv4_tun *set_tun, return -EOPNOTSUPP; } - set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL; + set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_TUNNEL; set_tun->head.len_lw = act_size >> NFP_FL_LW_SIZ; /* Set tunnel type and pre-tunnel index. 
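The code that follows packs the tunnel type and the pre-tunnel table index into one 32-bit word using NFP_FL_TUNNEL_TYPE and NFP_FL_PRE_TUN_INDEX (renamed later in this patch from their IPv4-specific spellings). For illustration only, the same encode/decode written with the bitfield helpers; FIELD_GET is not used by the driver here and is shown merely to make the layout explicit:

#include <linux/bitfield.h>

static __be32 example_pack_tun_type_index(u8 tun_type, u8 pretun_idx)
{
	u32 w;

	w = FIELD_PREP(NFP_FL_TUNNEL_TYPE, tun_type) |		/* bits 7:4 */
	    FIELD_PREP(NFP_FL_PRE_TUN_INDEX, pretun_idx);	/* bits 2:0 */

	return cpu_to_be32(w);
}

static u8 example_unpack_tun_type(__be32 tun_type_index)
{
	return FIELD_GET(NFP_FL_TUNNEL_TYPE, be32_to_cpu(tun_type_index));
}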
*/ tmp_set_ip_tun_type_index |= - FIELD_PREP(NFP_FL_IPV4_TUNNEL_TYPE, tun_type) | - FIELD_PREP(NFP_FL_IPV4_PRE_TUN_INDEX, pretun_idx); + FIELD_PREP(NFP_FL_TUNNEL_TYPE, tun_type) | + FIELD_PREP(NFP_FL_PRE_TUN_INDEX, pretun_idx); set_tun->tun_type_index = cpu_to_be32(tmp_set_ip_tun_type_index); set_tun->tun_id = ip_tun->key.tun_id; if (ip_tun->key.ttl) { set_tun->ttl = ip_tun->key.ttl; +#ifdef CONFIG_IPV6 + } else if (ipv6) { + struct net *net = dev_net(netdev); + struct flowi6 flow = {}; + struct dst_entry *dst; + + flow.daddr = ip_tun->key.u.ipv6.dst; + flow.flowi4_proto = IPPROTO_UDP; + dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &flow, NULL); + if (!IS_ERR(dst)) { + set_tun->ttl = ip6_dst_hoplimit(dst); + dst_release(dst); + } else { + set_tun->ttl = net->ipv6.devconf_all->hop_limit; + } +#endif } else { struct net *net = dev_net(netdev); struct flowi4 flow = {}; @@ -455,7 +479,7 @@ nfp_fl_set_ipv4_tun(struct nfp_app *app, struct nfp_fl_set_ipv4_tun *set_tun, set_tun->tos = ip_tun->key.tos; if (!(ip_tun->key.tun_flags & NFP_FL_TUNNEL_KEY) || - ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS) { + ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_UDP_TUN_FLAGS) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support tunnel flag offload"); return -EOPNOTSUPP; } @@ -467,7 +491,12 @@ nfp_fl_set_ipv4_tun(struct nfp_app *app, struct nfp_fl_set_ipv4_tun *set_tun, } /* Complete pre_tunnel action. */ - pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst; + if (ipv6) { + pre_tun->flags |= cpu_to_be16(NFP_FL_PRE_TUN_IPV6); + pre_tun->ipv6_dst = ip_tun->key.u.ipv6.dst; + } else { + pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst; + } return 0; } @@ -956,8 +985,8 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, struct nfp_flower_pedit_acts *set_act, bool *pkt_host, struct netlink_ext_ack *extack, int act_idx) { - struct nfp_fl_set_ipv4_tun *set_tun; struct nfp_fl_pre_tunnel *pre_tun; + struct nfp_fl_set_tun *set_tun; struct nfp_fl_push_vlan *psh_v; struct nfp_fl_push_mpls *psh_m; struct nfp_fl_pop_vlan *pop_v; @@ -1032,7 +1061,7 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, * If none, the packet falls back before applying other actions. 
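For the IPv6 tunnels added above, the encapsulation TTL comes from the tunnel key when the user supplied one, otherwise from a route lookup, with the namespace-wide hop limit as the fallback. A condensed sketch of that selection, assuming CONFIG_IPV6; the hunk writes the protocol via flow.flowi4_proto, which names the same shared flowi_common field, while the sketch uses the flowi6_proto spelling:

static u8 example_ipv6_encap_ttl(struct net_device *netdev,
				 const struct ip_tunnel_info *ip_tun)
{
	struct net *net = dev_net(netdev);
	struct flowi6 flow = {};
	struct dst_entry *dst;
	u8 ttl;

	if (ip_tun->key.ttl)
		return ip_tun->key.ttl;

	flow.daddr = ip_tun->key.u.ipv6.dst;
	flow.flowi6_proto = IPPROTO_UDP;

	dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &flow, NULL);
	if (IS_ERR(dst))
		return net->ipv6.devconf_all->hop_limit;

	ttl = ip6_dst_hoplimit(dst);
	dst_release(dst);
	return ttl;
}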
*/ if (*a_len + sizeof(struct nfp_fl_pre_tunnel) + - sizeof(struct nfp_fl_set_ipv4_tun) > NFP_FL_MAX_A_SIZ) { + sizeof(struct nfp_fl_set_tun) > NFP_FL_MAX_A_SIZ) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at tunnel encap"); return -EOPNOTSUPP; } @@ -1046,11 +1075,11 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, return err; set_tun = (void *)&nfp_fl->action_data[*a_len]; - err = nfp_fl_set_ipv4_tun(app, set_tun, act, pre_tun, - *tun_type, netdev, extack); + err = nfp_fl_set_tun(app, set_tun, act, pre_tun, *tun_type, + netdev, extack); if (err) return err; - *a_len += sizeof(struct nfp_fl_set_ipv4_tun); + *a_len += sizeof(struct nfp_fl_set_tun); } break; case FLOW_ACTION_TUNNEL_DECAP: diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index 05981b54eaab..a595ddb92bff 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -270,11 +270,17 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb) } goto err_default; case NFP_FLOWER_CMSG_TYPE_NO_NEIGH: - nfp_tunnel_request_route(app, skb); + nfp_tunnel_request_route_v4(app, skb); + break; + case NFP_FLOWER_CMSG_TYPE_NO_NEIGH_V6: + nfp_tunnel_request_route_v6(app, skb); break; case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS: nfp_tunnel_keep_alive(app, skb); break; + case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS_V6: + nfp_tunnel_keep_alive_v6(app, skb); + break; case NFP_FLOWER_CMSG_TYPE_QOS_STATS: nfp_flower_stats_rlim_reply(app, skb); break; @@ -361,7 +367,8 @@ void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb) nfp_flower_process_mtu_ack(app, skb)) { /* Handle MTU acks outside wq to prevent RTNL conflict. */ dev_consume_skb_any(skb); - } else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH) { + } else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH || + cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6) { /* Acks from the NFP that the route is added - ignore. 
*/ dev_consume_skb_any(skb); } else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_PORT_REIFY) { diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 7eb2ec8969c3..9b50d76bbc09 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -26,6 +26,7 @@ #define NFP_FLOWER_LAYER2_GRE BIT(0) #define NFP_FLOWER_LAYER2_GENEVE BIT(5) #define NFP_FLOWER_LAYER2_GENEVE_OP BIT(6) +#define NFP_FLOWER_LAYER2_TUN_IPV6 BIT(7) #define NFP_FLOWER_MASK_VLAN_PRIO GENMASK(15, 13) #define NFP_FLOWER_MASK_VLAN_PRESENT BIT(12) @@ -63,6 +64,7 @@ #define NFP_FL_MAX_GENEVE_OPT_ACT 32 #define NFP_FL_MAX_GENEVE_OPT_CNT 64 #define NFP_FL_MAX_GENEVE_OPT_KEY 32 +#define NFP_FL_MAX_GENEVE_OPT_KEY_V6 8 /* Action opcodes */ #define NFP_FL_ACTION_OPCODE_OUTPUT 0 @@ -70,7 +72,7 @@ #define NFP_FL_ACTION_OPCODE_POP_VLAN 2 #define NFP_FL_ACTION_OPCODE_PUSH_MPLS 3 #define NFP_FL_ACTION_OPCODE_POP_MPLS 4 -#define NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL 6 +#define NFP_FL_ACTION_OPCODE_SET_TUNNEL 6 #define NFP_FL_ACTION_OPCODE_SET_ETHERNET 7 #define NFP_FL_ACTION_OPCODE_SET_MPLS 8 #define NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS 9 @@ -99,8 +101,8 @@ /* Tunnel ports */ #define NFP_FL_PORT_TYPE_TUN 0x50000000 -#define NFP_FL_IPV4_TUNNEL_TYPE GENMASK(7, 4) -#define NFP_FL_IPV4_PRE_TUN_INDEX GENMASK(2, 0) +#define NFP_FL_TUNNEL_TYPE GENMASK(7, 4) +#define NFP_FL_PRE_TUN_INDEX GENMASK(2, 0) #define NFP_FLOWER_WORKQ_MAX_SKBS 30000 @@ -206,13 +208,16 @@ struct nfp_fl_pre_lag { struct nfp_fl_pre_tunnel { struct nfp_fl_act_head head; - __be16 reserved; - __be32 ipv4_dst; - /* reserved for use with IPv6 addresses */ - __be32 extra[3]; + __be16 flags; + union { + __be32 ipv4_dst; + struct in6_addr ipv6_dst; + }; }; -struct nfp_fl_set_ipv4_tun { +#define NFP_FL_PRE_TUN_IPV6 BIT(0) + +struct nfp_fl_set_tun { struct nfp_fl_act_head head; __be16 reserved; __be64 tun_id __packed; @@ -387,6 +392,11 @@ struct nfp_flower_tun_ipv4 { __be32 dst; }; +struct nfp_flower_tun_ipv6 { + struct in6_addr src; + struct in6_addr dst; +}; + struct nfp_flower_tun_ip_ext { u8 tos; u8 ttl; @@ -416,6 +426,42 @@ struct nfp_flower_ipv4_udp_tun { __be32 tun_id; }; +/* Flow Frame IPv6 UDP TUNNEL --> Tunnel details (11W/44B) + * ----------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | tos | ttl | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VNI | Reserved | + * 
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_ipv6_udp_tun { + struct nfp_flower_tun_ipv6 ipv6; + __be16 reserved1; + struct nfp_flower_tun_ip_ext ip_ext; + __be32 reserved2; + __be32 tun_id; +}; + /* Flow Frame GRE TUNNEL --> Tunnel details (6W/24B) * ----------------------------------------------------------------- * 3 2 1 @@ -445,6 +491,46 @@ struct nfp_flower_ipv4_gre_tun { __be32 reserved2; }; +/* Flow Frame GRE TUNNEL V6 --> Tunnel details (12W/48B) + * ----------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_src, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 31 - 0 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 63 - 32 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 95 - 64 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv6_addr_dst, 127 - 96 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | tun_flags | tos | ttl | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | Ethertype | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Key | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_ipv6_gre_tun { + struct nfp_flower_tun_ipv6 ipv6; + __be16 tun_flags; + struct nfp_flower_tun_ip_ext ip_ext; + __be16 reserved1; + __be16 ethertype; + __be32 tun_key; + __be32 reserved2; +}; + struct nfp_flower_geneve_options { u8 data[NFP_FL_MAX_GENEVE_OPT_KEY]; }; @@ -485,6 +571,10 @@ enum nfp_flower_cmsg_type_port { NFP_FLOWER_CMSG_TYPE_QOS_DEL = 19, NFP_FLOWER_CMSG_TYPE_QOS_STATS = 20, NFP_FLOWER_CMSG_TYPE_PRE_TUN_RULE = 21, + NFP_FLOWER_CMSG_TYPE_TUN_IPS_V6 = 22, + NFP_FLOWER_CMSG_TYPE_NO_NEIGH_V6 = 23, + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6 = 24, + NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS_V6 = 25, NFP_FLOWER_CMSG_TYPE_MAX = 32, }; diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index e0c985fcaec1..d55d0d33bc45 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -43,6 +43,7 @@ struct nfp_app; #define NFP_FL_FEATS_VF_RLIM BIT(4) #define NFP_FL_FEATS_FLOW_MOD BIT(5) #define NFP_FL_FEATS_PRE_TUN_RULES BIT(6) +#define NFP_FL_FEATS_IPV6_TUN BIT(7) #define NFP_FL_FEATS_FLOW_MERGE BIT(30) #define NFP_FL_FEATS_LAG BIT(31) @@ -62,18 +63,26 @@ struct nfp_fl_stats_id { * struct nfp_fl_tunnel_offloads - priv data for tunnel offloads * @offloaded_macs: Hashtable of the offloaded MAC addresses * @ipv4_off_list: List of IPv4 addresses to offload - * @neigh_off_list: List of neighbour offloads + * @ipv6_off_list: List of IPv6 addresses to offload + * @neigh_off_list_v4: List of IPv4 neighbour offloads + * @neigh_off_list_v6: List of IPv6 neighbour offloads * @ipv4_off_lock: Lock for the IPv4 
address list - * @neigh_off_lock: Lock for the neighbour address list + * @ipv6_off_lock: Lock for the IPv6 address list + * @neigh_off_lock_v4: Lock for the IPv4 neighbour address list + * @neigh_off_lock_v6: Lock for the IPv6 neighbour address list * @mac_off_ids: IDA to manage id assignment for offloaded MACs * @neigh_nb: Notifier to monitor neighbour state */ struct nfp_fl_tunnel_offloads { struct rhashtable offloaded_macs; struct list_head ipv4_off_list; - struct list_head neigh_off_list; + struct list_head ipv6_off_list; + struct list_head neigh_off_list_v4; + struct list_head neigh_off_list_v6; struct mutex ipv4_off_lock; - spinlock_t neigh_off_lock; + struct mutex ipv6_off_lock; + spinlock_t neigh_off_lock_v4; + spinlock_t neigh_off_lock_v6; struct ida mac_off_ids; struct notifier_block neigh_nb; }; @@ -273,12 +282,25 @@ struct nfp_fl_stats { u64 used; }; +/** + * struct nfp_ipv6_addr_entry - cached IPv6 addresses + * @ipv6_addr: IP address + * @ref_count: number of rules currently using this IP + * @list: list pointer + */ +struct nfp_ipv6_addr_entry { + struct in6_addr ipv6_addr; + int ref_count; + struct list_head list; +}; + struct nfp_fl_payload { struct nfp_fl_rule_metadata meta; unsigned long tc_flower_cookie; struct rhash_head fl_node; struct rcu_head rcu; __be32 nfp_tun_ipv4_addr; + struct nfp_ipv6_addr_entry *nfp_tun_ipv6; struct net_device *ingress_dev; char *unmasked_data; char *mask_data; @@ -396,8 +418,14 @@ int nfp_tunnel_mac_event_handler(struct nfp_app *app, unsigned long event, void *ptr); void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4); void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4); -void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb); +void +nfp_tunnel_put_ipv6_off(struct nfp_app *app, struct nfp_ipv6_addr_entry *entry); +struct nfp_ipv6_addr_entry * +nfp_tunnel_add_ipv6_off(struct nfp_app *app, struct in6_addr *ipv6); +void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb); +void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb); void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb); +void nfp_tunnel_keep_alive_v6(struct nfp_app *app, struct sk_buff *skb); void nfp_flower_lag_init(struct nfp_fl_lag *lag); void nfp_flower_lag_cleanup(struct nfp_fl_lag *lag); int nfp_flower_lag_reset(struct nfp_fl_lag *lag); diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index 9cc3ba17ff69..546bc01d507d 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -10,9 +10,8 @@ static void nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext, struct nfp_flower_meta_tci *msk, - struct flow_cls_offload *flow, u8 key_type) + struct flow_rule *rule, u8 key_type) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); u16 tmp_tci; memset(ext, 0, sizeof(struct nfp_flower_meta_tci)); @@ -77,11 +76,8 @@ nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port, static void nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, - struct nfp_flower_mac_mpls *msk, - struct flow_cls_offload *flow) + struct nfp_flower_mac_mpls *msk, struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - memset(ext, 0, sizeof(struct nfp_flower_mac_mpls)); memset(msk, 0, sizeof(struct nfp_flower_mac_mpls)); @@ -130,10 +126,8 @@ nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, static void 
nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext, struct nfp_flower_tp_ports *msk, - struct flow_cls_offload *flow) + struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - memset(ext, 0, sizeof(struct nfp_flower_tp_ports)); memset(msk, 0, sizeof(struct nfp_flower_tp_ports)); @@ -150,11 +144,8 @@ nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext, static void nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext, - struct nfp_flower_ip_ext *msk, - struct flow_cls_offload *flow) + struct nfp_flower_ip_ext *msk, struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { struct flow_match_basic match; @@ -224,10 +215,8 @@ nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext, static void nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext, - struct nfp_flower_ipv4 *msk, - struct flow_cls_offload *flow) + struct nfp_flower_ipv4 *msk, struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); struct flow_match_ipv4_addrs match; memset(ext, 0, sizeof(struct nfp_flower_ipv4)); @@ -241,16 +230,13 @@ nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext, msk->ipv4_dst = match.mask->dst; } - nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, flow); + nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); } static void nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext, - struct nfp_flower_ipv6 *msk, - struct flow_cls_offload *flow) + struct nfp_flower_ipv6 *msk, struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - memset(ext, 0, sizeof(struct nfp_flower_ipv6)); memset(msk, 0, sizeof(struct nfp_flower_ipv6)); @@ -264,16 +250,15 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext, msk->ipv6_dst = match.mask->dst; } - nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, flow); + nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); } static int -nfp_flower_compile_geneve_opt(void *ext, void *msk, - struct flow_cls_offload *flow) +nfp_flower_compile_geneve_opt(void *ext, void *msk, struct flow_rule *rule) { struct flow_match_enc_opts match; - flow_rule_match_enc_opts(flow->rule, &match); + flow_rule_match_enc_opts(rule, &match); memcpy(ext, match.key->data, match.key->len); memcpy(msk, match.mask->data, match.mask->len); @@ -283,10 +268,8 @@ nfp_flower_compile_geneve_opt(void *ext, void *msk, static void nfp_flower_compile_tun_ipv4_addrs(struct nfp_flower_tun_ipv4 *ext, struct nfp_flower_tun_ipv4 *msk, - struct flow_cls_offload *flow) + struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { struct flow_match_ipv4_addrs match; @@ -299,12 +282,26 @@ nfp_flower_compile_tun_ipv4_addrs(struct nfp_flower_tun_ipv4 *ext, } static void +nfp_flower_compile_tun_ipv6_addrs(struct nfp_flower_tun_ipv6 *ext, + struct nfp_flower_tun_ipv6 *msk, + struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_enc_ipv6_addrs(rule, &match); + ext->src = match.key->src; + ext->dst = match.key->dst; + msk->src = match.mask->src; + msk->dst = match.mask->dst; + } +} + +static void nfp_flower_compile_tun_ip_ext(struct nfp_flower_tun_ip_ext *ext, struct nfp_flower_tun_ip_ext *msk, - struct flow_cls_offload *flow) + struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - if 
(flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { struct flow_match_ip match; @@ -317,57 +314,97 @@ nfp_flower_compile_tun_ip_ext(struct nfp_flower_tun_ip_ext *ext, } static void -nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext, - struct nfp_flower_ipv4_gre_tun *msk, - struct flow_cls_offload *flow) +nfp_flower_compile_tun_udp_key(__be32 *key, __be32 *key_msk, + struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - - memset(ext, 0, sizeof(struct nfp_flower_ipv4_gre_tun)); - memset(msk, 0, sizeof(struct nfp_flower_ipv4_gre_tun)); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid match; + u32 vni; - /* NVGRE is the only supported GRE tunnel type */ - ext->ethertype = cpu_to_be16(ETH_P_TEB); - msk->ethertype = cpu_to_be16(~0); + flow_rule_match_enc_keyid(rule, &match); + vni = be32_to_cpu(match.key->keyid) << NFP_FL_TUN_VNI_OFFSET; + *key = cpu_to_be32(vni); + vni = be32_to_cpu(match.mask->keyid) << NFP_FL_TUN_VNI_OFFSET; + *key_msk = cpu_to_be32(vni); + } +} +static void +nfp_flower_compile_tun_gre_key(__be32 *key, __be32 *key_msk, __be16 *flags, + __be16 *flags_msk, struct flow_rule *rule) +{ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { struct flow_match_enc_keyid match; flow_rule_match_enc_keyid(rule, &match); - ext->tun_key = match.key->keyid; - msk->tun_key = match.mask->keyid; + *key = match.key->keyid; + *key_msk = match.mask->keyid; - ext->tun_flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); - msk->tun_flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); + *flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); + *flags_msk = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); } +} + +static void +nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext, + struct nfp_flower_ipv4_gre_tun *msk, + struct flow_rule *rule) +{ + memset(ext, 0, sizeof(struct nfp_flower_ipv4_gre_tun)); + memset(msk, 0, sizeof(struct nfp_flower_ipv4_gre_tun)); + + /* NVGRE is the only supported GRE tunnel type */ + ext->ethertype = cpu_to_be16(ETH_P_TEB); + msk->ethertype = cpu_to_be16(~0); - nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, flow); - nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, flow); + nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, rule); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); + nfp_flower_compile_tun_gre_key(&ext->tun_key, &msk->tun_key, + &ext->tun_flags, &msk->tun_flags, rule); } static void nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext, struct nfp_flower_ipv4_udp_tun *msk, - struct flow_cls_offload *flow) + struct flow_rule *rule) { - struct flow_rule *rule = flow_cls_offload_flow_rule(flow); - memset(ext, 0, sizeof(struct nfp_flower_ipv4_udp_tun)); memset(msk, 0, sizeof(struct nfp_flower_ipv4_udp_tun)); - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_match_enc_keyid match; - u32 temp_vni; + nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, rule); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); + nfp_flower_compile_tun_udp_key(&ext->tun_id, &msk->tun_id, rule); +} - flow_rule_match_enc_keyid(rule, &match); - temp_vni = be32_to_cpu(match.key->keyid) << NFP_FL_TUN_VNI_OFFSET; - ext->tun_id = cpu_to_be32(temp_vni); - temp_vni = be32_to_cpu(match.mask->keyid) << NFP_FL_TUN_VNI_OFFSET; - msk->tun_id = cpu_to_be32(temp_vni); - } +static void +nfp_flower_compile_ipv6_udp_tun(struct nfp_flower_ipv6_udp_tun *ext, + struct nfp_flower_ipv6_udp_tun *msk, + struct flow_rule 
*rule) +{ + memset(ext, 0, sizeof(struct nfp_flower_ipv6_udp_tun)); + memset(msk, 0, sizeof(struct nfp_flower_ipv6_udp_tun)); + + nfp_flower_compile_tun_ipv6_addrs(&ext->ipv6, &msk->ipv6, rule); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); + nfp_flower_compile_tun_udp_key(&ext->tun_id, &msk->tun_id, rule); +} + +static void +nfp_flower_compile_ipv6_gre_tun(struct nfp_flower_ipv6_gre_tun *ext, + struct nfp_flower_ipv6_gre_tun *msk, + struct flow_rule *rule) +{ + memset(ext, 0, sizeof(struct nfp_flower_ipv6_gre_tun)); + memset(msk, 0, sizeof(struct nfp_flower_ipv6_gre_tun)); + + /* NVGRE is the only supported GRE tunnel type */ + ext->ethertype = cpu_to_be16(ETH_P_TEB); + msk->ethertype = cpu_to_be16(~0); - nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, flow); - nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, flow); + nfp_flower_compile_tun_ipv6_addrs(&ext->ipv6, &msk->ipv6, rule); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule); + nfp_flower_compile_tun_gre_key(&ext->tun_key, &msk->tun_key, + &ext->tun_flags, &msk->tun_flags, rule); } int nfp_flower_compile_flow_match(struct nfp_app *app, @@ -378,6 +415,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, enum nfp_flower_tun_type tun_type, struct netlink_ext_ack *extack) { + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); u32 port_id; int err; u8 *ext; @@ -393,7 +431,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, nfp_flower_compile_meta_tci((struct nfp_flower_meta_tci *)ext, (struct nfp_flower_meta_tci *)msk, - flow, key_ls->key_layer); + rule, key_ls->key_layer); ext += sizeof(struct nfp_flower_meta_tci); msk += sizeof(struct nfp_flower_meta_tci); @@ -425,7 +463,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, if (NFP_FLOWER_LAYER_MAC & key_ls->key_layer) { nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)ext, (struct nfp_flower_mac_mpls *)msk, - flow); + rule); ext += sizeof(struct nfp_flower_mac_mpls); msk += sizeof(struct nfp_flower_mac_mpls); } @@ -433,7 +471,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, if (NFP_FLOWER_LAYER_TP & key_ls->key_layer) { nfp_flower_compile_tport((struct nfp_flower_tp_ports *)ext, (struct nfp_flower_tp_ports *)msk, - flow); + rule); ext += sizeof(struct nfp_flower_tp_ports); msk += sizeof(struct nfp_flower_tp_ports); } @@ -441,7 +479,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, if (NFP_FLOWER_LAYER_IPV4 & key_ls->key_layer) { nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)ext, (struct nfp_flower_ipv4 *)msk, - flow); + rule); ext += sizeof(struct nfp_flower_ipv4); msk += sizeof(struct nfp_flower_ipv4); } @@ -449,43 +487,83 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, if (NFP_FLOWER_LAYER_IPV6 & key_ls->key_layer) { nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)ext, (struct nfp_flower_ipv6 *)msk, - flow); + rule); ext += sizeof(struct nfp_flower_ipv6); msk += sizeof(struct nfp_flower_ipv6); } if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GRE) { - __be32 tun_dst; - - nfp_flower_compile_ipv4_gre_tun((void *)ext, (void *)msk, flow); - tun_dst = ((struct nfp_flower_ipv4_gre_tun *)ext)->ipv4.dst; - ext += sizeof(struct nfp_flower_ipv4_gre_tun); - msk += sizeof(struct nfp_flower_ipv4_gre_tun); - - /* Store the tunnel destination in the rule data. - * This must be present and be an exact match. 
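nfp_flower_compile_tun_udp_key() above factors the VXLAN/GENEVE key-ID handling out of the per-address-family compilers: the 24-bit VNI from the flow dissector key is shifted up into the firmware's 32-bit tunnel-ID word. A minimal userspace sketch of that conversion, assuming the 8-bit shift that NFP_FL_TUN_VNI_OFFSET denotes elsewhere in the driver (the constant's value is not defined in this hunk):

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

/* Assumed value of NFP_FL_TUN_VNI_OFFSET (defined in the driver's cmsg.h,
 * not in this hunk). */
#define TUN_VNI_OFFSET 8

/* Mirror nfp_flower_compile_tun_udp_key(): take the big-endian key ID from
 * the flow dissector and return the big-endian tunnel-ID word with the
 * 24-bit VNI shifted into the upper bits. */
static uint32_t compile_tun_id(uint32_t keyid_be)
{
	uint32_t vni = ntohl(keyid_be) << TUN_VNI_OFFSET;

	return htonl(vni);
}

int main(void)
{
	uint32_t keyid_be = htonl(42);	/* VNI 42 as carried in the match key */
	uint32_t tun_id = compile_tun_id(keyid_be);

	printf("tun_id word (cpu order): 0x%08x\n", (unsigned int)ntohl(tun_id));
	return 0;
}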
- */ - nfp_flow->nfp_tun_ipv4_addr = tun_dst; - nfp_tunnel_add_ipv4_off(app, tun_dst); + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) { + struct nfp_flower_ipv6_gre_tun *gre_match; + struct nfp_ipv6_addr_entry *entry; + struct in6_addr *dst; + + nfp_flower_compile_ipv6_gre_tun((void *)ext, + (void *)msk, rule); + gre_match = (struct nfp_flower_ipv6_gre_tun *)ext; + dst = &gre_match->ipv6.dst; + ext += sizeof(struct nfp_flower_ipv6_gre_tun); + msk += sizeof(struct nfp_flower_ipv6_gre_tun); + + entry = nfp_tunnel_add_ipv6_off(app, dst); + if (!entry) + return -EOPNOTSUPP; + + nfp_flow->nfp_tun_ipv6 = entry; + } else { + __be32 dst; + + nfp_flower_compile_ipv4_gre_tun((void *)ext, + (void *)msk, rule); + dst = ((struct nfp_flower_ipv4_gre_tun *)ext)->ipv4.dst; + ext += sizeof(struct nfp_flower_ipv4_gre_tun); + msk += sizeof(struct nfp_flower_ipv4_gre_tun); + + /* Store the tunnel destination in the rule data. + * This must be present and be an exact match. + */ + nfp_flow->nfp_tun_ipv4_addr = dst; + nfp_tunnel_add_ipv4_off(app, dst); + } } if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN || key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE) { - __be32 tun_dst; - - nfp_flower_compile_ipv4_udp_tun((void *)ext, (void *)msk, flow); - tun_dst = ((struct nfp_flower_ipv4_udp_tun *)ext)->ipv4.dst; - ext += sizeof(struct nfp_flower_ipv4_udp_tun); - msk += sizeof(struct nfp_flower_ipv4_udp_tun); - - /* Store the tunnel destination in the rule data. - * This must be present and be an exact match. - */ - nfp_flow->nfp_tun_ipv4_addr = tun_dst; - nfp_tunnel_add_ipv4_off(app, tun_dst); + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) { + struct nfp_flower_ipv6_udp_tun *udp_match; + struct nfp_ipv6_addr_entry *entry; + struct in6_addr *dst; + + nfp_flower_compile_ipv6_udp_tun((void *)ext, + (void *)msk, rule); + udp_match = (struct nfp_flower_ipv6_udp_tun *)ext; + dst = &udp_match->ipv6.dst; + ext += sizeof(struct nfp_flower_ipv6_udp_tun); + msk += sizeof(struct nfp_flower_ipv6_udp_tun); + + entry = nfp_tunnel_add_ipv6_off(app, dst); + if (!entry) + return -EOPNOTSUPP; + + nfp_flow->nfp_tun_ipv6 = entry; + } else { + __be32 dst; + + nfp_flower_compile_ipv4_udp_tun((void *)ext, + (void *)msk, rule); + dst = ((struct nfp_flower_ipv4_udp_tun *)ext)->ipv4.dst; + ext += sizeof(struct nfp_flower_ipv4_udp_tun); + msk += sizeof(struct nfp_flower_ipv4_udp_tun); + + /* Store the tunnel destination in the rule data. + * This must be present and be an exact match. 
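Both tunnel branches above append their per-layer structures to the match key the same way the non-tunnel layers do: nfp_flower_compile_flow_match() keeps two parallel buffers, one for exact values (ext) and one for masks (msk), and advances both by the size of each compiled layer. A simplified, self-contained sketch of that walk; the two layer structs here are illustrative stand-ins, not the driver's real layouts:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative stand-ins for two of the per-layer key structures. */
struct layer_mac  { uint8_t dst[6]; uint8_t src[6]; };
struct layer_ipv4 { uint32_t saddr; uint32_t daddr; };

struct key_buf {
	uint8_t ext[64];	/* exact-match values */
	uint8_t msk[64];	/* corresponding masks */
	size_t  off;		/* shared write offset */
};

/* Append one compiled layer to both buffers and advance the offset,
 * mirroring the "ext += sizeof(...); msk += sizeof(...);" walk above. */
static void push_layer(struct key_buf *kb, const void *val,
		       const void *mask, size_t len)
{
	memcpy(kb->ext + kb->off, val, len);
	memcpy(kb->msk + kb->off, mask, len);
	kb->off += len;
}

int main(void)
{
	struct key_buf kb = { .off = 0 };
	struct layer_mac mac = { .dst = { 1, 2, 3, 4, 5, 6 } };
	struct layer_mac mac_msk;
	struct layer_ipv4 ip = { .daddr = 0x0a000001 };
	struct layer_ipv4 ip_msk = { .daddr = 0xffffffff };

	memset(&mac_msk, 0xff, sizeof(mac_msk));	/* exact match on MACs */
	push_layer(&kb, &mac, &mac_msk, sizeof(mac));
	push_layer(&kb, &ip, &ip_msk, sizeof(ip));
	printf("compiled key length: %zu bytes\n", kb.off);
	return 0;
}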
+ */ + nfp_flow->nfp_tun_ipv4_addr = dst; + nfp_tunnel_add_ipv4_off(app, dst); + } if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) { - err = nfp_flower_compile_geneve_opt(ext, msk, flow); + err = nfp_flower_compile_geneve_opt(ext, msk, rule); if (err) return err; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 987ae221f6be..7ca5c1becfcf 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -54,6 +54,10 @@ (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) +#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_V6_R \ + (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) + #define NFP_FLOWER_MERGE_FIELDS \ (NFP_FLOWER_LAYER_PORT | \ NFP_FLOWER_LAYER_MAC | \ @@ -64,7 +68,8 @@ #define NFP_FLOWER_PRE_TUN_RULE_FIELDS \ (NFP_FLOWER_LAYER_PORT | \ NFP_FLOWER_LAYER_MAC | \ - NFP_FLOWER_LAYER_IPV4) + NFP_FLOWER_LAYER_IPV4 | \ + NFP_FLOWER_LAYER_IPV6) struct nfp_flower_merge_check { union { @@ -146,10 +151,11 @@ static bool nfp_flower_check_higher_than_l3(struct flow_cls_offload *f) static int nfp_flower_calc_opt_layer(struct flow_dissector_key_enc_opts *enc_opts, - u32 *key_layer_two, int *key_size, + u32 *key_layer_two, int *key_size, bool ipv6, struct netlink_ext_ack *extack) { - if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY) { + if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY || + (ipv6 && enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY_V6)) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: geneve options exceed maximum length"); return -EOPNOTSUPP; } @@ -167,7 +173,7 @@ nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports, struct flow_dissector_key_enc_opts *enc_op, u32 *key_layer_two, u8 *key_layer, int *key_size, struct nfp_flower_priv *priv, - enum nfp_flower_tun_type *tun_type, + enum nfp_flower_tun_type *tun_type, bool ipv6, struct netlink_ext_ack *extack) { int err; @@ -176,7 +182,15 @@ nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports, case htons(IANA_VXLAN_UDP_PORT): *tun_type = NFP_FL_TUNNEL_VXLAN; *key_layer |= NFP_FLOWER_LAYER_VXLAN; - *key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + + if (ipv6) { + *key_layer |= NFP_FLOWER_LAYER_EXT_META; + *key_size += sizeof(struct nfp_flower_ext_meta); + *key_layer_two |= NFP_FLOWER_LAYER2_TUN_IPV6; + *key_size += sizeof(struct nfp_flower_ipv6_udp_tun); + } else { + *key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + } if (enc_op) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on vxlan tunnels"); @@ -192,7 +206,13 @@ nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports, *key_layer |= NFP_FLOWER_LAYER_EXT_META; *key_size += sizeof(struct nfp_flower_ext_meta); *key_layer_two |= NFP_FLOWER_LAYER2_GENEVE; - *key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + + if (ipv6) { + *key_layer_two |= NFP_FLOWER_LAYER2_TUN_IPV6; + *key_size += sizeof(struct nfp_flower_ipv6_udp_tun); + } else { + *key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + } if (!enc_op) break; @@ -200,8 +220,8 @@ nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports, NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support geneve option offload"); return -EOPNOTSUPP; } - err = nfp_flower_calc_opt_layer(enc_op, key_layer_two, - key_size, extack); + err = nfp_flower_calc_opt_layer(enc_op, key_layer_two, key_size, + ipv6, extack); if (err) 
return err; break; @@ -237,6 +257,8 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, /* If any tun dissector is used then the required set must be used. */ if (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR && + (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_V6_R) + != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_V6_R && (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: tunnel match not supported"); @@ -268,8 +290,10 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { struct flow_match_enc_opts enc_op = { NULL, NULL }; struct flow_match_ipv4_addrs ipv4_addrs; + struct flow_match_ipv6_addrs ipv6_addrs; struct flow_match_control enc_ctl; struct flow_match_ports enc_ports; + bool ipv6_tun = false; flow_rule_match_enc_control(rule, &enc_ctl); @@ -277,38 +301,62 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, NL_SET_ERR_MSG_MOD(extack, "unsupported offload: wildcarded protocols on tunnels are not supported"); return -EOPNOTSUPP; } - if (enc_ctl.key->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS) { - NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only IPv4 tunnels are supported"); + + ipv6_tun = enc_ctl.key->addr_type == + FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (ipv6_tun && + !(priv->flower_ext_feats & NFP_FL_FEATS_IPV6_TUN)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: firmware does not support IPv6 tunnels"); return -EOPNOTSUPP; } - /* These fields are already verified as used. */ - flow_rule_match_enc_ipv4_addrs(rule, &ipv4_addrs); - if (ipv4_addrs.mask->dst != cpu_to_be32(~0)) { - NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match IPv4 destination address is supported"); + if (!ipv6_tun && + enc_ctl.key->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: tunnel address type not IPv4 or IPv6"); return -EOPNOTSUPP; } + if (ipv6_tun) { + flow_rule_match_enc_ipv6_addrs(rule, &ipv6_addrs); + if (memchr_inv(&ipv6_addrs.mask->dst, 0xff, + sizeof(ipv6_addrs.mask->dst))) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match IPv6 destination address is supported"); + return -EOPNOTSUPP; + } + } else { + flow_rule_match_enc_ipv4_addrs(rule, &ipv4_addrs); + if (ipv4_addrs.mask->dst != cpu_to_be32(~0)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match IPv4 destination address is supported"); + return -EOPNOTSUPP; + } + } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) flow_rule_match_enc_opts(rule, &enc_op); - if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) { /* check if GRE, which has no enc_ports */ - if (netif_is_gretap(netdev)) { - *tun_type = NFP_FL_TUNNEL_GRE; - key_layer |= NFP_FLOWER_LAYER_EXT_META; - key_size += sizeof(struct nfp_flower_ext_meta); - key_layer_two |= NFP_FLOWER_LAYER2_GRE; - key_size += - sizeof(struct nfp_flower_ipv4_gre_tun); + if (!netif_is_gretap(netdev)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: an exact match on L4 destination port is required for non-GRE tunnels"); + return -EOPNOTSUPP; + } - if (enc_op.key) { - NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on GRE tunnels"); - return -EOPNOTSUPP; - } + *tun_type = NFP_FL_TUNNEL_GRE; + key_layer |= NFP_FLOWER_LAYER_EXT_META; + key_size += sizeof(struct nfp_flower_ext_meta); + key_layer_two |= NFP_FLOWER_LAYER2_GRE; + + if (ipv6_tun) { + key_layer_two 
|= NFP_FLOWER_LAYER2_TUN_IPV6; + key_size += + sizeof(struct nfp_flower_ipv6_udp_tun); } else { - NL_SET_ERR_MSG_MOD(extack, "unsupported offload: an exact match on L4 destination port is required for non-GRE tunnels"); + key_size += + sizeof(struct nfp_flower_ipv4_udp_tun); + } + + if (enc_op.key) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on GRE tunnels"); return -EOPNOTSUPP; } } else { @@ -323,7 +371,8 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, &key_layer_two, &key_layer, &key_size, priv, - tun_type, extack); + tun_type, ipv6_tun, + extack); if (err) return err; @@ -491,6 +540,7 @@ nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer) goto err_free_mask; flow_pay->nfp_tun_ipv4_addr = 0; + flow_pay->nfp_tun_ipv6 = NULL; flow_pay->meta.flags = 0; INIT_LIST_HEAD(&flow_pay->linked_flows); flow_pay->in_hw = false; @@ -517,10 +567,12 @@ nfp_flower_update_merge_with_actions(struct nfp_fl_payload *flow, struct nfp_fl_set_ip4_addrs *ipv4_add; struct nfp_fl_set_ipv6_addr *ipv6_add; struct nfp_fl_push_vlan *push_vlan; + struct nfp_fl_pre_tunnel *pre_tun; struct nfp_fl_set_tport *tport; struct nfp_fl_set_eth *eth; struct nfp_fl_act_head *a; unsigned int act_off = 0; + bool ipv6_tun = false; u8 act_id = 0; u8 *ports; int i; @@ -542,14 +594,18 @@ nfp_flower_update_merge_with_actions(struct nfp_fl_payload *flow, case NFP_FL_ACTION_OPCODE_POP_VLAN: merge->tci = cpu_to_be16(0); break; - case NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL: + case NFP_FL_ACTION_OPCODE_SET_TUNNEL: /* New tunnel header means l2 to l4 can be matched. */ eth_broadcast_addr(&merge->l2.mac_dst[0]); eth_broadcast_addr(&merge->l2.mac_src[0]); memset(&merge->l4, 0xff, sizeof(struct nfp_flower_tp_ports)); - memset(&merge->ipv4, 0xff, - sizeof(struct nfp_flower_ipv4)); + if (ipv6_tun) + memset(&merge->ipv6, 0xff, + sizeof(struct nfp_flower_ipv6)); + else + memset(&merge->ipv4, 0xff, + sizeof(struct nfp_flower_ipv4)); break; case NFP_FL_ACTION_OPCODE_SET_ETHERNET: eth = (struct nfp_fl_set_eth *)a; @@ -597,6 +653,10 @@ nfp_flower_update_merge_with_actions(struct nfp_fl_payload *flow, ports[i] |= tport->tp_port_mask[i]; break; case NFP_FL_ACTION_OPCODE_PRE_TUNNEL: + pre_tun = (struct nfp_fl_pre_tunnel *)a; + ipv6_tun = be16_to_cpu(pre_tun->flags) & + NFP_FL_PRE_TUN_IPV6; + break; case NFP_FL_ACTION_OPCODE_PRE_LAG: case NFP_FL_ACTION_OPCODE_PUSH_GENEVE: break; @@ -765,15 +825,15 @@ nfp_fl_verify_post_tun_acts(char *acts, int len, struct nfp_fl_push_vlan **vlan) static int nfp_fl_push_vlan_after_tun(char *acts, int len, struct nfp_fl_push_vlan *vlan) { - struct nfp_fl_set_ipv4_tun *tun; + struct nfp_fl_set_tun *tun; struct nfp_fl_act_head *a; unsigned int act_off = 0; while (act_off < len) { a = (struct nfp_fl_act_head *)&acts[act_off]; - if (a->jump_id == NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL) { - tun = (struct nfp_fl_set_ipv4_tun *)a; + if (a->jump_id == NFP_FL_ACTION_OPCODE_SET_TUNNEL) { + tun = (struct nfp_fl_set_tun *)a; tun->outer_vlan_tpid = vlan->vlan_tpid; tun->outer_vlan_tci = vlan->vlan_tci; @@ -1058,15 +1118,22 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app, return -EOPNOTSUPP; } - if (key_layer & NFP_FLOWER_LAYER_IPV4) { + if (key_layer & NFP_FLOWER_LAYER_IPV4 || + key_layer & NFP_FLOWER_LAYER_IPV6) { + /* Flags and proto fields have same offset in IPv4 and IPv6. */ int ip_flags = offsetof(struct nfp_flower_ipv4, ip_ext.flags); int ip_proto = offsetof(struct nfp_flower_ipv4, ip_ext.proto); + int size; int i; + size = key_layer & NFP_FLOWER_LAYER_IPV4 ? 
+ sizeof(struct nfp_flower_ipv4) : + sizeof(struct nfp_flower_ipv6); + mask += sizeof(struct nfp_flower_mac_mpls); /* Ensure proto and flags are the only IP layer fields. */ - for (i = 0; i < sizeof(struct nfp_flower_ipv4); i++) + for (i = 0; i < size; i++) if (mask[i] && i != ip_flags && i != ip_proto) { NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: only flags and proto can be matched in ip header"); return -EOPNOTSUPP; @@ -1195,6 +1262,8 @@ err_remove_rhash: err_release_metadata: nfp_modify_flow_metadata(app, flow_pay); err_destroy_flow: + if (flow_pay->nfp_tun_ipv6) + nfp_tunnel_put_ipv6_off(app, flow_pay->nfp_tun_ipv6); kfree(flow_pay->action_data); kfree(flow_pay->mask_data); kfree(flow_pay->unmasked_data); @@ -1311,6 +1380,9 @@ nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev, if (nfp_flow->nfp_tun_ipv4_addr) nfp_tunnel_del_ipv4_off(app, nfp_flow->nfp_tun_ipv4_addr); + if (nfp_flow->nfp_tun_ipv6) + nfp_tunnel_put_ipv6_off(app, nfp_flow->nfp_tun_ipv6); + if (!nfp_flow->in_hw) { err = 0; goto err_free_merge_flow; diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 2600ce476d6b..2df3deedf9fd 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -55,6 +55,25 @@ struct nfp_tun_active_tuns { }; /** + * struct nfp_tun_active_tuns_v6 - periodic message of active IPv6 tunnels + * @seq: sequence number of the message + * @count: number of tunnels report in message + * @flags: options part of the request + * @tun_info.ipv6: dest IPv6 address of active route + * @tun_info.egress_port: port the encapsulated packet egressed + * @tun_info: tunnels that have sent traffic in reported period + */ +struct nfp_tun_active_tuns_v6 { + __be32 seq; + __be32 count; + __be32 flags; + struct route_ip_info_v6 { + struct in6_addr ipv6; + __be32 egress_port; + } tun_info[]; +}; + +/** * struct nfp_tun_neigh - neighbour/route entry on the NFP * @dst_ipv4: destination IPv4 address * @src_ipv4: source IPv4 address @@ -71,6 +90,22 @@ struct nfp_tun_neigh { }; /** + * struct nfp_tun_neigh_v6 - neighbour/route entry on the NFP + * @dst_ipv6: destination IPv6 address + * @src_ipv6: source IPv6 address + * @dst_addr: destination MAC address + * @src_addr: source MAC address + * @port_id: NFP port to output packet on - associated with source IPv6 + */ +struct nfp_tun_neigh_v6 { + struct in6_addr dst_ipv6; + struct in6_addr src_ipv6; + u8 dst_addr[ETH_ALEN]; + u8 src_addr[ETH_ALEN]; + __be32 port_id; +}; + +/** * struct nfp_tun_req_route_ipv4 - NFP requests a route/neighbour lookup * @ingress_port: ingress port of packet that signalled request * @ipv4_addr: destination ipv4 address for route @@ -83,13 +118,23 @@ struct nfp_tun_req_route_ipv4 { }; /** - * struct nfp_ipv4_route_entry - routes that are offloaded to the NFP - * @ipv4_addr: destination of route + * struct nfp_tun_req_route_ipv6 - NFP requests an IPv6 route/neighbour lookup + * @ingress_port: ingress port of packet that signalled request + * @ipv6_addr: destination ipv6 address for route + */ +struct nfp_tun_req_route_ipv6 { + __be32 ingress_port; + struct in6_addr ipv6_addr; +}; + +/** + * struct nfp_offloaded_route - routes that are offloaded to the NFP * @list: list pointer + * @ip_add: destination of route - can be IPv4 or IPv6 */ -struct nfp_ipv4_route_entry { - __be32 ipv4_addr; +struct nfp_offloaded_route { struct list_head list; + u8 ip_add[]; }; #define 
NFP_FL_IPV4_ADDRS_MAX 32 @@ -116,6 +161,18 @@ struct nfp_ipv4_addr_entry { struct list_head list; }; +#define NFP_FL_IPV6_ADDRS_MAX 4 + +/** + * struct nfp_tun_ipv6_addr - set the IP address list on the NFP + * @count: number of IPs populated in the array + * @ipv6_addr: array of IPV6_ADDRS_MAX 128 bit IPv6 addresses + */ +struct nfp_tun_ipv6_addr { + __be32 count; + struct in6_addr ipv6_addr[NFP_FL_IPV6_ADDRS_MAX]; +}; + #define NFP_TUN_MAC_OFFLOAD_DEL_FLAG 0x2 /** @@ -206,6 +263,49 @@ void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb) rcu_read_unlock(); } +void nfp_tunnel_keep_alive_v6(struct nfp_app *app, struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct nfp_tun_active_tuns_v6 *payload; + struct net_device *netdev; + int count, i, pay_len; + struct neighbour *n; + void *ipv6_add; + u32 port; + + payload = nfp_flower_cmsg_get_data(skb); + count = be32_to_cpu(payload->count); + if (count > NFP_FL_IPV6_ADDRS_MAX) { + nfp_flower_cmsg_warn(app, "IPv6 tunnel keep-alive request exceeds max routes.\n"); + return; + } + + pay_len = nfp_flower_cmsg_get_data_len(skb); + if (pay_len != struct_size(payload, tun_info, count)) { + nfp_flower_cmsg_warn(app, "Corruption in tunnel keep-alive message.\n"); + return; + } + + rcu_read_lock(); + for (i = 0; i < count; i++) { + ipv6_add = &payload->tun_info[i].ipv6; + port = be32_to_cpu(payload->tun_info[i].egress_port); + netdev = nfp_app_dev_get(app, port, NULL); + if (!netdev) + continue; + + n = neigh_lookup(&nd_tbl, ipv6_add, netdev); + if (!n) + continue; + + /* Update the used timestamp of neighbour */ + neigh_event_send(n, NULL); + neigh_release(n); + } + rcu_read_unlock(); +#endif +} + static int nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata, gfp_t flag) @@ -224,71 +324,126 @@ nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata, return 0; } -static bool nfp_tun_has_route(struct nfp_app *app, __be32 ipv4_addr) +static bool +__nfp_tun_has_route(struct list_head *route_list, spinlock_t *list_lock, + void *add, int add_len) { - struct nfp_flower_priv *priv = app->priv; - struct nfp_ipv4_route_entry *entry; - struct list_head *ptr, *storage; + struct nfp_offloaded_route *entry; - spin_lock_bh(&priv->tun.neigh_off_lock); - list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) { - entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); - if (entry->ipv4_addr == ipv4_addr) { - spin_unlock_bh(&priv->tun.neigh_off_lock); + spin_lock_bh(list_lock); + list_for_each_entry(entry, route_list, list) + if (!memcmp(entry->ip_add, add, add_len)) { + spin_unlock_bh(list_lock); return true; } - } - spin_unlock_bh(&priv->tun.neigh_off_lock); + spin_unlock_bh(list_lock); return false; } -static void nfp_tun_add_route_to_cache(struct nfp_app *app, __be32 ipv4_addr) +static int +__nfp_tun_add_route_to_cache(struct list_head *route_list, + spinlock_t *list_lock, void *add, int add_len) { - struct nfp_flower_priv *priv = app->priv; - struct nfp_ipv4_route_entry *entry; - struct list_head *ptr, *storage; + struct nfp_offloaded_route *entry; - spin_lock_bh(&priv->tun.neigh_off_lock); - list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) { - entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); - if (entry->ipv4_addr == ipv4_addr) { - spin_unlock_bh(&priv->tun.neigh_off_lock); - return; + spin_lock_bh(list_lock); + list_for_each_entry(entry, route_list, list) + if (!memcmp(entry->ip_add, add, add_len)) { + spin_unlock_bh(list_lock); + return 0; } - } - entry = 
kmalloc(sizeof(*entry), GFP_ATOMIC); + + entry = kmalloc(sizeof(*entry) + add_len, GFP_ATOMIC); if (!entry) { - spin_unlock_bh(&priv->tun.neigh_off_lock); - nfp_flower_cmsg_warn(app, "Mem error when storing new route.\n"); - return; + spin_unlock_bh(list_lock); + return -ENOMEM; } - entry->ipv4_addr = ipv4_addr; - list_add_tail(&entry->list, &priv->tun.neigh_off_list); - spin_unlock_bh(&priv->tun.neigh_off_lock); + memcpy(entry->ip_add, add, add_len); + list_add_tail(&entry->list, route_list); + spin_unlock_bh(list_lock); + + return 0; } -static void nfp_tun_del_route_from_cache(struct nfp_app *app, __be32 ipv4_addr) +static void +__nfp_tun_del_route_from_cache(struct list_head *route_list, + spinlock_t *list_lock, void *add, int add_len) { - struct nfp_flower_priv *priv = app->priv; - struct nfp_ipv4_route_entry *entry; - struct list_head *ptr, *storage; + struct nfp_offloaded_route *entry; - spin_lock_bh(&priv->tun.neigh_off_lock); - list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) { - entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); - if (entry->ipv4_addr == ipv4_addr) { + spin_lock_bh(list_lock); + list_for_each_entry(entry, route_list, list) + if (!memcmp(entry->ip_add, add, add_len)) { list_del(&entry->list); kfree(entry); break; } - } - spin_unlock_bh(&priv->tun.neigh_off_lock); + spin_unlock_bh(list_lock); +} + +static bool nfp_tun_has_route_v4(struct nfp_app *app, __be32 *ipv4_addr) +{ + struct nfp_flower_priv *priv = app->priv; + + return __nfp_tun_has_route(&priv->tun.neigh_off_list_v4, + &priv->tun.neigh_off_lock_v4, ipv4_addr, + sizeof(*ipv4_addr)); +} + +static bool +nfp_tun_has_route_v6(struct nfp_app *app, struct in6_addr *ipv6_addr) +{ + struct nfp_flower_priv *priv = app->priv; + + return __nfp_tun_has_route(&priv->tun.neigh_off_list_v6, + &priv->tun.neigh_off_lock_v6, ipv6_addr, + sizeof(*ipv6_addr)); +} + +static void +nfp_tun_add_route_to_cache_v4(struct nfp_app *app, __be32 *ipv4_addr) +{ + struct nfp_flower_priv *priv = app->priv; + + __nfp_tun_add_route_to_cache(&priv->tun.neigh_off_list_v4, + &priv->tun.neigh_off_lock_v4, ipv4_addr, + sizeof(*ipv4_addr)); +} + +static void +nfp_tun_add_route_to_cache_v6(struct nfp_app *app, struct in6_addr *ipv6_addr) +{ + struct nfp_flower_priv *priv = app->priv; + + __nfp_tun_add_route_to_cache(&priv->tun.neigh_off_list_v6, + &priv->tun.neigh_off_lock_v6, ipv6_addr, + sizeof(*ipv6_addr)); } static void -nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, - struct flowi4 *flow, struct neighbour *neigh, gfp_t flag) +nfp_tun_del_route_from_cache_v4(struct nfp_app *app, __be32 *ipv4_addr) +{ + struct nfp_flower_priv *priv = app->priv; + + __nfp_tun_del_route_from_cache(&priv->tun.neigh_off_list_v4, + &priv->tun.neigh_off_lock_v4, ipv4_addr, + sizeof(*ipv4_addr)); +} + +static void +nfp_tun_del_route_from_cache_v6(struct nfp_app *app, struct in6_addr *ipv6_addr) +{ + struct nfp_flower_priv *priv = app->priv; + + __nfp_tun_del_route_from_cache(&priv->tun.neigh_off_list_v6, + &priv->tun.neigh_off_lock_v6, ipv6_addr, + sizeof(*ipv6_addr)); +} + +static void +nfp_tun_write_neigh_v4(struct net_device *netdev, struct nfp_app *app, + struct flowi4 *flow, struct neighbour *neigh, gfp_t flag) { struct nfp_tun_neigh payload; u32 port_id; @@ -302,7 +457,7 @@ nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, /* If entry has expired send dst IP with all other fields 0. 
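The __nfp_tun_has_route()/__nfp_tun_add_route_to_cache()/__nfp_tun_del_route_from_cache() helpers above make the offloaded-route cache address-family agnostic by keying entries on a raw address blob held in a flexible array member and comparing with memcmp(). A userspace sketch of the same pattern; unlike the driver, which keeps separate per-family lists and passes the address length explicitly, this sketch stores the length in the entry to stay self-contained and uses a plain singly linked list with no locking:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct cached_route {
	struct cached_route *next;
	size_t len;			/* 4 for IPv4, 16 for IPv6 */
	unsigned char ip_add[];		/* raw destination address */
};

static struct cached_route *route_list;

static bool route_cached(const void *addr, size_t len)
{
	const struct cached_route *r;

	for (r = route_list; r; r = r->next)
		if (r->len == len && !memcmp(r->ip_add, addr, len))
			return true;
	return false;
}

static int route_cache_add(const void *addr, size_t len)
{
	struct cached_route *r;

	if (route_cached(addr, len))
		return 0;

	/* One allocation: header plus the trailing address bytes. */
	r = malloc(sizeof(*r) + len);
	if (!r)
		return -1;
	r->len = len;
	memcpy(r->ip_add, addr, len);
	r->next = route_list;
	route_list = r;
	return 0;
}

int main(void)
{
	unsigned char v4[4] = { 10, 0, 0, 1 };
	unsigned char v6[16] = { 0x20, 0x01, 0x0d, 0xb8 };

	route_cache_add(v4, sizeof(v4));
	route_cache_add(v6, sizeof(v6));
	printf("v4 cached: %d, v6 cached: %d\n",
	       route_cached(v4, sizeof(v4)), route_cached(v6, sizeof(v6)));
	return 0;
}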
*/ if (!(neigh->nud_state & NUD_VALID) || neigh->dead) { - nfp_tun_del_route_from_cache(app, payload.dst_ipv4); + nfp_tun_del_route_from_cache_v4(app, &payload.dst_ipv4); /* Trigger ARP to verify invalid neighbour state. */ neigh_event_send(neigh, NULL); goto send_msg; @@ -314,7 +469,7 @@ nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, neigh_ha_snapshot(payload.dst_addr, neigh, netdev); payload.port_id = cpu_to_be32(port_id); /* Add destination of new route to NFP cache. */ - nfp_tun_add_route_to_cache(app, payload.dst_ipv4); + nfp_tun_add_route_to_cache_v4(app, &payload.dst_ipv4); send_msg: nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH, @@ -322,16 +477,54 @@ send_msg: (unsigned char *)&payload, flag); } +static void +nfp_tun_write_neigh_v6(struct net_device *netdev, struct nfp_app *app, + struct flowi6 *flow, struct neighbour *neigh, gfp_t flag) +{ + struct nfp_tun_neigh_v6 payload; + u32 port_id; + + port_id = nfp_flower_get_port_id_from_netdev(app, netdev); + if (!port_id) + return; + + memset(&payload, 0, sizeof(struct nfp_tun_neigh_v6)); + payload.dst_ipv6 = flow->daddr; + + /* If entry has expired send dst IP with all other fields 0. */ + if (!(neigh->nud_state & NUD_VALID) || neigh->dead) { + nfp_tun_del_route_from_cache_v6(app, &payload.dst_ipv6); + /* Trigger probe to verify invalid neighbour state. */ + neigh_event_send(neigh, NULL); + goto send_msg; + } + + /* Have a valid neighbour so populate rest of entry. */ + payload.src_ipv6 = flow->saddr; + ether_addr_copy(payload.src_addr, netdev->dev_addr); + neigh_ha_snapshot(payload.dst_addr, neigh, netdev); + payload.port_id = cpu_to_be32(port_id); + /* Add destination of new route to NFP cache. */ + nfp_tun_add_route_to_cache_v6(app, &payload.dst_ipv6); + +send_msg: + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6, + sizeof(struct nfp_tun_neigh_v6), + (unsigned char *)&payload, flag); +} + static int nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, void *ptr) { struct nfp_flower_priv *app_priv; struct netevent_redirect *redir; - struct flowi4 flow = {}; + struct flowi4 flow4 = {}; + struct flowi6 flow6 = {}; struct neighbour *n; struct nfp_app *app; struct rtable *rt; + bool ipv6 = false; int err; switch (event) { @@ -346,7 +539,13 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, return NOTIFY_DONE; } - flow.daddr = *(__be32 *)n->primary_key; + if (n->tbl->family == AF_INET6) + ipv6 = true; + + if (ipv6) + flow6.daddr = *(struct in6_addr *)n->primary_key; + else + flow4.daddr = *(__be32 *)n->primary_key; app_priv = container_of(nb, struct nfp_flower_priv, tun.neigh_nb); app = app_priv->app; @@ -356,28 +555,46 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, return NOTIFY_DONE; /* Only concerned with changes to routes already added to NFP. */ - if (!nfp_tun_has_route(app, flow.daddr)) + if ((ipv6 && !nfp_tun_has_route_v6(app, &flow6.daddr)) || + (!ipv6 && !nfp_tun_has_route_v4(app, &flow4.daddr))) return NOTIFY_DONE; #if IS_ENABLED(CONFIG_INET) - /* Do a route lookup to populate flow data. 
*/ - rt = ip_route_output_key(dev_net(n->dev), &flow); - err = PTR_ERR_OR_ZERO(rt); - if (err) + if (ipv6) { +#if IS_ENABLED(CONFIG_IPV6) + struct dst_entry *dst; + + dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(n->dev), NULL, + &flow6, NULL); + if (IS_ERR(dst)) + return NOTIFY_DONE; + + dst_release(dst); + flow6.flowi6_proto = IPPROTO_UDP; + nfp_tun_write_neigh_v6(n->dev, app, &flow6, n, GFP_ATOMIC); +#else return NOTIFY_DONE; +#endif /* CONFIG_IPV6 */ + } else { + /* Do a route lookup to populate flow data. */ + rt = ip_route_output_key(dev_net(n->dev), &flow4); + err = PTR_ERR_OR_ZERO(rt); + if (err) + return NOTIFY_DONE; - ip_rt_put(rt); + ip_rt_put(rt); + + flow4.flowi4_proto = IPPROTO_UDP; + nfp_tun_write_neigh_v4(n->dev, app, &flow4, n, GFP_ATOMIC); + } #else return NOTIFY_DONE; -#endif - - flow.flowi4_proto = IPPROTO_UDP; - nfp_tun_write_neigh(n->dev, app, &flow, n, GFP_ATOMIC); +#endif /* CONFIG_INET */ return NOTIFY_OK; } -void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb) +void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb) { struct nfp_tun_req_route_ipv4 *payload; struct net_device *netdev; @@ -411,7 +628,7 @@ void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb) ip_rt_put(rt); if (!n) goto fail_rcu_unlock; - nfp_tun_write_neigh(n->dev, app, &flow, n, GFP_ATOMIC); + nfp_tun_write_neigh_v4(n->dev, app, &flow, n, GFP_ATOMIC); neigh_release(n); rcu_read_unlock(); return; @@ -421,6 +638,48 @@ fail_rcu_unlock: nfp_flower_cmsg_warn(app, "Requested route not found.\n"); } +void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_tun_req_route_ipv6 *payload; + struct net_device *netdev; + struct flowi6 flow = {}; + struct dst_entry *dst; + struct neighbour *n; + + payload = nfp_flower_cmsg_get_data(skb); + + rcu_read_lock(); + netdev = nfp_app_dev_get(app, be32_to_cpu(payload->ingress_port), NULL); + if (!netdev) + goto fail_rcu_unlock; + + flow.daddr = payload->ipv6_addr; + flow.flowi6_proto = IPPROTO_UDP; + +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(netdev), NULL, &flow, + NULL); + if (IS_ERR(dst)) + goto fail_rcu_unlock; +#else + goto fail_rcu_unlock; +#endif + + n = dst_neigh_lookup(dst, &flow.daddr); + dst_release(dst); + if (!n) + goto fail_rcu_unlock; + + nfp_tun_write_neigh_v6(n->dev, app, &flow, n, GFP_ATOMIC); + neigh_release(n); + rcu_read_unlock(); + return; + +fail_rcu_unlock: + rcu_read_unlock(); + nfp_flower_cmsg_warn(app, "Requested IPv6 route not found.\n"); +} + static void nfp_tun_write_ipv4_list(struct nfp_app *app) { struct nfp_flower_priv *priv = app->priv; @@ -502,6 +761,78 @@ void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4) nfp_tun_write_ipv4_list(app); } +static void nfp_tun_write_ipv6_list(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv6_addr_entry *entry; + struct nfp_tun_ipv6_addr payload; + int count = 0; + + memset(&payload, 0, sizeof(struct nfp_tun_ipv6_addr)); + mutex_lock(&priv->tun.ipv6_off_lock); + list_for_each_entry(entry, &priv->tun.ipv6_off_list, list) { + if (count >= NFP_FL_IPV6_ADDRS_MAX) { + nfp_flower_cmsg_warn(app, "Too many IPv6 tunnel endpoint addresses, some cannot be offloaded.\n"); + break; + } + payload.ipv6_addr[count++] = entry->ipv6_addr; + } + mutex_unlock(&priv->tun.ipv6_off_lock); + payload.count = cpu_to_be32(count); + + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_IPS_V6, + sizeof(struct nfp_tun_ipv6_addr), + 
&payload, GFP_KERNEL); +} + +struct nfp_ipv6_addr_entry * +nfp_tunnel_add_ipv6_off(struct nfp_app *app, struct in6_addr *ipv6) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv6_addr_entry *entry; + + mutex_lock(&priv->tun.ipv6_off_lock); + list_for_each_entry(entry, &priv->tun.ipv6_off_list, list) + if (!memcmp(&entry->ipv6_addr, ipv6, sizeof(*ipv6))) { + entry->ref_count++; + mutex_unlock(&priv->tun.ipv6_off_lock); + return entry; + } + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + mutex_unlock(&priv->tun.ipv6_off_lock); + nfp_flower_cmsg_warn(app, "Mem error when offloading IP address.\n"); + return NULL; + } + entry->ipv6_addr = *ipv6; + entry->ref_count = 1; + list_add_tail(&entry->list, &priv->tun.ipv6_off_list); + mutex_unlock(&priv->tun.ipv6_off_lock); + + nfp_tun_write_ipv6_list(app); + + return entry; +} + +void +nfp_tunnel_put_ipv6_off(struct nfp_app *app, struct nfp_ipv6_addr_entry *entry) +{ + struct nfp_flower_priv *priv = app->priv; + bool freed = false; + + mutex_lock(&priv->tun.ipv6_off_lock); + if (!--entry->ref_count) { + list_del(&entry->list); + kfree(entry); + freed = true; + } + mutex_unlock(&priv->tun.ipv6_off_lock); + + if (freed) + nfp_tun_write_ipv6_list(app); +} + static int __nfp_tunnel_offload_mac(struct nfp_app *app, u8 *mac, u16 idx, bool del) { @@ -1013,13 +1344,17 @@ int nfp_tunnel_config_start(struct nfp_app *app) ida_init(&priv->tun.mac_off_ids); - /* Initialise priv data for IPv4 offloading. */ + /* Initialise priv data for IPv4/v6 offloading. */ mutex_init(&priv->tun.ipv4_off_lock); INIT_LIST_HEAD(&priv->tun.ipv4_off_list); + mutex_init(&priv->tun.ipv6_off_lock); + INIT_LIST_HEAD(&priv->tun.ipv6_off_list); /* Initialise priv data for neighbour offloading. */ - spin_lock_init(&priv->tun.neigh_off_lock); - INIT_LIST_HEAD(&priv->tun.neigh_off_list); + spin_lock_init(&priv->tun.neigh_off_lock_v4); + INIT_LIST_HEAD(&priv->tun.neigh_off_list_v4); + spin_lock_init(&priv->tun.neigh_off_lock_v6); + INIT_LIST_HEAD(&priv->tun.neigh_off_list_v6); priv->tun.neigh_nb.notifier_call = nfp_tun_neigh_event_handler; err = register_netevent_notifier(&priv->tun.neigh_nb); @@ -1034,9 +1369,11 @@ int nfp_tunnel_config_start(struct nfp_app *app) void nfp_tunnel_config_stop(struct nfp_app *app) { + struct nfp_offloaded_route *route_entry, *temp; struct nfp_flower_priv *priv = app->priv; - struct nfp_ipv4_route_entry *route_entry; struct nfp_ipv4_addr_entry *ip_entry; + struct nfp_tun_neigh_v6 ipv6_route; + struct nfp_tun_neigh ipv4_route; struct list_head *ptr, *storage; unregister_netevent_notifier(&priv->tun.neigh_nb); @@ -1050,12 +1387,35 @@ void nfp_tunnel_config_stop(struct nfp_app *app) kfree(ip_entry); } - /* Free any memory that may be occupied by the route list. */ - list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) { - route_entry = list_entry(ptr, struct nfp_ipv4_route_entry, - list); + mutex_destroy(&priv->tun.ipv6_off_lock); + + /* Free memory in the route list and remove entries from fw cache. 
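nfp_tunnel_add_ipv6_off() and nfp_tunnel_put_ipv6_off() above reference-count each offloaded IPv6 endpoint and only resend the address list to the firmware when an entry is created or its last user drops it. A compact userspace sketch of that add/put discipline; the mutex and the control-message write are reduced to a comment and a counter:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct ip6_entry {
	struct ip6_entry *next;
	unsigned char addr[16];
	int ref_count;
};

static struct ip6_entry *ipv6_off_list;
static int fw_list_writes;	/* stands in for nfp_tun_write_ipv6_list() */

static struct ip6_entry *ipv6_off_add(const unsigned char addr[16])
{
	struct ip6_entry *e;

	for (e = ipv6_off_list; e; e = e->next)
		if (!memcmp(e->addr, addr, 16)) {
			e->ref_count++;	/* already offloaded: just take a ref */
			return e;
		}

	e = calloc(1, sizeof(*e));
	if (!e)
		return NULL;
	memcpy(e->addr, addr, 16);
	e->ref_count = 1;
	e->next = ipv6_off_list;
	ipv6_off_list = e;
	fw_list_writes++;		/* new address: push the list */
	return e;
}

static void ipv6_off_put(struct ip6_entry *entry)
{
	struct ip6_entry **p;

	if (--entry->ref_count)
		return;			/* other flows still use this address */

	for (p = &ipv6_off_list; *p; p = &(*p)->next)
		if (*p == entry) {
			*p = entry->next;
			break;
		}
	free(entry);
	fw_list_writes++;		/* address gone: push the shorter list */
}

int main(void)
{
	unsigned char a[16] = { 0x20, 0x01 };
	struct ip6_entry *e1 = ipv6_off_add(a);
	struct ip6_entry *e2 = ipv6_off_add(a);

	ipv6_off_put(e2);
	ipv6_off_put(e1);
	printf("firmware list writes: %d\n", fw_list_writes);	/* 2 */
	return 0;
}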
*/ + list_for_each_entry_safe(route_entry, temp, + &priv->tun.neigh_off_list_v4, list) { + memset(&ipv4_route, 0, sizeof(ipv4_route)); + memcpy(&ipv4_route.dst_ipv4, &route_entry->ip_add, + sizeof(ipv4_route.dst_ipv4)); list_del(&route_entry->list); kfree(route_entry); + + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH, + sizeof(struct nfp_tun_neigh), + (unsigned char *)&ipv4_route, + GFP_KERNEL); + } + + list_for_each_entry_safe(route_entry, temp, + &priv->tun.neigh_off_list_v6, list) { + memset(&ipv6_route, 0, sizeof(ipv6_route)); + memcpy(&ipv6_route.dst_ipv6, &route_entry->ip_add, + sizeof(ipv6_route.dst_ipv6)); + list_del(&route_entry->list); + kfree(route_entry); + + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6, + sizeof(struct nfp_tun_neigh), + (unsigned char *)&ipv6_route, + GFP_KERNEL); } /* Destroy rhash. Entries should be cleaned on netdev notifier unreg. */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 250f510b1d21..ff4438478ea9 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -586,6 +586,9 @@ struct nfp_net_dp { * @ktls_conn_id_gen: Trivial generator for kTLS connection ids (for TX) * @ktls_no_space: Counter of firmware rejecting kTLS connection due to * lack of space + * @ktls_rx_resync_req: Counter of TLS RX resync requested + * @ktls_rx_resync_ign: Counter of TLS RX resync requests ignored + * @ktls_rx_resync_sent: Counter of TLS RX resync completed * @mbox_cmsg: Common Control Message via vNIC mailbox state * @mbox_cmsg.queue: CCM mbox queue of pending messages * @mbox_cmsg.wq: CCM mbox wait queue of waiting processes @@ -674,6 +677,9 @@ struct nfp_net { atomic64_t ktls_conn_id_gen; atomic_t ktls_no_space; + atomic_t ktls_rx_resync_req; + atomic_t ktls_rx_resync_ign; + atomic_t ktls_rx_resync_sent; struct { struct sk_buff_head queue; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index bcdcd6de7dea..9bfb3b077bc1 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -47,6 +47,7 @@ #include "nfp_net_sriov.h" #include "nfp_port.h" #include "crypto/crypto.h" +#include "crypto/fw.h" /** * nfp_net_get_fw_version() - Read and parse the FW version @@ -1321,17 +1322,11 @@ nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) netdev_tx_reset_queue(nd_q); } -static void nfp_net_tx_timeout(struct net_device *netdev) +static void nfp_net_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct nfp_net *nn = netdev_priv(netdev); - int i; - for (i = 0; i < nn->dp.netdev->real_num_tx_queues; i++) { - if (!netif_tx_queue_stopped(netdev_get_tx_queue(netdev, i))) - continue; - nn_warn(nn, "TX timeout on ring: %d\n", i); - } - nn_warn(nn, "TX watchdog timeout\n"); + nn_warn(nn, "TX watchdog timeout on ring: %u\n", txqueue); } /* Receive processing @@ -1667,9 +1662,9 @@ nfp_net_set_hash_desc(struct net_device *netdev, struct nfp_meta_parsed *meta, &rx_hash->hash); } -static void * +static bool nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, - void *data, int meta_len) + void *data, void *pkt, unsigned int pkt_len, int meta_len) { u32 meta_info; @@ -1699,14 +1694,20 @@ nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, (__force __wsum)__get_unaligned_cpu32(data); data += 4; break; + case 
NFP_NET_META_RESYNC_INFO: + if (nfp_net_tls_rx_resync_req(netdev, data, pkt, + pkt_len)) + return NULL; + data += sizeof(struct nfp_net_tls_resync_req); + break; default: - return NULL; + return true; } meta_info >>= NFP_NET_META_FIELD_SIZE; } - return data; + return data != pkt; } static void @@ -1891,12 +1892,10 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) nfp_net_set_hash_desc(dp->netdev, &meta, rxbuf->frag + meta_off, rxd); } else if (meta_len) { - void *end; - - end = nfp_net_parse_meta(dp->netdev, &meta, - rxbuf->frag + meta_off, - meta_len); - if (unlikely(end != rxbuf->frag + pkt_off)) { + if (unlikely(nfp_net_parse_meta(dp->netdev, &meta, + rxbuf->frag + meta_off, + rxbuf->frag + pkt_off, + pkt_len, meta_len))) { nn_dp_warn(dp, "invalid RX packet metadata\n"); nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c index d835c14b7257..c3a763134e79 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c @@ -17,6 +17,30 @@ static void nfp_net_tlv_caps_reset(struct nfp_net_tlv_caps *caps) caps->mbox_len = NFP_NET_CFG_MBOX_VAL_MAX_SZ; } +static bool +nfp_net_tls_parse_crypto_ops(struct device *dev, struct nfp_net_tlv_caps *caps, + u8 __iomem *ctrl_mem, u8 __iomem *data, + unsigned int length, unsigned int offset, + bool rx_stream_scan) +{ + /* Ignore the legacy TLV if new one was already parsed */ + if (caps->tls_resync_ss && !rx_stream_scan) + return true; + + if (length < 32) { + dev_err(dev, + "CRYPTO OPS TLV should be at least 32B, is %dB offset:%u\n", + length, offset); + return false; + } + + caps->crypto_ops = readl(data); + caps->crypto_enable_off = data - ctrl_mem + 16; + caps->tls_resync_ss = rx_stream_scan; + + return true; +} + int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem, struct nfp_net_tlv_caps *caps) { @@ -104,15 +128,25 @@ int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem, caps->mbox_cmsg_types = readl(data); break; case NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS: - if (length < 32) { - dev_err(dev, - "CRYPTO OPS TLV should be at least 32B, is %dB offset:%u\n", - length, offset); + if (!nfp_net_tls_parse_crypto_ops(dev, caps, ctrl_mem, + data, length, offset, + false)) return -EINVAL; + break; + case NFP_NET_CFG_TLV_TYPE_VNIC_STATS: + if ((data - ctrl_mem) % 8) { + dev_warn(dev, "VNIC STATS TLV misaligned, ignoring offset:%u len:%u\n", + offset, length); + break; } - - caps->crypto_ops = readl(data); - caps->crypto_enable_off = data - ctrl_mem + 16; + caps->vnic_stats_off = data - ctrl_mem; + caps->vnic_stats_cnt = length / 10; + break; + case NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS_RX_SCAN: + if (!nfp_net_tls_parse_crypto_ops(dev, caps, ctrl_mem, + data, length, offset, + true)) + return -EINVAL; break; default: if (!FIELD_GET(NFP_NET_CFG_TLV_HEADER_REQUIRED, hdr)) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index ee6b24e4eacd..3d61a8cb60b0 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -45,6 +45,7 @@ #define NFP_NET_META_PORTID 5 #define NFP_NET_META_CSUM 6 /* checksum complete type */ #define NFP_NET_META_CONN_HANDLE 7 +#define NFP_NET_META_RESYNC_INFO 8 /* RX resync info request */ #define NFP_META_PORT_ID_CTRL ~0U @@ -479,6 +480,22 @@ * 8 words, bitmaps of supported and enabled crypto operations. 
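nfp_net_tls_parse_crypto_ops() above handles both the legacy CRYPTO_OPS TLV and the new CRYPTO_OPS_RX_SCAN variant, preferring the latter when both are present and recording the enable region 16 bytes into the TLV data. A hedged userspace sketch of that parse over an in-memory copy of the control BAR; the buffer layout and demo values are illustrative, and the 32-bit read assumes a little-endian host where memcpy() matches readl():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Subset of nfp_net_tlv_caps relevant to the hunk above. */
struct crypto_caps {
	uint32_t crypto_ops;		/* supported-operations bitmap */
	unsigned int crypto_enable_off;	/* offset of the enable bitmap */
	bool tls_resync_ss;		/* RX resync via stream scan */
};

/* Parse one CRYPTO_OPS / CRYPTO_OPS_RX_SCAN TLV found at "off" inside an
 * in-memory copy of the control BAR.  The RX_SCAN variant wins when both
 * TLVs are present, and the enable region starts 16 bytes into the TLV
 * data, as in nfp_net_tls_parse_crypto_ops() above. */
static bool parse_crypto_ops(struct crypto_caps *caps, const uint8_t *ctrl,
			     unsigned int off, unsigned int len,
			     bool rx_stream_scan)
{
	if (caps->tls_resync_ss && !rx_stream_scan)
		return true;	/* keep the already-parsed RX_SCAN TLV */

	if (len < 32)		/* 16B supported bitmap + 16B enable bitmap */
		return false;

	memcpy(&caps->crypto_ops, ctrl + off, sizeof(caps->crypto_ops));
	caps->crypto_enable_off = off + 16;
	caps->tls_resync_ss = rx_stream_scan;
	return true;
}

int main(void)
{
	uint8_t bar[64] = { [32] = 0x03 };	/* TLV data at offset 32 */
	struct crypto_caps caps = { 0 };

	if (parse_crypto_ops(&caps, bar, 32, 32, true))
		printf("ops 0x%x, enable region at %u, stream-scan %d\n",
		       (unsigned int)caps.crypto_ops, caps.crypto_enable_off,
		       caps.tls_resync_ss);
	return 0;
}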
* First 16B (4 words) contains a bitmap of supported crypto operations, * and next 16B contain the enabled operations. + * This capability is made obsolete by ones with better sync methods. + * + * %NFP_NET_CFG_TLV_TYPE_VNIC_STATS: + * Variable, per-vNIC statistics, data should be 8B aligned (FW should insert + * zero-length RESERVED TLV to pad). + * TLV data has two sections. First is an array of statistics' IDs (2B each). + * Second 8B statistics themselves. Statistics are 8B aligned, meaning there + * may be a padding between sections. + * Number of statistics can be determined as floor(tlv.length / (2 + 8)). + * This TLV overwrites %NFP_NET_CFG_STATS_* values (statistics in this TLV + * duplicate the old ones, so driver should be careful not to unnecessarily + * render both). + * + * %NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS_RX_SCAN: + * Same as %NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS, but crypto TLS does stream scan + * RX sync, rather than kernel-assisted sync. */ #define NFP_NET_CFG_TLV_TYPE_UNKNOWN 0 #define NFP_NET_CFG_TLV_TYPE_RESERVED 1 @@ -490,6 +507,8 @@ #define NFP_NET_CFG_TLV_TYPE_REPR_CAP 7 #define NFP_NET_CFG_TLV_TYPE_MBOX_CMSG_TYPES 10 #define NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS 11 /* see crypto/fw.h */ +#define NFP_NET_CFG_TLV_TYPE_VNIC_STATS 12 +#define NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS_RX_SCAN 13 struct device; @@ -502,6 +521,9 @@ struct device; * @mbox_cmsg_types: cmsgs which can be passed through the mailbox * @crypto_ops: supported crypto operations * @crypto_enable_off: offset of crypto ops enable region + * @vnic_stats_off: offset of vNIC stats area + * @vnic_stats_cnt: number of vNIC stats + * @tls_resync_ss: TLS resync will be performed via stream scan */ struct nfp_net_tlv_caps { u32 me_freq_mhz; @@ -511,6 +533,9 @@ struct nfp_net_tlv_caps { u32 mbox_cmsg_types; u32 crypto_ops; unsigned int crypto_enable_off; + unsigned int vnic_stats_off; + unsigned int vnic_stats_cnt; + unsigned int tls_resync_ss:1; }; int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 1b840ee47339..d648e32c0520 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -148,11 +148,33 @@ static const struct nfp_et_stat nfp_mac_et_stats[] = { { "tx_pause_frames_class7", NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS7, }, }; +static const char nfp_tlv_stat_names[][ETH_GSTRING_LEN] = { + [1] = "dev_rx_discards", + [2] = "dev_rx_errors", + [3] = "dev_rx_bytes", + [4] = "dev_rx_uc_bytes", + [5] = "dev_rx_mc_bytes", + [6] = "dev_rx_bc_bytes", + [7] = "dev_rx_pkts", + [8] = "dev_rx_mc_pkts", + [9] = "dev_rx_bc_pkts", + + [10] = "dev_tx_discards", + [11] = "dev_tx_errors", + [12] = "dev_tx_bytes", + [13] = "dev_tx_uc_bytes", + [14] = "dev_tx_mc_bytes", + [15] = "dev_tx_bc_bytes", + [16] = "dev_tx_pkts", + [17] = "dev_tx_mc_pkts", + [18] = "dev_tx_bc_pkts", +}; + #define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats) #define NN_ET_SWITCH_STATS_LEN 9 #define NN_RVEC_GATHER_STATS 13 #define NN_RVEC_PER_Q_STATS 3 -#define NN_CTRL_PATH_STATS 1 +#define NN_CTRL_PATH_STATS 4 #define SFP_SFF_REV_COMPLIANCE 1 @@ -454,6 +476,9 @@ static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data) data = nfp_pr_et(data, "tx_tls_drop_no_sync_data"); data = nfp_pr_et(data, "hw_tls_no_space"); + data = nfp_pr_et(data, "rx_tls_resync_req_ok"); + data = nfp_pr_et(data, "rx_tls_resync_req_ign"); + data = nfp_pr_et(data, 
"rx_tls_resync_sent"); return data; } @@ -502,6 +527,9 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) *data++ = gathered_stats[j]; *data++ = atomic_read(&nn->ktls_no_space); + *data++ = atomic_read(&nn->ktls_rx_resync_req); + *data++ = atomic_read(&nn->ktls_rx_resync_ign); + *data++ = atomic_read(&nn->ktls_rx_resync_sent); return data; } @@ -560,6 +588,65 @@ nfp_vnic_get_hw_stats(u64 *data, u8 __iomem *mem, unsigned int num_vecs) return data; } +static unsigned int nfp_vnic_get_tlv_stats_count(struct nfp_net *nn) +{ + return nn->tlv_caps.vnic_stats_cnt + nn->max_r_vecs * 4; +} + +static u8 *nfp_vnic_get_tlv_stats_strings(struct nfp_net *nn, u8 *data) +{ + unsigned int i, id; + u8 __iomem *mem; + u64 id_word = 0; + + mem = nn->dp.ctrl_bar + nn->tlv_caps.vnic_stats_off; + for (i = 0; i < nn->tlv_caps.vnic_stats_cnt; i++) { + if (!(i % 4)) + id_word = readq(mem + i * 2); + + id = (u16)id_word; + id_word >>= 16; + + if (id < ARRAY_SIZE(nfp_tlv_stat_names) && + nfp_tlv_stat_names[id][0]) { + memcpy(data, nfp_tlv_stat_names[id], ETH_GSTRING_LEN); + data += ETH_GSTRING_LEN; + } else { + data = nfp_pr_et(data, "dev_unknown_stat%u", id); + } + } + + for (i = 0; i < nn->max_r_vecs; i++) { + data = nfp_pr_et(data, "rxq_%u_pkts", i); + data = nfp_pr_et(data, "rxq_%u_bytes", i); + data = nfp_pr_et(data, "txq_%u_pkts", i); + data = nfp_pr_et(data, "txq_%u_bytes", i); + } + + return data; +} + +static u64 *nfp_vnic_get_tlv_stats(struct nfp_net *nn, u64 *data) +{ + u8 __iomem *mem; + unsigned int i; + + mem = nn->dp.ctrl_bar + nn->tlv_caps.vnic_stats_off; + mem += roundup(2 * nn->tlv_caps.vnic_stats_cnt, 8); + for (i = 0; i < nn->tlv_caps.vnic_stats_cnt; i++) + *data++ = readq(mem + i * 8); + + mem = nn->dp.ctrl_bar; + for (i = 0; i < nn->max_r_vecs; i++) { + *data++ = readq(mem + NFP_NET_CFG_RXR_STATS(i)); + *data++ = readq(mem + NFP_NET_CFG_RXR_STATS(i) + 8); + *data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i)); + *data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i) + 8); + } + + return data; +} + static unsigned int nfp_mac_get_stats_count(struct net_device *netdev) { struct nfp_port *port; @@ -609,8 +696,12 @@ static void nfp_net_get_strings(struct net_device *netdev, switch (stringset) { case ETH_SS_STATS: data = nfp_vnic_get_sw_stats_strings(netdev, data); - data = nfp_vnic_get_hw_stats_strings(data, nn->max_r_vecs, - false); + if (!nn->tlv_caps.vnic_stats_off) + data = nfp_vnic_get_hw_stats_strings(data, + nn->max_r_vecs, + false); + else + data = nfp_vnic_get_tlv_stats_strings(nn, data); data = nfp_mac_get_stats_strings(netdev, data); data = nfp_app_port_get_stats_strings(nn->port, data); break; @@ -624,7 +715,11 @@ nfp_net_get_stats(struct net_device *netdev, struct ethtool_stats *stats, struct nfp_net *nn = netdev_priv(netdev); data = nfp_vnic_get_sw_stats(netdev, data); - data = nfp_vnic_get_hw_stats(data, nn->dp.ctrl_bar, nn->max_r_vecs); + if (!nn->tlv_caps.vnic_stats_off) + data = nfp_vnic_get_hw_stats(data, nn->dp.ctrl_bar, + nn->max_r_vecs); + else + data = nfp_vnic_get_tlv_stats(nn, data); data = nfp_mac_get_stats(netdev, data); data = nfp_app_port_get_stats(nn->port, data); } @@ -632,13 +727,18 @@ nfp_net_get_stats(struct net_device *netdev, struct ethtool_stats *stats, static int nfp_net_get_sset_count(struct net_device *netdev, int sset) { struct nfp_net *nn = netdev_priv(netdev); + unsigned int cnt; switch (sset) { case ETH_SS_STATS: - return nfp_vnic_get_sw_stats_count(netdev) + - nfp_vnic_get_hw_stats_count(nn->max_r_vecs) + - nfp_mac_get_stats_count(netdev) + - 
nfp_app_port_get_stats_count(nn->port); + cnt = nfp_vnic_get_sw_stats_count(netdev); + if (!nn->tlv_caps.vnic_stats_off) + cnt += nfp_vnic_get_hw_stats_count(nn->max_r_vecs); + else + cnt += nfp_vnic_get_tlv_stats_count(nn); + cnt += nfp_mac_get_stats_count(netdev); + cnt += nfp_app_port_get_stats_count(nn->port); + return cnt; default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c index e4977cdf7678..c0e2f4394aef 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c @@ -106,7 +106,7 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, * first NFP_NET_CFG_BAR_SZ of the BAR. This keeps the code * the identical for PF and VF drivers. */ - ctrl_bar = ioremap_nocache(pci_resource_start(pdev, NFP_NET_CTRL_BAR), + ctrl_bar = ioremap(pci_resource_start(pdev, NFP_NET_CTRL_BAR), NFP_NET_CFG_BAR_SZ); if (!ctrl_bar) { dev_err(&pdev->dev, @@ -200,7 +200,7 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, bar_sz = (rx_bar_off + rx_bar_sz) - bar_off; map_addr = pci_resource_start(pdev, tx_bar_no) + bar_off; - vf->q_bar = ioremap_nocache(map_addr, bar_sz); + vf->q_bar = ioremap(map_addr, bar_sz); if (!vf->q_bar) { nn_err(nn, "Failed to map resource %d\n", tx_bar_no); err = -EIO; @@ -216,7 +216,7 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, /* TX queues */ map_addr = pci_resource_start(pdev, tx_bar_no) + tx_bar_off; - nn->tx_bar = ioremap_nocache(map_addr, tx_bar_sz); + nn->tx_bar = ioremap(map_addr, tx_bar_sz); if (!nn->tx_bar) { nn_err(nn, "Failed to map resource %d\n", tx_bar_no); err = -EIO; @@ -225,7 +225,7 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, /* RX queues */ map_addr = pci_resource_start(pdev, rx_bar_no) + rx_bar_off; - nn->rx_bar = ioremap_nocache(map_addr, rx_bar_sz); + nn->rx_bar = ioremap(map_addr, rx_bar_sz); if (!nn->rx_bar) { nn_err(nn, "Failed to map resource %d\n", rx_bar_no); err = -EIO; diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c index 85d46f206b3c..b454db283aef 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c @@ -611,7 +611,7 @@ static int enable_bars(struct nfp6000_pcie *nfp, u16 interface) /* Configure, and lock, BAR0.0 for General Target use (MSI-X SRAM) */ bar = &nfp->bar[0]; if (nfp_bar_resource_len(bar) >= NFP_PCI_MIN_MAP_SIZE) - bar->iomem = ioremap_nocache(nfp_bar_resource_start(bar), + bar->iomem = ioremap(nfp_bar_resource_start(bar), nfp_bar_resource_len(bar)); if (bar->iomem) { int pf; @@ -677,7 +677,7 @@ static int enable_bars(struct nfp6000_pcie *nfp, u16 interface) } bar = &nfp->bar[4 + i]; - bar->iomem = ioremap_nocache(nfp_bar_resource_start(bar), + bar->iomem = ioremap(nfp_bar_resource_start(bar), nfp_bar_resource_len(bar)); if (bar->iomem) { msg += snprintf(msg, end - msg, @@ -858,7 +858,7 @@ static int nfp6000_area_acquire(struct nfp_cpp_area *area) priv->iomem = priv->bar->iomem + priv->bar_offset; else /* Must have been too big. Sub-allocate. 
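The ethtool changes above switch to the VNIC_STATS TLV when the firmware advertises one: nfp_vnic_get_tlv_stats_strings()/nfp_vnic_get_tlv_stats() decode the layout documented earlier in this diff, a packed array of 2-byte statistic IDs followed, after padding to an 8-byte boundary, by 8-byte counters, with floor(length / 10) statistics in total. A userspace sketch of that arithmetic over a little-endian snapshot of the TLV payload (the 8-byte little-endian reads assumed here correspond to readq() on the real BAR):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ROUND_UP(x, a)	(((x) + (a) - 1) / (a) * (a))

/* Decode an in-memory, little-endian copy of the VNIC_STATS TLV payload:
 * floor(len / 10) statistics, each contributing a 2-byte ID in the leading
 * array and an 8-byte counter in the section that starts at the next
 * 8-byte boundary after the IDs. */
static void dump_vnic_stats(const uint8_t *tlv, unsigned int len)
{
	unsigned int cnt = len / 10;		/* 2B id + 8B value per stat */
	unsigned int val_off = ROUND_UP(2 * cnt, 8);
	unsigned int i;

	for (i = 0; i < cnt; i++) {
		uint16_t id;
		uint64_t val;

		memcpy(&id, tlv + 2 * i, sizeof(id));
		memcpy(&val, tlv + val_off + 8 * i, sizeof(val));
		printf("stat id %u = %llu\n", (unsigned int)id,
		       (unsigned long long)val);
	}
}

int main(void)
{
	/* Three stats with IDs 3, 7 and 12: 6 bytes of IDs, then padding,
	 * then the 8-byte counters starting at offset 8. */
	uint8_t tlv[8 + 3 * 8] = { 3, 0, 7, 0, 12, 0 };
	uint64_t vals[3] = { 1000, 42, 7 };

	memcpy(tlv + 8, vals, sizeof(vals));
	dump_vnic_stats(tlv, sizeof(tlv));
	return 0;
}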
*/ - priv->iomem = ioremap_nocache(priv->phys, priv->size); + priv->iomem = ioremap(priv->phys, priv->size); if (IS_ERR_OR_NULL(priv->iomem)) { dev_err(nfp->dev, "Can't ioremap() a %d byte region of BAR %d\n", diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 6b54cb3b681d..2fc10a36afa4 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -2739,7 +2739,7 @@ static int nv_tx_done_optimized(struct net_device *dev, int limit) * nv_tx_timeout: dev->tx_timeout function * Called with netif_tx_lock held. */ -static void nv_tx_timeout(struct net_device *dev) +static void nv_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct fe_priv *np = netdev_priv(dev); u8 __iomem *base = get_hwbase(dev); diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 656169214cdb..d20cf03a3ea0 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -1149,19 +1149,6 @@ static void lpc_eth_set_multicast_list(struct net_device *ndev) spin_unlock_irqrestore(&pldat->lock, flags); } -static int lpc_eth_ioctl(struct net_device *ndev, struct ifreq *req, int cmd) -{ - struct phy_device *phydev = ndev->phydev; - - if (!netif_running(ndev)) - return -EINVAL; - - if (!phydev) - return -ENODEV; - - return phy_mii_ioctl(phydev, req, cmd); -} - static int lpc_eth_open(struct net_device *ndev) { struct netdata_local *pldat = netdev_priv(ndev); @@ -1229,7 +1216,7 @@ static const struct net_device_ops lpc_netdev_ops = { .ndo_stop = lpc_eth_close, .ndo_start_xmit = lpc_eth_hard_start_xmit, .ndo_set_rx_mode = lpc_eth_set_multicast_list, - .ndo_do_ioctl = lpc_eth_ioctl, + .ndo_do_ioctl = phy_do_ioctl_running, .ndo_set_mac_address = lpc_set_mac_address, .ndo_validate_addr = eth_validate_addr, }; diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 18e6d87c607b..73ec195fbc30 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -2271,7 +2271,7 @@ static int pch_gbe_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) * pch_gbe_tx_timeout - Respond to a Tx Hang * @netdev: Network interface device structure */ -static void pch_gbe_tx_timeout(struct net_device *netdev) +static void pch_gbe_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct pch_gbe_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c index eee883a2aa8d..70816d2e2990 100644 --- a/drivers/net/ethernet/packetengines/hamachi.c +++ b/drivers/net/ethernet/packetengines/hamachi.c @@ -548,7 +548,7 @@ static void mdio_write(struct net_device *dev, int phy_id, int location, int val static int hamachi_open(struct net_device *dev); static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static void hamachi_timer(struct timer_list *t); -static void hamachi_tx_timeout(struct net_device *dev); +static void hamachi_tx_timeout(struct net_device *dev, unsigned int txqueue); static void hamachi_init_ring(struct net_device *dev); static netdev_tx_t hamachi_start_xmit(struct sk_buff *skb, struct net_device *dev); @@ -1042,7 +1042,7 @@ static void hamachi_timer(struct timer_list *t) add_timer(&hmp->timer); } -static void hamachi_tx_timeout(struct net_device *dev) +static void hamachi_tx_timeout(struct net_device *dev, unsigned int 
txqueue) { int i; struct hamachi_private *hmp = netdev_priv(dev); diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c index 5113ee647090..520779f05e1a 100644 --- a/drivers/net/ethernet/packetengines/yellowfin.c +++ b/drivers/net/ethernet/packetengines/yellowfin.c @@ -344,7 +344,7 @@ static void mdio_write(void __iomem *ioaddr, int phy_id, int location, int value static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static int yellowfin_open(struct net_device *dev); static void yellowfin_timer(struct timer_list *t); -static void yellowfin_tx_timeout(struct net_device *dev); +static void yellowfin_tx_timeout(struct net_device *dev, unsigned int txqueue); static int yellowfin_init_ring(struct net_device *dev); static netdev_tx_t yellowfin_start_xmit(struct sk_buff *skb, struct net_device *dev); @@ -677,7 +677,7 @@ static void yellowfin_timer(struct timer_list *t) add_timer(&yp->timer); } -static void yellowfin_tx_timeout(struct net_device *dev) +static void yellowfin_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct yellowfin_private *yp = netdev_priv(dev); void __iomem *ioaddr = yp->base; diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h index 98e102af7756..bb106a32f416 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic.h +++ b/drivers/net/ethernet/pensando/ionic/ionic.h @@ -12,19 +12,27 @@ struct ionic_lif; #define IONIC_DRV_NAME "ionic" #define IONIC_DRV_DESCRIPTION "Pensando Ethernet NIC Driver" -#define IONIC_DRV_VERSION "0.18.0-k" +#define IONIC_DRV_VERSION "0.20.0-k" #define PCI_VENDOR_ID_PENSANDO 0x1dd8 #define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_PF 0x1002 #define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_VF 0x1003 -#define IONIC_SUBDEV_ID_NAPLES_25 0x4000 -#define IONIC_SUBDEV_ID_NAPLES_100_4 0x4001 -#define IONIC_SUBDEV_ID_NAPLES_100_8 0x4002 - #define DEVCMD_TIMEOUT 10 +struct ionic_vf { + u16 index; + u8 macaddr[6]; + __le32 maxrate; + __le16 vlanid; + u8 spoofchk; + u8 trusted; + u8 linkstate; + dma_addr_t stats_pa; + struct ionic_lif_stats stats; +}; + struct ionic { struct pci_dev *pdev; struct device *dev; @@ -46,6 +54,9 @@ struct ionic { DECLARE_BITMAP(intrs, IONIC_INTR_CTRL_REGS_MAX); struct work_struct nb_work; struct notifier_block nb; + struct rw_semaphore vf_op_lock; /* lock for VF operations */ + struct ionic_vf *vfs; + int num_vfs; struct timer_list watchdog_timer; int watchdog_period; }; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index 9a9ab8cb2cb3..448d7b23b2f7 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -104,10 +104,112 @@ void ionic_bus_unmap_dbpage(struct ionic *ionic, void __iomem *page) iounmap(page); } +static void ionic_vf_dealloc_locked(struct ionic *ionic) +{ + struct ionic_vf *v; + dma_addr_t dma = 0; + int i; + + if (!ionic->vfs) + return; + + for (i = ionic->num_vfs - 1; i >= 0; i--) { + v = &ionic->vfs[i]; + + if (v->stats_pa) { + (void)ionic_set_vf_config(ionic, i, + IONIC_VF_ATTR_STATSADDR, + (u8 *)&dma); + dma_unmap_single(ionic->dev, v->stats_pa, + sizeof(v->stats), DMA_FROM_DEVICE); + v->stats_pa = 0; + } + } + + kfree(ionic->vfs); + ionic->vfs = NULL; + ionic->num_vfs = 0; +} + +static void ionic_vf_dealloc(struct ionic *ionic) +{ + down_write(&ionic->vf_op_lock); + ionic_vf_dealloc_locked(ionic); + up_write(&ionic->vf_op_lock); +} + +static int 
ionic_vf_alloc(struct ionic *ionic, int num_vfs) +{ + struct ionic_vf *v; + int err = 0; + int i; + + down_write(&ionic->vf_op_lock); + + ionic->vfs = kcalloc(num_vfs, sizeof(struct ionic_vf), GFP_KERNEL); + if (!ionic->vfs) { + err = -ENOMEM; + goto out; + } + + for (i = 0; i < num_vfs; i++) { + v = &ionic->vfs[i]; + v->stats_pa = dma_map_single(ionic->dev, &v->stats, + sizeof(v->stats), DMA_FROM_DEVICE); + if (dma_mapping_error(ionic->dev, v->stats_pa)) { + v->stats_pa = 0; + err = -ENODEV; + goto out; + } + + /* ignore failures from older FW, we just won't get stats */ + (void)ionic_set_vf_config(ionic, i, IONIC_VF_ATTR_STATSADDR, + (u8 *)&v->stats_pa); + ionic->num_vfs++; + } + +out: + if (err) + ionic_vf_dealloc_locked(ionic); + up_write(&ionic->vf_op_lock); + return err; +} + +static int ionic_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + struct ionic *ionic = pci_get_drvdata(pdev); + struct device *dev = ionic->dev; + int ret = 0; + + if (num_vfs > 0) { + ret = pci_enable_sriov(pdev, num_vfs); + if (ret) { + dev_err(dev, "Cannot enable SRIOV: %d\n", ret); + goto out; + } + + ret = ionic_vf_alloc(ionic, num_vfs); + if (ret) { + dev_err(dev, "Cannot alloc VFs: %d\n", ret); + pci_disable_sriov(pdev); + goto out; + } + + ret = num_vfs; + } else { + pci_disable_sriov(pdev); + ionic_vf_dealloc(ionic); + } + +out: + return ret; +} + static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct device *dev = &pdev->dev; struct ionic *ionic; + int num_vfs; int err; ionic = ionic_devlink_alloc(dev); @@ -206,6 +308,15 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_free_lifs; } + init_rwsem(&ionic->vf_op_lock); + num_vfs = pci_num_vf(pdev); + if (num_vfs) { + dev_info(dev, "%d VFs found already enabled\n", num_vfs); + err = ionic_vf_alloc(ionic, num_vfs); + if (err) + dev_err(dev, "Cannot enable existing VFs: %d\n", err); + } + err = ionic_lifs_register(ionic); if (err) { dev_err(dev, "Cannot register LIFs: %d, aborting\n", err); @@ -223,6 +334,7 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err_out_deregister_lifs: ionic_lifs_unregister(ionic); err_out_deinit_lifs: + ionic_vf_dealloc(ionic); ionic_lifs_deinit(ionic); err_out_free_lifs: ionic_lifs_free(ionic); @@ -279,6 +391,7 @@ static struct pci_driver ionic_driver = { .id_table = ionic_id_table, .probe = ionic_probe, .remove = ionic_remove, + .sriov_configure = ionic_sriov_configure, }; int ionic_bus_register_driver(void) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c index 5f9d2ec70446..87f82f36812f 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c @@ -286,6 +286,64 @@ void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type) ionic_dev_cmd_go(idev, &cmd); } +/* VF commands */ +int ionic_set_vf_config(struct ionic *ionic, int vf, u8 attr, u8 *data) +{ + union ionic_dev_cmd cmd = { + .vf_setattr.opcode = IONIC_CMD_VF_SETATTR, + .vf_setattr.attr = attr, + .vf_setattr.vf_index = vf, + }; + int err; + + switch (attr) { + case IONIC_VF_ATTR_SPOOFCHK: + cmd.vf_setattr.spoofchk = *data; + dev_dbg(ionic->dev, "%s: vf %d spoof %d\n", + __func__, vf, *data); + break; + case IONIC_VF_ATTR_TRUST: + cmd.vf_setattr.trust = *data; + dev_dbg(ionic->dev, "%s: vf %d trust %d\n", + __func__, vf, *data); + break; + case IONIC_VF_ATTR_LINKSTATE: + cmd.vf_setattr.linkstate = *data; + dev_dbg(ionic->dev, "%s: vf 
%d linkstate %d\n", + __func__, vf, *data); + break; + case IONIC_VF_ATTR_MAC: + ether_addr_copy(cmd.vf_setattr.macaddr, data); + dev_dbg(ionic->dev, "%s: vf %d macaddr %pM\n", + __func__, vf, data); + break; + case IONIC_VF_ATTR_VLAN: + cmd.vf_setattr.vlanid = cpu_to_le16(*(u16 *)data); + dev_dbg(ionic->dev, "%s: vf %d vlan %d\n", + __func__, vf, *(u16 *)data); + break; + case IONIC_VF_ATTR_RATE: + cmd.vf_setattr.maxrate = cpu_to_le32(*(u32 *)data); + dev_dbg(ionic->dev, "%s: vf %d maxrate %d\n", + __func__, vf, *(u32 *)data); + break; + case IONIC_VF_ATTR_STATSADDR: + cmd.vf_setattr.stats_pa = cpu_to_le64(*(u64 *)data); + dev_dbg(ionic->dev, "%s: vf %d stats_pa 0x%08llx\n", + __func__, vf, *(u64 *)data); + break; + default: + return -EINVAL; + } + + mutex_lock(&ionic->dev_cmd_lock); + ionic_dev_cmd_go(&ionic->idev, &cmd); + err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT); + mutex_unlock(&ionic->dev_cmd_lock); + + return err; +} + /* LIF commands */ void ionic_dev_cmd_lif_identify(struct ionic_dev *idev, u8 type, u8 ver) { diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index 4665c5dc5324..7838e342c4fd 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -113,6 +113,12 @@ static_assert(sizeof(struct ionic_rxq_desc) == 16); static_assert(sizeof(struct ionic_rxq_sg_desc) == 128); static_assert(sizeof(struct ionic_rxq_comp) == 16); +/* SR/IOV */ +static_assert(sizeof(struct ionic_vf_setattr_cmd) == 64); +static_assert(sizeof(struct ionic_vf_setattr_comp) == 16); +static_assert(sizeof(struct ionic_vf_getattr_cmd) == 64); +static_assert(sizeof(struct ionic_vf_getattr_comp) == 16); + struct ionic_devinfo { u8 asic_type; u8 asic_rev; @@ -275,6 +281,7 @@ void ionic_dev_cmd_port_autoneg(struct ionic_dev *idev, u8 an_enable); void ionic_dev_cmd_port_fec(struct ionic_dev *idev, u8 fec_type); void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type); +int ionic_set_vf_config(struct ionic *ionic, int vf, u8 attr, u8 *data); void ionic_dev_cmd_lif_identify(struct ionic_dev *idev, u8 type, u8 ver); void ionic_dev_cmd_lif_init(struct ionic_dev *idev, u16 lif_index, dma_addr_t addr); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h index 39317cdfa6cf..f131adad96e3 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_if.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h @@ -51,6 +51,10 @@ enum ionic_cmd_opcode { IONIC_CMD_RDMA_CREATE_CQ = 52, IONIC_CMD_RDMA_CREATE_ADMINQ = 53, + /* SR/IOV commands */ + IONIC_CMD_VF_GETATTR = 60, + IONIC_CMD_VF_SETATTR = 61, + /* QoS commands */ IONIC_CMD_QOS_CLASS_IDENTIFY = 240, IONIC_CMD_QOS_CLASS_INIT = 241, @@ -1639,6 +1643,93 @@ enum ionic_qos_sched_type { IONIC_QOS_SCHED_TYPE_DWRR = 1, /* Deficit weighted round-robin */ }; +enum ionic_vf_attr { + IONIC_VF_ATTR_SPOOFCHK = 1, + IONIC_VF_ATTR_TRUST = 2, + IONIC_VF_ATTR_MAC = 3, + IONIC_VF_ATTR_LINKSTATE = 4, + IONIC_VF_ATTR_VLAN = 5, + IONIC_VF_ATTR_RATE = 6, + IONIC_VF_ATTR_STATSADDR = 7, +}; + +/** + * VF link status + */ +enum ionic_vf_link_status { + IONIC_VF_LINK_STATUS_AUTO = 0, /* link state of the uplink */ + IONIC_VF_LINK_STATUS_UP = 1, /* link is always up */ + IONIC_VF_LINK_STATUS_DOWN = 2, /* link is always down */ +}; + +/** + * struct ionic_vf_setattr_cmd - Set VF attributes on the NIC + * @opcode: Opcode + * @index: VF index + * @attr: Attribute type (enum ionic_vf_attr) + * macaddr mac address + * 
vlanid vlan ID + * maxrate max Tx rate in Mbps + * spoofchk enable address spoof checking + * trust enable VF trust + * linkstate set link up or down + * stats_pa set DMA address for VF stats + */ +struct ionic_vf_setattr_cmd { + u8 opcode; + u8 attr; + __le16 vf_index; + union { + u8 macaddr[6]; + __le16 vlanid; + __le32 maxrate; + u8 spoofchk; + u8 trust; + u8 linkstate; + __le64 stats_pa; + u8 pad[60]; + }; +}; + +struct ionic_vf_setattr_comp { + u8 status; + u8 attr; + __le16 vf_index; + __le16 comp_index; + u8 rsvd[9]; + u8 color; +}; + +/** + * struct ionic_vf_getattr_cmd - Get VF attributes from the NIC + * @opcode: Opcode + * @index: VF index + * @attr: Attribute type (enum ionic_vf_attr) + */ +struct ionic_vf_getattr_cmd { + u8 opcode; + u8 attr; + __le16 vf_index; + u8 rsvd[60]; +}; + +struct ionic_vf_getattr_comp { + u8 status; + u8 attr; + __le16 vf_index; + union { + u8 macaddr[6]; + __le16 vlanid; + __le32 maxrate; + u8 spoofchk; + u8 trust; + u8 linkstate; + __le64 stats_pa; + u8 pad[11]; + }; + u8 color; +}; + /** * union ionic_qos_config - Qos configuration structure * @flags: Configuration flags @@ -2289,6 +2380,9 @@ union ionic_dev_cmd { struct ionic_port_getattr_cmd port_getattr; struct ionic_port_setattr_cmd port_setattr; + struct ionic_vf_setattr_cmd vf_setattr; + struct ionic_vf_getattr_cmd vf_getattr; + struct ionic_lif_identify_cmd lif_identify; struct ionic_lif_init_cmd lif_init; struct ionic_lif_reset_cmd lif_reset; @@ -2318,6 +2412,9 @@ union ionic_dev_cmd_comp { struct ionic_port_getattr_comp port_getattr; struct ionic_port_setattr_comp port_setattr; + struct ionic_vf_setattr_comp vf_setattr; + struct ionic_vf_getattr_comp vf_getattr; + struct ionic_lif_identify_comp lif_identify; struct ionic_lif_init_comp lif_init; ionic_lif_reset_comp lif_reset; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index ef8258713369..191271f6260d 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -1285,7 +1285,7 @@ static void ionic_tx_timeout_work(struct work_struct *ws) rtnl_unlock(); } -static void ionic_tx_timeout(struct net_device *netdev) +static void ionic_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ionic_lif *lif = netdev_priv(netdev); @@ -1619,6 +1619,227 @@ int ionic_stop(struct net_device *netdev) return err; } +static int ionic_get_vf_config(struct net_device *netdev, + int vf, struct ifla_vf_info *ivf) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic *ionic = lif->ionic; + int ret = 0; + + down_read(&ionic->vf_op_lock); + + if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { + ret = -EINVAL; + } else { + ivf->vf = vf; + ivf->vlan = ionic->vfs[vf].vlanid; + ivf->qos = 0; + ivf->spoofchk = ionic->vfs[vf].spoofchk; + ivf->linkstate = ionic->vfs[vf].linkstate; + ivf->max_tx_rate = ionic->vfs[vf].maxrate; + ivf->trusted = ionic->vfs[vf].trusted; + ether_addr_copy(ivf->mac, ionic->vfs[vf].macaddr); + } + + up_read(&ionic->vf_op_lock); + return ret; +} + +static int ionic_get_vf_stats(struct net_device *netdev, int vf, + struct ifla_vf_stats *vf_stats) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic *ionic = lif->ionic; + struct ionic_lif_stats *vs; + int ret = 0; + + down_read(&ionic->vf_op_lock); + + if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { + ret = -EINVAL; + } else { + memset(vf_stats, 0, sizeof(*vf_stats)); + vs = &ionic->vfs[vf].stats; + + vf_stats->rx_packets = 
le64_to_cpu(vs->rx_ucast_packets); + vf_stats->tx_packets = le64_to_cpu(vs->tx_ucast_packets); + vf_stats->rx_bytes = le64_to_cpu(vs->rx_ucast_bytes); + vf_stats->tx_bytes = le64_to_cpu(vs->tx_ucast_bytes); + vf_stats->broadcast = le64_to_cpu(vs->rx_bcast_packets); + vf_stats->multicast = le64_to_cpu(vs->rx_mcast_packets); + vf_stats->rx_dropped = le64_to_cpu(vs->rx_ucast_drop_packets) + + le64_to_cpu(vs->rx_mcast_drop_packets) + + le64_to_cpu(vs->rx_bcast_drop_packets); + vf_stats->tx_dropped = le64_to_cpu(vs->tx_ucast_drop_packets) + + le64_to_cpu(vs->tx_mcast_drop_packets) + + le64_to_cpu(vs->tx_bcast_drop_packets); + } + + up_read(&ionic->vf_op_lock); + return ret; +} + +static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic *ionic = lif->ionic; + int ret; + + if (!(is_zero_ether_addr(mac) || is_valid_ether_addr(mac))) + return -EINVAL; + + down_read(&ionic->vf_op_lock); + + if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { + ret = -EINVAL; + } else { + ret = ionic_set_vf_config(ionic, vf, IONIC_VF_ATTR_MAC, mac); + if (!ret) + ether_addr_copy(ionic->vfs[vf].macaddr, mac); + } + + up_read(&ionic->vf_op_lock); + return ret; +} + +static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, + u8 qos, __be16 proto) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic *ionic = lif->ionic; + int ret; + + /* until someday when we support qos */ + if (qos) + return -EINVAL; + + if (vlan > 4095) + return -EINVAL; + + if (proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + + down_read(&ionic->vf_op_lock); + + if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { + ret = -EINVAL; + } else { + ret = ionic_set_vf_config(ionic, vf, + IONIC_VF_ATTR_VLAN, (u8 *)&vlan); + if (!ret) + ionic->vfs[vf].vlanid = vlan; + } + + up_read(&ionic->vf_op_lock); + return ret; +} + +static int ionic_set_vf_rate(struct net_device *netdev, int vf, + int tx_min, int tx_max) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic *ionic = lif->ionic; + int ret; + + /* setting the min just seems silly */ + if (tx_min) + return -EINVAL; + + down_write(&ionic->vf_op_lock); + + if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { + ret = -EINVAL; + } else { + ret = ionic_set_vf_config(ionic, vf, + IONIC_VF_ATTR_RATE, (u8 *)&tx_max); + if (!ret) + lif->ionic->vfs[vf].maxrate = tx_max; + } + + up_write(&ionic->vf_op_lock); + return ret; +} + +static int ionic_set_vf_spoofchk(struct net_device *netdev, int vf, bool set) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic *ionic = lif->ionic; + u8 data = set; /* convert to u8 for config */ + int ret; + + down_write(&ionic->vf_op_lock); + + if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { + ret = -EINVAL; + } else { + ret = ionic_set_vf_config(ionic, vf, + IONIC_VF_ATTR_SPOOFCHK, &data); + if (!ret) + ionic->vfs[vf].spoofchk = data; + } + + up_write(&ionic->vf_op_lock); + return ret; +} + +static int ionic_set_vf_trust(struct net_device *netdev, int vf, bool set) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic *ionic = lif->ionic; + u8 data = set; /* convert to u8 for config */ + int ret; + + down_write(&ionic->vf_op_lock); + + if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { + ret = -EINVAL; + } else { + ret = ionic_set_vf_config(ionic, vf, + IONIC_VF_ATTR_TRUST, &data); + if (!ret) + ionic->vfs[vf].trusted = data; + } + + up_write(&ionic->vf_op_lock); + return ret; +} + +static int ionic_set_vf_link_state(struct 
net_device *netdev, int vf, int set) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic *ionic = lif->ionic; + u8 data; + int ret; + + switch (set) { + case IFLA_VF_LINK_STATE_ENABLE: + data = IONIC_VF_LINK_STATUS_UP; + break; + case IFLA_VF_LINK_STATE_DISABLE: + data = IONIC_VF_LINK_STATUS_DOWN; + break; + case IFLA_VF_LINK_STATE_AUTO: + data = IONIC_VF_LINK_STATUS_AUTO; + break; + default: + return -EINVAL; + } + + down_write(&ionic->vf_op_lock); + + if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) { + ret = -EINVAL; + } else { + ret = ionic_set_vf_config(ionic, vf, + IONIC_VF_ATTR_LINKSTATE, &data); + if (!ret) + ionic->vfs[vf].linkstate = set; + } + + up_write(&ionic->vf_op_lock); + return ret; +} + static const struct net_device_ops ionic_netdev_ops = { .ndo_open = ionic_open, .ndo_stop = ionic_stop, @@ -1632,6 +1853,14 @@ static const struct net_device_ops ionic_netdev_ops = { .ndo_change_mtu = ionic_change_mtu, .ndo_vlan_rx_add_vid = ionic_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = ionic_vlan_rx_kill_vid, + .ndo_set_vf_vlan = ionic_set_vf_vlan, + .ndo_set_vf_trust = ionic_set_vf_trust, + .ndo_set_vf_mac = ionic_set_vf_mac, + .ndo_set_vf_rate = ionic_set_vf_rate, + .ndo_set_vf_spoofchk = ionic_set_vf_spoofchk, + .ndo_get_vf_config = ionic_get_vf_config, + .ndo_set_vf_link_state = ionic_set_vf_link_state, + .ndo_get_vf_stats = ionic_get_vf_stats, }; int ionic_reset_queues(struct ionic_lif *lif) @@ -1965,18 +2194,22 @@ static int ionic_station_set(struct ionic_lif *lif) if (err) return err; + if (is_zero_ether_addr(ctx.comp.lif_getattr.mac)) + return 0; + memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len); addr.sa_family = AF_INET; err = eth_prepare_mac_addr_change(netdev, &addr); - if (err) - return err; - - if (!is_zero_ether_addr(netdev->dev_addr)) { - netdev_dbg(lif->netdev, "deleting station MAC addr %pM\n", - netdev->dev_addr); - ionic_lif_addr(lif, netdev->dev_addr, false); + if (err) { + netdev_warn(lif->netdev, "ignoring bad MAC addr from NIC %pM\n", + addr.sa_data); + return 0; } + netdev_dbg(lif->netdev, "deleting station MAC addr %pM\n", + netdev->dev_addr); + ionic_lif_addr(lif, netdev->dev_addr, false); + eth_commit_mac_addr_change(netdev, &addr); netdev_dbg(lif->netdev, "adding station MAC addr %pM\n", netdev->dev_addr); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index a55fd1f8c31b..9c5a7dd45f9d 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -37,6 +37,7 @@ struct ionic_rx_stats { u64 csum_complete; u64 csum_error; u64 buffers_posted; + u64 dropped; }; #define IONIC_QCQ_F_INITED BIT(0) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index 3590ea7fd88a..a8e3fb73b465 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -165,6 +165,10 @@ static const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode) return "IONIC_CMD_FW_DOWNLOAD"; case IONIC_CMD_FW_CONTROL: return "IONIC_CMD_FW_CONTROL"; + case IONIC_CMD_VF_GETATTR: + return "IONIC_CMD_VF_GETATTR"; + case IONIC_CMD_VF_SETATTR: + return "IONIC_CMD_VF_SETATTR"; default: return "DEVCMD_UNKNOWN"; } @@ -326,9 +330,9 @@ int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds) unsigned long max_wait; unsigned long duration; int opcode; + int hb = 0; int done; int err; - int hb; WARN_ON(in_interrupt()); diff --git 
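The ionic ndo_set_vf_* handlers above all follow one shape: take vf_op_lock, validate the VF index against pci_num_vf() and the vfs array, push the attribute to firmware, and cache the new value only on success. A condensed sketch of that pattern, with the structures and the firmware call reduced to stand-ins (all example_* names are hypothetical):

#include <linux/pci.h>
#include <linux/rwsem.h>

struct example_vf {
	u8 spoofchk;
};

struct example_nic {
	struct pci_dev *pdev;
	struct rw_semaphore vf_op_lock;	/* serializes VF attribute updates */
	struct example_vf *vfs;		/* NULL until SR-IOV is enabled */
};

/* Stand-in for the device command that programs the attribute in FW. */
static int example_fw_set_spoofchk(struct example_nic *nic, int vf, u8 on)
{
	/* A real driver issues a device command here; pretend it succeeds. */
	return 0;
}

static int example_set_vf_spoofchk(struct example_nic *nic, int vf, bool set)
{
	u8 data = set;
	int ret;

	down_write(&nic->vf_op_lock);
	if (vf >= pci_num_vf(nic->pdev) || !nic->vfs) {
		ret = -EINVAL;
	} else {
		ret = example_fw_set_spoofchk(nic, vf, data);
		if (!ret)
			nic->vfs[vf].spoofchk = data;	/* cache only on success */
	}
	up_write(&nic->vf_op_lock);
	return ret;
}

Read-only queries (get_vf_config, get_vf_stats) take the semaphore for read; anything that changes cached state takes it for write, which is why sriov_configure can tear the vfs array down safely.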
a/drivers/net/ethernet/pensando/ionic/ionic_stats.c b/drivers/net/ethernet/pensando/ionic/ionic_stats.c index 03916b6d47f2..a1e9796a660a 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_stats.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.c @@ -39,6 +39,7 @@ static const struct ionic_stat_desc ionic_rx_stats_desc[] = { IONIC_RX_STAT_DESC(csum_none), IONIC_RX_STAT_DESC(csum_complete), IONIC_RX_STAT_DESC(csum_error), + IONIC_RX_STAT_DESC(dropped), }; static const struct ionic_stat_desc ionic_txq_stats_desc[] = { diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 97e79949b359..e452f4242ba0 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -152,12 +152,16 @@ static void ionic_rx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_i stats = q_to_rx_stats(q); netdev = q->lif->netdev; - if (comp->status) + if (comp->status) { + stats->dropped++; return; + } /* no packet processing while resetting */ - if (unlikely(test_bit(IONIC_LIF_QUEUE_RESET, q->lif->state))) + if (unlikely(test_bit(IONIC_LIF_QUEUE_RESET, q->lif->state))) { + stats->dropped++; return; + } stats->pkts++; stats->bytes += le16_to_cpu(comp->len); @@ -167,8 +171,10 @@ static void ionic_rx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_i else skb = ionic_rx_frags(q, desc_info, cq_info); - if (unlikely(!skb)) + if (unlikely(!skb)) { + stats->dropped++; return; + } skb_record_rx_queue(skb, q->index); @@ -337,6 +343,8 @@ void ionic_rx_fill(struct ionic_queue *q) struct ionic_rxq_sg_desc *sg_desc; struct ionic_rxq_sg_elem *sg_elem; struct ionic_rxq_desc *desc; + unsigned int remain_len; + unsigned int seg_len; unsigned int nfrags; bool ring_doorbell; unsigned int i, j; @@ -346,6 +354,7 @@ void ionic_rx_fill(struct ionic_queue *q) nfrags = round_up(len, PAGE_SIZE) / PAGE_SIZE; for (i = ionic_q_space_avail(q); i; i--) { + remain_len = len; desc_info = q->head; desc = desc_info->desc; sg_desc = desc_info->sg_desc; @@ -369,7 +378,9 @@ void ionic_rx_fill(struct ionic_queue *q) return; } desc->addr = cpu_to_le64(page_info->dma_addr); - desc->len = cpu_to_le16(PAGE_SIZE); + seg_len = min_t(unsigned int, PAGE_SIZE, len); + desc->len = cpu_to_le16(seg_len); + remain_len -= seg_len; page_info++; /* fill sg descriptors - pages[1..n] */ @@ -385,7 +396,9 @@ void ionic_rx_fill(struct ionic_queue *q) return; } sg_elem->addr = cpu_to_le64(page_info->dma_addr); - sg_elem->len = cpu_to_le16(PAGE_SIZE); + seg_len = min_t(unsigned int, PAGE_SIZE, remain_len); + sg_elem->len = cpu_to_le16(seg_len); + remain_len -= seg_len; page_info++; } diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index c692a41e4548..8067ea04d455 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -49,7 +49,7 @@ static int netxen_nic_open(struct net_device *netdev); static int netxen_nic_close(struct net_device *netdev); static netdev_tx_t netxen_nic_xmit_frame(struct sk_buff *, struct net_device *); -static void netxen_tx_timeout(struct net_device *netdev); +static void netxen_tx_timeout(struct net_device *netdev, unsigned int txqueue); static void netxen_tx_timeout_task(struct work_struct *work); static void netxen_fw_poll_work(struct work_struct *work); static void netxen_schedule_work(struct netxen_adapter *adapter, @@ -2222,7 +2222,7 @@ static void 
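The ionic_rx_fill() change above stops advertising a full PAGE_SIZE for every fragment and instead caps each descriptor at the bytes still remaining, so the per-segment lengths sum to the buffer length. A standalone illustration of that split, writing into a plain array instead of hardware descriptors (the function name is a stand-in):

#include <linux/kernel.h>
#include <linux/mm.h>

/*
 * Fill @seg_lens[0..nfrags-1] with per-page segment lengths for a buffer of
 * @len bytes, mirroring the remain_len/seg_len bookkeeping in the hunk above.
 * Returns the number of segments actually used.
 */
static unsigned int example_split_len(unsigned int len, u32 *seg_lens,
				      unsigned int nfrags)
{
	unsigned int remain_len = len;
	unsigned int i;

	for (i = 0; i < nfrags && remain_len; i++) {
		unsigned int seg_len = min_t(unsigned int, PAGE_SIZE, remain_len);

		seg_lens[i] = seg_len;
		remain_len -= seg_len;
	}

	return i;
}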
netxen_nic_handle_phy_intr(struct netxen_adapter *adapter) netxen_advert_link_change(adapter, linkup); } -static void netxen_tx_timeout(struct net_device *netdev) +static void netxen_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct netxen_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 89fe091c958d..fa41bf08a589 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -253,7 +253,8 @@ enum qed_resources { QED_VLAN, QED_RDMA_CNQ_RAM, QED_ILT, - QED_LL2_QUEUE, + QED_LL2_RAM_QUEUE, + QED_LL2_CTX_QUEUE, QED_CMDQS_CQS, QED_RDMA_STATS_QUEUE, QED_BDQ, @@ -461,6 +462,8 @@ struct qed_fw_data { const u8 *modes_tree_buf; union init_op *init_ops; const u32 *arr_data; + const u32 *fw_overlays; + u32 fw_overlays_len; u32 init_ops_size; }; @@ -531,6 +534,23 @@ struct qed_nvm_image_info { bool valid; }; +enum qed_hsi_def_type { + QED_HSI_DEF_MAX_NUM_VFS, + QED_HSI_DEF_MAX_NUM_L2_QUEUES, + QED_HSI_DEF_MAX_NUM_PORTS, + QED_HSI_DEF_MAX_SB_PER_PATH, + QED_HSI_DEF_MAX_NUM_PFS, + QED_HSI_DEF_MAX_NUM_VPORTS, + QED_HSI_DEF_NUM_ETH_RSS_ENGINE, + QED_HSI_DEF_MAX_QM_TX_QUEUES, + QED_HSI_DEF_NUM_PXP_ILT_RECORDS, + QED_HSI_DEF_NUM_RDMA_STATISTIC_COUNTERS, + QED_HSI_DEF_MAX_QM_GLOBAL_RLS, + QED_HSI_DEF_MAX_PBF_CMD_LINES, + QED_HSI_DEF_MAX_BTB_BLOCKS, + QED_NUM_HSI_DEFS +}; + #define DRV_MODULE_VERSION \ __stringify(QED_MAJOR_VERSION) "." \ __stringify(QED_MINOR_VERSION) "." \ @@ -646,6 +666,7 @@ struct qed_hwfn { struct dbg_tools_data dbg_info; void *dbg_user_info; + struct virt_mem_desc dbg_arrays[MAX_BIN_DBG_BUFFER_TYPE]; /* PWM region specific data */ u16 wid_count; @@ -668,6 +689,7 @@ struct qed_hwfn { /* Nvm images number and attributes */ struct qed_nvm_image_info nvm_info; + struct phys_mem_desc *fw_overlay_mem; struct qed_ptt *p_arfs_ptt; struct qed_simd_fp_handler simd_proto_handler[64]; @@ -796,8 +818,8 @@ struct qed_dev { u8 cache_shift; /* Init */ - const struct iro *iro_arr; -#define IRO (p_hwfn->cdev->iro_arr) + const u32 *iro_arr; +#define IRO ((const struct iro *)p_hwfn->cdev->iro_arr) /* HW functions */ u8 num_hwfns; @@ -856,6 +878,8 @@ struct qed_dev { struct qed_cb_ll2_info *ll2; u8 ll2_mac_address[ETH_ALEN]; #endif + struct qed_dbg_feature dbg_features[DBG_FEATURE_NUM]; + bool disable_ilt_dump; DECLARE_HASHTABLE(connections, 10); const struct firmware *firmware; @@ -868,16 +892,35 @@ struct qed_dev { bool iwarp_cmt; }; -#define NUM_OF_VFS(dev) (QED_IS_BB(dev) ? MAX_NUM_VFS_BB \ - : MAX_NUM_VFS_K2) -#define NUM_OF_L2_QUEUES(dev) (QED_IS_BB(dev) ? MAX_NUM_L2_QUEUES_BB \ - : MAX_NUM_L2_QUEUES_K2) -#define NUM_OF_PORTS(dev) (QED_IS_BB(dev) ? MAX_NUM_PORTS_BB \ - : MAX_NUM_PORTS_K2) -#define NUM_OF_SBS(dev) (QED_IS_BB(dev) ? MAX_SB_PER_PATH_BB \ - : MAX_SB_PER_PATH_K2) -#define NUM_OF_ENG_PFS(dev) (QED_IS_BB(dev) ? 
MAX_NUM_PFS_BB \ - : MAX_NUM_PFS_K2) +u32 qed_get_hsi_def_val(struct qed_dev *cdev, enum qed_hsi_def_type type); + +#define NUM_OF_VFS(dev) \ + qed_get_hsi_def_val(dev, QED_HSI_DEF_MAX_NUM_VFS) +#define NUM_OF_L2_QUEUES(dev) \ + qed_get_hsi_def_val(dev, QED_HSI_DEF_MAX_NUM_L2_QUEUES) +#define NUM_OF_PORTS(dev) \ + qed_get_hsi_def_val(dev, QED_HSI_DEF_MAX_NUM_PORTS) +#define NUM_OF_SBS(dev) \ + qed_get_hsi_def_val(dev, QED_HSI_DEF_MAX_SB_PER_PATH) +#define NUM_OF_ENG_PFS(dev) \ + qed_get_hsi_def_val(dev, QED_HSI_DEF_MAX_NUM_PFS) +#define NUM_OF_VPORTS(dev) \ + qed_get_hsi_def_val(dev, QED_HSI_DEF_MAX_NUM_VPORTS) +#define NUM_OF_RSS_ENGINES(dev) \ + qed_get_hsi_def_val(dev, QED_HSI_DEF_NUM_ETH_RSS_ENGINE) +#define NUM_OF_QM_TX_QUEUES(dev) \ + qed_get_hsi_def_val(dev, QED_HSI_DEF_MAX_QM_TX_QUEUES) +#define NUM_OF_PXP_ILT_RECORDS(dev) \ + qed_get_hsi_def_val(dev, QED_HSI_DEF_NUM_PXP_ILT_RECORDS) +#define NUM_OF_RDMA_STATISTIC_COUNTERS(dev) \ + qed_get_hsi_def_val(dev, QED_HSI_DEF_NUM_RDMA_STATISTIC_COUNTERS) +#define NUM_OF_QM_GLOBAL_RLS(dev) \ + qed_get_hsi_def_val(dev, QED_HSI_DEF_MAX_QM_GLOBAL_RLS) +#define NUM_OF_PBF_CMD_LINES(dev) \ + qed_get_hsi_def_val(dev, QED_HSI_DEF_MAX_PBF_CMD_LINES) +#define NUM_OF_BTB_BLOCKS(dev) \ + qed_get_hsi_def_val(dev, QED_HSI_DEF_MAX_BTB_BLOCKS) + /** * @brief qed_concrete_to_sw_fid - get the sw function id from diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 8e1bdf58b9e7..fbfff2b1dc93 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -50,12 +50,6 @@ #include "qed_reg_addr.h" #include "qed_sriov.h" -/* Max number of connection types in HW (DQ/CDU etc.) */ -#define MAX_CONN_TYPES PROTOCOLID_COMMON -#define NUM_TASK_TYPES 2 -#define NUM_TASK_PF_SEGMENTS 4 -#define NUM_TASK_VF_SEGMENTS 1 - /* QM constants */ #define QM_PQ_ELEMENT_SIZE 4 /* in bytes */ @@ -123,126 +117,6 @@ struct src_ent { /* Alignment is inherent to the type1_task_context structure */ #define TYPE1_TASK_CXT_SIZE(p_hwfn) sizeof(union type1_task_context) -/* PF per protocl configuration object */ -#define TASK_SEGMENTS (NUM_TASK_PF_SEGMENTS + NUM_TASK_VF_SEGMENTS) -#define TASK_SEGMENT_VF (NUM_TASK_PF_SEGMENTS) - -struct qed_tid_seg { - u32 count; - u8 type; - bool has_fl_mem; -}; - -struct qed_conn_type_cfg { - u32 cid_count; - u32 cids_per_vf; - struct qed_tid_seg tid_seg[TASK_SEGMENTS]; -}; - -/* ILT Client configuration, Per connection type (protocol) resources. 
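The NUM_OF_*() macros above now route through qed_get_hsi_def_val(), i.e. a runtime lookup of a per-chip constant instead of a QED_IS_BB() ternary baked into each macro. A reduced sketch of that kind of lookup; the enum names and table values below are placeholders, not the real HSI numbers:

#include <linux/bug.h>
#include <linux/types.h>

enum example_chip { EX_CHIP_BB, EX_CHIP_AH, EX_MAX_CHIPS };
enum example_hsi_def { EX_DEF_MAX_NUM_VFS, EX_DEF_MAX_NUM_PORTS, EX_NUM_DEFS };

/* Placeholder values; one row per HSI define, one column per chip. */
static const u32 example_hsi_defs[EX_NUM_DEFS][EX_MAX_CHIPS] = {
	[EX_DEF_MAX_NUM_VFS]   = { 240, 192 },
	[EX_DEF_MAX_NUM_PORTS] = {   2,   4 },
};

static u32 example_get_hsi_def_val(enum example_chip chip,
				   enum example_hsi_def type)
{
	if (WARN_ON(type >= EX_NUM_DEFS || chip >= EX_MAX_CHIPS))
		return 0;

	return example_hsi_defs[type][chip];
}

Centralizing the table keeps the call sites chip-agnostic, so adding a chip means adding a column rather than touching every macro.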
*/ -#define ILT_CLI_PF_BLOCKS (1 + NUM_TASK_PF_SEGMENTS * 2) -#define ILT_CLI_VF_BLOCKS (1 + NUM_TASK_VF_SEGMENTS * 2) -#define CDUC_BLK (0) -#define SRQ_BLK (0) -#define CDUT_SEG_BLK(n) (1 + (u8)(n)) -#define CDUT_FL_SEG_BLK(n, X) (1 + (n) + NUM_TASK_ ## X ## _SEGMENTS) - -enum ilt_clients { - ILT_CLI_CDUC, - ILT_CLI_CDUT, - ILT_CLI_QM, - ILT_CLI_TM, - ILT_CLI_SRC, - ILT_CLI_TSDM, - ILT_CLI_MAX -}; - -struct ilt_cfg_pair { - u32 reg; - u32 val; -}; - -struct qed_ilt_cli_blk { - u32 total_size; /* 0 means not active */ - u32 real_size_in_page; - u32 start_line; - u32 dynamic_line_cnt; -}; - -struct qed_ilt_client_cfg { - bool active; - - /* ILT boundaries */ - struct ilt_cfg_pair first; - struct ilt_cfg_pair last; - struct ilt_cfg_pair p_size; - - /* ILT client blocks for PF */ - struct qed_ilt_cli_blk pf_blks[ILT_CLI_PF_BLOCKS]; - u32 pf_total_lines; - - /* ILT client blocks for VFs */ - struct qed_ilt_cli_blk vf_blks[ILT_CLI_VF_BLOCKS]; - u32 vf_total_lines; -}; - -/* Per Path - - * ILT shadow table - * Protocol acquired CID lists - * PF start line in ILT - */ -struct qed_dma_mem { - dma_addr_t p_phys; - void *p_virt; - size_t size; -}; - -struct qed_cid_acquired_map { - u32 start_cid; - u32 max_count; - unsigned long *cid_map; -}; - -struct qed_cxt_mngr { - /* Per protocl configuration */ - struct qed_conn_type_cfg conn_cfg[MAX_CONN_TYPES]; - - /* computed ILT structure */ - struct qed_ilt_client_cfg clients[ILT_CLI_MAX]; - - /* Task type sizes */ - u32 task_type_size[NUM_TASK_TYPES]; - - /* total number of VFs for this hwfn - - * ALL VFs are symmetric in terms of HW resources - */ - u32 vf_count; - - /* Acquired CIDs */ - struct qed_cid_acquired_map acquired[MAX_CONN_TYPES]; - - struct qed_cid_acquired_map - acquired_vf[MAX_CONN_TYPES][MAX_NUM_VFS]; - - /* ILT shadow table */ - struct qed_dma_mem *ilt_shadow; - u32 pf_start_line; - - /* Mutex for a dynamic ILT allocation */ - struct mutex mutex; - - /* SRC T2 */ - struct qed_dma_mem *t2; - u32 t2_num_pages; - u64 first_free; - u64 last_free; - - /* total number of SRQ's for this hwfn */ - u32 srq_count; - - /* Maximal number of L2 steering filters */ - u32 arfs_count; -}; static bool src_proto(enum protocol_type type) { return type == PROTOCOLID_ISCSI || @@ -880,30 +754,60 @@ u32 qed_cxt_cfg_ilt_compute_excess(struct qed_hwfn *p_hwfn, u32 used_lines) static void qed_cxt_src_t2_free(struct qed_hwfn *p_hwfn) { - struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; + struct qed_src_t2 *p_t2 = &p_hwfn->p_cxt_mngr->src_t2; u32 i; - if (!p_mngr->t2) + if (!p_t2 || !p_t2->dma_mem) return; - for (i = 0; i < p_mngr->t2_num_pages; i++) - if (p_mngr->t2[i].p_virt) + for (i = 0; i < p_t2->num_pages; i++) + if (p_t2->dma_mem[i].virt_addr) dma_free_coherent(&p_hwfn->cdev->pdev->dev, - p_mngr->t2[i].size, - p_mngr->t2[i].p_virt, - p_mngr->t2[i].p_phys); + p_t2->dma_mem[i].size, + p_t2->dma_mem[i].virt_addr, + p_t2->dma_mem[i].phys_addr); + + kfree(p_t2->dma_mem); + p_t2->dma_mem = NULL; +} + +static int +qed_cxt_t2_alloc_pages(struct qed_hwfn *p_hwfn, + struct qed_src_t2 *p_t2, u32 total_size, u32 page_size) +{ + void **p_virt; + u32 size, i; + + if (!p_t2 || !p_t2->dma_mem) + return -EINVAL; - kfree(p_mngr->t2); - p_mngr->t2 = NULL; + for (i = 0; i < p_t2->num_pages; i++) { + size = min_t(u32, total_size, page_size); + p_virt = &p_t2->dma_mem[i].virt_addr; + + *p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + size, + &p_t2->dma_mem[i].phys_addr, + GFP_KERNEL); + if (!p_t2->dma_mem[i].virt_addr) + return -ENOMEM; + + memset(*p_virt, 0, size); + 
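qed_cxt_t2_alloc_pages() above walks a pre-sized descriptor array and gives each entry a coherent DMA buffer no larger than the ILT page size, carving total_size down as it goes. A self-contained sketch of that allocation loop under stand-in types, with error handling reduced to freeing what was already allocated:

#include <linux/dma-mapping.h>
#include <linux/gfp.h>
#include <linux/kernel.h>

struct example_dma_page {
	void *virt_addr;
	dma_addr_t phys_addr;
	u32 size;
};

static void example_free_pages(struct device *dev,
			       struct example_dma_page *pages, u32 num)
{
	u32 i;

	for (i = 0; i < num; i++)
		if (pages[i].virt_addr)
			dma_free_coherent(dev, pages[i].size,
					  pages[i].virt_addr,
					  pages[i].phys_addr);
}

static int example_alloc_pages(struct device *dev,
			       struct example_dma_page *pages, u32 num,
			       u32 total_size, u32 page_size)
{
	u32 i;

	for (i = 0; i < num; i++) {
		u32 size = min_t(u32, total_size, page_size);

		/* dma_alloc_coherent() returns zeroed memory. */
		pages[i].virt_addr = dma_alloc_coherent(dev, size,
							&pages[i].phys_addr,
							GFP_KERNEL);
		if (!pages[i].virt_addr) {
			example_free_pages(dev, pages, i);
			return -ENOMEM;
		}

		pages[i].size = size;
		total_size -= size;
	}

	return 0;
}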
p_t2->dma_mem[i].size = size; + total_size -= size; + } + + return 0; } static int qed_cxt_src_t2_alloc(struct qed_hwfn *p_hwfn) { struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; u32 conn_num, total_size, ent_per_page, psz, i; + struct phys_mem_desc *p_t2_last_page; struct qed_ilt_client_cfg *p_src; struct qed_src_iids src_iids; - struct qed_dma_mem *p_t2; + struct qed_src_t2 *p_t2; int rc; memset(&src_iids, 0, sizeof(src_iids)); @@ -921,49 +825,39 @@ static int qed_cxt_src_t2_alloc(struct qed_hwfn *p_hwfn) /* use the same page size as the SRC ILT client */ psz = ILT_PAGE_IN_BYTES(p_src->p_size.val); - p_mngr->t2_num_pages = DIV_ROUND_UP(total_size, psz); + p_t2 = &p_mngr->src_t2; + p_t2->num_pages = DIV_ROUND_UP(total_size, psz); /* allocate t2 */ - p_mngr->t2 = kcalloc(p_mngr->t2_num_pages, sizeof(struct qed_dma_mem), - GFP_KERNEL); - if (!p_mngr->t2) { + p_t2->dma_mem = kcalloc(p_t2->num_pages, sizeof(struct phys_mem_desc), + GFP_KERNEL); + if (!p_t2->dma_mem) { + DP_NOTICE(p_hwfn, "Failed to allocate t2 table\n"); rc = -ENOMEM; goto t2_fail; } - /* allocate t2 pages */ - for (i = 0; i < p_mngr->t2_num_pages; i++) { - u32 size = min_t(u32, total_size, psz); - void **p_virt = &p_mngr->t2[i].p_virt; - - *p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, size, - &p_mngr->t2[i].p_phys, - GFP_KERNEL); - if (!p_mngr->t2[i].p_virt) { - rc = -ENOMEM; - goto t2_fail; - } - p_mngr->t2[i].size = size; - total_size -= size; - } + rc = qed_cxt_t2_alloc_pages(p_hwfn, p_t2, total_size, psz); + if (rc) + goto t2_fail; /* Set the t2 pointers */ /* entries per page - must be a power of two */ ent_per_page = psz / sizeof(struct src_ent); - p_mngr->first_free = (u64) p_mngr->t2[0].p_phys; + p_t2->first_free = (u64)p_t2->dma_mem[0].phys_addr; - p_t2 = &p_mngr->t2[(conn_num - 1) / ent_per_page]; - p_mngr->last_free = (u64) p_t2->p_phys + + p_t2_last_page = &p_t2->dma_mem[(conn_num - 1) / ent_per_page]; + p_t2->last_free = (u64)p_t2_last_page->phys_addr + ((conn_num - 1) & (ent_per_page - 1)) * sizeof(struct src_ent); - for (i = 0; i < p_mngr->t2_num_pages; i++) { + for (i = 0; i < p_t2->num_pages; i++) { u32 ent_num = min_t(u32, ent_per_page, conn_num); - struct src_ent *entries = p_mngr->t2[i].p_virt; - u64 p_ent_phys = (u64) p_mngr->t2[i].p_phys, val; + struct src_ent *entries = p_t2->dma_mem[i].virt_addr; + u64 p_ent_phys = (u64)p_t2->dma_mem[i].phys_addr, val; u32 j; for (j = 0; j < ent_num - 1; j++) { @@ -971,8 +865,8 @@ static int qed_cxt_src_t2_alloc(struct qed_hwfn *p_hwfn) entries[j].next = cpu_to_be64(val); } - if (i < p_mngr->t2_num_pages - 1) - val = (u64) p_mngr->t2[i + 1].p_phys; + if (i < p_t2->num_pages - 1) + val = (u64)p_t2->dma_mem[i + 1].phys_addr; else val = 0; entries[j].next = cpu_to_be64(val); @@ -988,7 +882,7 @@ t2_fail: } #define for_each_ilt_valid_client(pos, clients) \ - for (pos = 0; pos < ILT_CLI_MAX; pos++) \ + for (pos = 0; pos < MAX_ILT_CLIENTS; pos++) \ if (!clients[pos].active) { \ continue; \ } else \ @@ -1014,13 +908,13 @@ static void qed_ilt_shadow_free(struct qed_hwfn *p_hwfn) ilt_size = qed_cxt_ilt_shadow_size(p_cli); for (i = 0; p_mngr->ilt_shadow && i < ilt_size; i++) { - struct qed_dma_mem *p_dma = &p_mngr->ilt_shadow[i]; + struct phys_mem_desc *p_dma = &p_mngr->ilt_shadow[i]; - if (p_dma->p_virt) + if (p_dma->virt_addr) dma_free_coherent(&p_hwfn->cdev->pdev->dev, - p_dma->size, p_dma->p_virt, - p_dma->p_phys); - p_dma->p_virt = NULL; + p_dma->size, p_dma->virt_addr, + p_dma->phys_addr); + p_dma->virt_addr = NULL; } kfree(p_mngr->ilt_shadow); } @@ -1030,7 
+924,7 @@ static int qed_ilt_blk_alloc(struct qed_hwfn *p_hwfn, enum ilt_clients ilt_client, u32 start_line_offset) { - struct qed_dma_mem *ilt_shadow = p_hwfn->p_cxt_mngr->ilt_shadow; + struct phys_mem_desc *ilt_shadow = p_hwfn->p_cxt_mngr->ilt_shadow; u32 lines, line, sz_left, lines_to_skip = 0; /* Special handling for RoCE that supports dynamic allocation */ @@ -1059,8 +953,8 @@ static int qed_ilt_blk_alloc(struct qed_hwfn *p_hwfn, if (!p_virt) return -ENOMEM; - ilt_shadow[line].p_phys = p_phys; - ilt_shadow[line].p_virt = p_virt; + ilt_shadow[line].phys_addr = p_phys; + ilt_shadow[line].virt_addr = p_virt; ilt_shadow[line].size = size; DP_VERBOSE(p_hwfn, QED_MSG_ILT, @@ -1083,7 +977,7 @@ static int qed_ilt_shadow_alloc(struct qed_hwfn *p_hwfn) int rc; size = qed_cxt_ilt_shadow_size(clients); - p_mngr->ilt_shadow = kcalloc(size, sizeof(struct qed_dma_mem), + p_mngr->ilt_shadow = kcalloc(size, sizeof(struct phys_mem_desc), GFP_KERNEL); if (!p_mngr->ilt_shadow) { rc = -ENOMEM; @@ -1092,7 +986,7 @@ static int qed_ilt_shadow_alloc(struct qed_hwfn *p_hwfn) DP_VERBOSE(p_hwfn, QED_MSG_ILT, "Allocated 0x%x bytes for ilt shadow\n", - (u32)(size * sizeof(struct qed_dma_mem))); + (u32)(size * sizeof(struct phys_mem_desc))); for_each_ilt_valid_client(i, clients) { for (j = 0; j < ILT_CLI_PF_BLOCKS; j++) { @@ -1238,15 +1132,20 @@ int qed_cxt_mngr_alloc(struct qed_hwfn *p_hwfn) clients[ILT_CLI_TSDM].last.reg = ILT_CFG_REG(TSDM, LAST_ILT); clients[ILT_CLI_TSDM].p_size.reg = ILT_CFG_REG(TSDM, P_SIZE); /* default ILT page size for all clients is 64K */ - for (i = 0; i < ILT_CLI_MAX; i++) + for (i = 0; i < MAX_ILT_CLIENTS; i++) p_mngr->clients[i].p_size.val = ILT_DEFAULT_HW_P_SIZE; + p_mngr->conn_ctx_size = CONN_CXT_SIZE(p_hwfn); + /* Initialize task sizes */ p_mngr->task_type_size[0] = TYPE0_TASK_CXT_SIZE(p_hwfn); p_mngr->task_type_size[1] = TYPE1_TASK_CXT_SIZE(p_hwfn); - if (p_hwfn->cdev->p_iov_info) + if (p_hwfn->cdev->p_iov_info) { p_mngr->vf_count = p_hwfn->cdev->p_iov_info->total_vfs; + p_mngr->first_vf_in_pf = + p_hwfn->cdev->p_iov_info->first_vf_in_pf; + } /* Initialize the dynamic ILT allocation mutex */ mutex_init(&p_mngr->mutex); @@ -1522,7 +1421,6 @@ void qed_qm_init_pf(struct qed_hwfn *p_hwfn, params.num_vports = qm_info->num_vports; params.pf_wfq = qm_info->pf_wfq; params.pf_rl = qm_info->pf_rl; - params.link_speed = p_link->speed; params.pq_params = qm_info->qm_pq_params; params.vport_params = qm_info->qm_vport_params; @@ -1674,7 +1572,7 @@ static void qed_ilt_init_pf(struct qed_hwfn *p_hwfn) { struct qed_ilt_client_cfg *clients; struct qed_cxt_mngr *p_mngr; - struct qed_dma_mem *p_shdw; + struct phys_mem_desc *p_shdw; u32 line, rt_offst, i; qed_ilt_bounds_init(p_hwfn); @@ -1699,15 +1597,15 @@ static void qed_ilt_init_pf(struct qed_hwfn *p_hwfn) /** p_virt could be NULL incase of dynamic * allocation */ - if (p_shdw[line].p_virt) { + if (p_shdw[line].virt_addr) { SET_FIELD(ilt_hw_entry, ILT_ENTRY_VALID, 1ULL); SET_FIELD(ilt_hw_entry, ILT_ENTRY_PHY_ADDR, - (p_shdw[line].p_phys >> 12)); + (p_shdw[line].phys_addr >> 12)); DP_VERBOSE(p_hwfn, QED_MSG_ILT, "Setting RT[0x%08x] from ILT[0x%08x] [Client is %d] to Physical addr: 0x%llx\n", rt_offst, line, i, - (u64)(p_shdw[line].p_phys >> 12)); + (u64)(p_shdw[line].phys_addr >> 12)); } STORE_RT_REG_AGG(p_hwfn, rt_offst, ilt_hw_entry); @@ -2050,10 +1948,10 @@ int qed_cxt_get_cid_info(struct qed_hwfn *p_hwfn, struct qed_cxt_info *p_info) line = p_info->iid / cxts_per_p; /* Make sure context is allocated (dynamic allocation) */ - if 
(!p_mngr->ilt_shadow[line].p_virt) + if (!p_mngr->ilt_shadow[line].virt_addr) return -EINVAL; - p_info->p_cxt = p_mngr->ilt_shadow[line].p_virt + + p_info->p_cxt = p_mngr->ilt_shadow[line].virt_addr + p_info->iid % cxts_per_p * conn_cxt_size; DP_VERBOSE(p_hwfn, (QED_MSG_ILT | QED_MSG_CXT), @@ -2234,7 +2132,7 @@ int qed_cxt_get_tid_mem_info(struct qed_hwfn *p_hwfn, for (i = 0; i < total_lines; i++) { shadow_line = i + p_fl_seg->start_line - p_hwfn->p_cxt_mngr->pf_start_line; - p_info->blocks[i] = p_mngr->ilt_shadow[shadow_line].p_virt; + p_info->blocks[i] = p_mngr->ilt_shadow[shadow_line].virt_addr; } p_info->waste = ILT_PAGE_IN_BYTES(p_cli->p_size.val) - p_fl_seg->real_size_in_page; @@ -2296,7 +2194,7 @@ qed_cxt_dynamic_ilt_alloc(struct qed_hwfn *p_hwfn, mutex_lock(&p_hwfn->p_cxt_mngr->mutex); - if (p_hwfn->p_cxt_mngr->ilt_shadow[shadow_line].p_virt) + if (p_hwfn->p_cxt_mngr->ilt_shadow[shadow_line].virt_addr) goto out0; p_ptt = qed_ptt_acquire(p_hwfn); @@ -2334,8 +2232,8 @@ qed_cxt_dynamic_ilt_alloc(struct qed_hwfn *p_hwfn, } } - p_hwfn->p_cxt_mngr->ilt_shadow[shadow_line].p_virt = p_virt; - p_hwfn->p_cxt_mngr->ilt_shadow[shadow_line].p_phys = p_phys; + p_hwfn->p_cxt_mngr->ilt_shadow[shadow_line].virt_addr = p_virt; + p_hwfn->p_cxt_mngr->ilt_shadow[shadow_line].phys_addr = p_phys; p_hwfn->p_cxt_mngr->ilt_shadow[shadow_line].size = p_blk->real_size_in_page; @@ -2345,9 +2243,9 @@ qed_cxt_dynamic_ilt_alloc(struct qed_hwfn *p_hwfn, ilt_hw_entry = 0; SET_FIELD(ilt_hw_entry, ILT_ENTRY_VALID, 1ULL); - SET_FIELD(ilt_hw_entry, - ILT_ENTRY_PHY_ADDR, - (p_hwfn->p_cxt_mngr->ilt_shadow[shadow_line].p_phys >> 12)); + SET_FIELD(ilt_hw_entry, ILT_ENTRY_PHY_ADDR, + (p_hwfn->p_cxt_mngr->ilt_shadow[shadow_line].phys_addr + >> 12)); /* Write via DMAE since the PSWRQ2_REG_ILT_MEMORY line is a wide-bus */ qed_dmae_host2grc(p_hwfn, p_ptt, (u64) (uintptr_t)&ilt_hw_entry, @@ -2434,16 +2332,16 @@ qed_cxt_free_ilt_range(struct qed_hwfn *p_hwfn, } for (i = shadow_start_line; i < shadow_end_line; i++) { - if (!p_hwfn->p_cxt_mngr->ilt_shadow[i].p_virt) + if (!p_hwfn->p_cxt_mngr->ilt_shadow[i].virt_addr) continue; dma_free_coherent(&p_hwfn->cdev->pdev->dev, p_hwfn->p_cxt_mngr->ilt_shadow[i].size, - p_hwfn->p_cxt_mngr->ilt_shadow[i].p_virt, - p_hwfn->p_cxt_mngr->ilt_shadow[i].p_phys); + p_hwfn->p_cxt_mngr->ilt_shadow[i].virt_addr, + p_hwfn->p_cxt_mngr->ilt_shadow[i].phys_addr); - p_hwfn->p_cxt_mngr->ilt_shadow[i].p_virt = NULL; - p_hwfn->p_cxt_mngr->ilt_shadow[i].p_phys = 0; + p_hwfn->p_cxt_mngr->ilt_shadow[i].virt_addr = NULL; + p_hwfn->p_cxt_mngr->ilt_shadow[i].phys_addr = 0; p_hwfn->p_cxt_mngr->ilt_shadow[i].size = 0; /* compute absolute offset */ @@ -2547,8 +2445,76 @@ int qed_cxt_get_task_ctx(struct qed_hwfn *p_hwfn, ilt_idx = tid / num_tids_per_block + p_seg->start_line - p_mngr->pf_start_line; - *pp_task_ctx = (u8 *)p_mngr->ilt_shadow[ilt_idx].p_virt + + *pp_task_ctx = (u8 *)p_mngr->ilt_shadow[ilt_idx].virt_addr + (tid % num_tids_per_block) * tid_size; return 0; } + +static u16 qed_blk_calculate_pages(struct qed_ilt_cli_blk *p_blk) +{ + if (p_blk->real_size_in_page == 0) + return 0; + + return DIV_ROUND_UP(p_blk->total_size, p_blk->real_size_in_page); +} + +u16 qed_get_cdut_num_pf_init_pages(struct qed_hwfn *p_hwfn) +{ + struct qed_ilt_client_cfg *p_cli; + struct qed_ilt_cli_blk *p_blk; + u16 i, pages = 0; + + p_cli = &p_hwfn->p_cxt_mngr->clients[ILT_CLI_CDUT]; + for (i = 0; i < NUM_TASK_PF_SEGMENTS; i++) { + p_blk = &p_cli->pf_blks[CDUT_FL_SEG_BLK(i, PF)]; + pages += qed_blk_calculate_pages(p_blk); + } + + 
return pages; +} + +u16 qed_get_cdut_num_vf_init_pages(struct qed_hwfn *p_hwfn) +{ + struct qed_ilt_client_cfg *p_cli; + struct qed_ilt_cli_blk *p_blk; + u16 i, pages = 0; + + p_cli = &p_hwfn->p_cxt_mngr->clients[ILT_CLI_CDUT]; + for (i = 0; i < NUM_TASK_VF_SEGMENTS; i++) { + p_blk = &p_cli->vf_blks[CDUT_FL_SEG_BLK(i, VF)]; + pages += qed_blk_calculate_pages(p_blk); + } + + return pages; +} + +u16 qed_get_cdut_num_pf_work_pages(struct qed_hwfn *p_hwfn) +{ + struct qed_ilt_client_cfg *p_cli; + struct qed_ilt_cli_blk *p_blk; + u16 i, pages = 0; + + p_cli = &p_hwfn->p_cxt_mngr->clients[ILT_CLI_CDUT]; + for (i = 0; i < NUM_TASK_PF_SEGMENTS; i++) { + p_blk = &p_cli->pf_blks[CDUT_SEG_BLK(i)]; + pages += qed_blk_calculate_pages(p_blk); + } + + return pages; +} + +u16 qed_get_cdut_num_vf_work_pages(struct qed_hwfn *p_hwfn) +{ + struct qed_ilt_client_cfg *p_cli; + struct qed_ilt_cli_blk *p_blk; + u16 pages = 0, i; + + p_cli = &p_hwfn->p_cxt_mngr->clients[ILT_CLI_CDUT]; + for (i = 0; i < NUM_TASK_VF_SEGMENTS; i++) { + p_blk = &p_cli->vf_blks[CDUT_SEG_BLK(i)]; + pages += qed_blk_calculate_pages(p_blk); + } + + return pages; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.h b/drivers/net/ethernet/qlogic/qed/qed_cxt.h index 758a8b4c0de8..c4e815f6cabd 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.h +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.h @@ -242,4 +242,134 @@ int qed_cxt_free_proto_ilt(struct qed_hwfn *p_hwfn, enum protocol_type proto); #define QED_CTX_FL_MEM 1 int qed_cxt_get_task_ctx(struct qed_hwfn *p_hwfn, u32 tid, u8 ctx_type, void **task_ctx); + +/* Max number of connection types in HW (DQ/CDU etc.) */ +#define MAX_CONN_TYPES PROTOCOLID_COMMON +#define NUM_TASK_TYPES 2 +#define NUM_TASK_PF_SEGMENTS 4 +#define NUM_TASK_VF_SEGMENTS 1 + +/* PF per protocl configuration object */ +#define TASK_SEGMENTS (NUM_TASK_PF_SEGMENTS + NUM_TASK_VF_SEGMENTS) +#define TASK_SEGMENT_VF (NUM_TASK_PF_SEGMENTS) + +struct qed_tid_seg { + u32 count; + u8 type; + bool has_fl_mem; +}; + +struct qed_conn_type_cfg { + u32 cid_count; + u32 cids_per_vf; + struct qed_tid_seg tid_seg[TASK_SEGMENTS]; +}; + +/* ILT Client configuration, + * Per connection type (protocol) resources (cids, tis, vf cids etc.) 
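The qed_get_cdut_num_*_pages() helpers above all reduce to the same arithmetic: per block, DIV_ROUND_UP(total_size, real_size_in_page), guarded against an inactive (zero-sized) block, then summed over the relevant segment blocks. A stand-alone version of that computation, using a simplified block type:

#include <linux/kernel.h>

struct example_ilt_blk {
	u32 total_size;		/* 0 means the block is not active */
	u32 real_size_in_page;
};

static u16 example_blk_pages(const struct example_ilt_blk *blk)
{
	if (!blk->real_size_in_page)
		return 0;

	return DIV_ROUND_UP(blk->total_size, blk->real_size_in_page);
}

static u16 example_sum_pages(const struct example_ilt_blk *blks, unsigned int n)
{
	u16 pages = 0;
	unsigned int i;

	for (i = 0; i < n; i++)
		pages += example_blk_pages(&blks[i]);

	return pages;
}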
+ * 1 - for connection context (CDUC) and for each task context we need two + * values, for regular task context and for force load memory + */ +#define ILT_CLI_PF_BLOCKS (1 + NUM_TASK_PF_SEGMENTS * 2) +#define ILT_CLI_VF_BLOCKS (1 + NUM_TASK_VF_SEGMENTS * 2) +#define CDUC_BLK (0) +#define SRQ_BLK (0) +#define CDUT_SEG_BLK(n) (1 + (u8)(n)) +#define CDUT_FL_SEG_BLK(n, X) (1 + (n) + NUM_TASK_ ## X ## _SEGMENTS) + +struct ilt_cfg_pair { + u32 reg; + u32 val; +}; + +struct qed_ilt_cli_blk { + u32 total_size; /* 0 means not active */ + u32 real_size_in_page; + u32 start_line; + u32 dynamic_line_offset; + u32 dynamic_line_cnt; +}; + +struct qed_ilt_client_cfg { + bool active; + + /* ILT boundaries */ + struct ilt_cfg_pair first; + struct ilt_cfg_pair last; + struct ilt_cfg_pair p_size; + + /* ILT client blocks for PF */ + struct qed_ilt_cli_blk pf_blks[ILT_CLI_PF_BLOCKS]; + u32 pf_total_lines; + + /* ILT client blocks for VFs */ + struct qed_ilt_cli_blk vf_blks[ILT_CLI_VF_BLOCKS]; + u32 vf_total_lines; +}; + +struct qed_cid_acquired_map { + u32 start_cid; + u32 max_count; + unsigned long *cid_map; +}; + +struct qed_src_t2 { + struct phys_mem_desc *dma_mem; + u32 num_pages; + u64 first_free; + u64 last_free; +}; + +struct qed_cxt_mngr { + /* Per protocl configuration */ + struct qed_conn_type_cfg conn_cfg[MAX_CONN_TYPES]; + + /* computed ILT structure */ + struct qed_ilt_client_cfg clients[MAX_ILT_CLIENTS]; + + /* Task type sizes */ + u32 task_type_size[NUM_TASK_TYPES]; + + /* total number of VFs for this hwfn - + * ALL VFs are symmetric in terms of HW resources + */ + u32 vf_count; + u32 first_vf_in_pf; + + /* Acquired CIDs */ + struct qed_cid_acquired_map acquired[MAX_CONN_TYPES]; + + struct qed_cid_acquired_map + acquired_vf[MAX_CONN_TYPES][MAX_NUM_VFS]; + + /* ILT shadow table */ + struct phys_mem_desc *ilt_shadow; + u32 ilt_shadow_size; + u32 pf_start_line; + + /* Mutex for a dynamic ILT allocation */ + struct mutex mutex; + + /* SRC T2 */ + struct qed_src_t2 src_t2; + u32 t2_num_pages; + u64 first_free; + u64 last_free; + + /* total number of SRQ's for this hwfn */ + u32 srq_count; + + /* Maximal number of L2 steering filters */ + u32 arfs_count; + + u8 task_type_id; + u16 task_ctx_size; + u16 conn_ctx_size; +}; + +u16 qed_get_cdut_num_pf_init_pages(struct qed_hwfn *p_hwfn); +u16 qed_get_cdut_num_vf_init_pages(struct qed_hwfn *p_hwfn); +u16 qed_get_cdut_num_pf_work_pages(struct qed_hwfn *p_hwfn); +u16 qed_get_cdut_num_vf_work_pages(struct qed_hwfn *p_hwfn); + #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c index 859caa6c1a1f..f4eebaabb6d0 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.c +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -7,6 +7,7 @@ #include <linux/vmalloc.h> #include <linux/crc32.h> #include "qed.h" +#include "qed_cxt.h" #include "qed_hsi.h" #include "qed_hw.h" #include "qed_mcp.h" @@ -22,27 +23,28 @@ enum mem_groups { MEM_GROUP_BRB_RAM, MEM_GROUP_BRB_MEM, MEM_GROUP_PRS_MEM, + MEM_GROUP_SDM_MEM, + MEM_GROUP_PBUF, MEM_GROUP_IOR, + MEM_GROUP_RAM, MEM_GROUP_BTB_RAM, + MEM_GROUP_RDIF_CTX, + MEM_GROUP_TDIF_CTX, + MEM_GROUP_CFC_MEM, MEM_GROUP_CONN_CFC_MEM, - MEM_GROUP_TASK_CFC_MEM, MEM_GROUP_CAU_PI, MEM_GROUP_CAU_MEM, + MEM_GROUP_CAU_MEM_EXT, MEM_GROUP_PXP_ILT, - MEM_GROUP_TM_MEM, - MEM_GROUP_SDM_MEM, - MEM_GROUP_PBUF, - MEM_GROUP_RAM, MEM_GROUP_MULD_MEM, MEM_GROUP_BTB_MEM, - MEM_GROUP_RDIF_CTX, - MEM_GROUP_TDIF_CTX, - MEM_GROUP_CFC_MEM, MEM_GROUP_IGU_MEM, MEM_GROUP_IGU_MSIX, MEM_GROUP_CAU_SB, 
MEM_GROUP_BMB_RAM, MEM_GROUP_BMB_MEM, + MEM_GROUP_TM_MEM, + MEM_GROUP_TASK_CFC_MEM, MEM_GROUPS_NUM }; @@ -56,27 +58,28 @@ static const char * const s_mem_group_names[] = { "BRB_RAM", "BRB_MEM", "PRS_MEM", + "SDM_MEM", + "PBUF", "IOR", + "RAM", "BTB_RAM", + "RDIF_CTX", + "TDIF_CTX", + "CFC_MEM", "CONN_CFC_MEM", - "TASK_CFC_MEM", "CAU_PI", "CAU_MEM", + "CAU_MEM_EXT", "PXP_ILT", - "TM_MEM", - "SDM_MEM", - "PBUF", - "RAM", "MULD_MEM", "BTB_MEM", - "RDIF_CTX", - "TDIF_CTX", - "CFC_MEM", "IGU_MEM", "IGU_MSIX", "CAU_SB", "BMB_RAM", "BMB_MEM", + "TM_MEM", + "TASK_CFC_MEM", }; /* Idle check conditions */ @@ -170,35 +173,66 @@ static u32(*cond_arr[]) (const u32 *r, const u32 *imm) = { cond13, }; +#define NUM_PHYS_BLOCKS 84 + +#define NUM_DBG_RESET_REGS 8 + /******************************* Data Types **********************************/ -enum platform_ids { - PLATFORM_ASIC, +enum hw_types { + HW_TYPE_ASIC, PLATFORM_RESERVED, PLATFORM_RESERVED2, PLATFORM_RESERVED3, - MAX_PLATFORM_IDS + PLATFORM_RESERVED4, + MAX_HW_TYPES +}; + +/* CM context types */ +enum cm_ctx_types { + CM_CTX_CONN_AG, + CM_CTX_CONN_ST, + CM_CTX_TASK_AG, + CM_CTX_TASK_ST, + NUM_CM_CTX_TYPES +}; + +/* Debug bus frame modes */ +enum dbg_bus_frame_modes { + DBG_BUS_FRAME_MODE_4ST = 0, /* 4 Storm dwords (no HW) */ + DBG_BUS_FRAME_MODE_2ST_2HW = 1, /* 2 Storm dwords, 2 HW dwords */ + DBG_BUS_FRAME_MODE_1ST_3HW = 2, /* 1 Storm dwords, 3 HW dwords */ + DBG_BUS_FRAME_MODE_4HW = 3, /* 4 HW dwords (no Storms) */ + DBG_BUS_FRAME_MODE_8HW = 4, /* 8 HW dwords (no Storms) */ + DBG_BUS_NUM_FRAME_MODES }; /* Chip constant definitions */ struct chip_defs { const char *name; + u32 num_ilt_pages; }; -/* Platform constant definitions */ -struct platform_defs { +/* HW type constant definitions */ +struct hw_type_defs { const char *name; u32 delay_factor; u32 dmae_thresh; u32 log_thresh; }; +/* RBC reset definitions */ +struct rbc_reset_defs { + u32 reset_reg_addr; + u32 reset_val[MAX_CHIP_IDS]; +}; + /* Storm constant definitions. * Addresses are in bytes, sizes are in quad-regs. 
*/ struct storm_defs { char letter; - enum block_id block_id; + enum block_id sem_block_id; enum dbg_bus_clients dbg_client_id[MAX_CHIP_IDS]; bool has_vfc; u32 sem_fast_mem_addr; @@ -207,47 +241,26 @@ struct storm_defs { u32 sem_slow_mode_addr; u32 sem_slow_mode1_conf_addr; u32 sem_sync_dbg_empty_addr; - u32 sem_slow_dbg_empty_addr; + u32 sem_gpre_vect_addr; u32 cm_ctx_wr_addr; - u32 cm_conn_ag_ctx_lid_size; - u32 cm_conn_ag_ctx_rd_addr; - u32 cm_conn_st_ctx_lid_size; - u32 cm_conn_st_ctx_rd_addr; - u32 cm_task_ag_ctx_lid_size; - u32 cm_task_ag_ctx_rd_addr; - u32 cm_task_st_ctx_lid_size; - u32 cm_task_st_ctx_rd_addr; + u32 cm_ctx_rd_addr[NUM_CM_CTX_TYPES]; + u32 cm_ctx_lid_sizes[MAX_CHIP_IDS][NUM_CM_CTX_TYPES]; }; -/* Block constant definitions */ -struct block_defs { - const char *name; - bool exists[MAX_CHIP_IDS]; - bool associated_to_storm; - - /* Valid only if associated_to_storm is true */ - u32 storm_id; - enum dbg_bus_clients dbg_client_id[MAX_CHIP_IDS]; - u32 dbg_select_addr; - u32 dbg_enable_addr; - u32 dbg_shift_addr; - u32 dbg_force_valid_addr; - u32 dbg_force_frame_addr; - bool has_reset_bit; - - /* If true, block is taken out of reset before dump */ - bool unreset; - enum dbg_reset_regs reset_reg; - - /* Bit offset in reset register */ - u8 reset_bit_offset; +/* Debug Bus Constraint operation constant definitions */ +struct dbg_bus_constraint_op_defs { + u8 hw_op_val; + bool is_cyclic; }; -/* Reset register definitions */ -struct reset_reg_defs { - u32 addr; +/* Storm Mode definitions */ +struct storm_mode_defs { + const char *name; + bool is_fast_dbg; + u8 id_in_hw; + u32 src_disable_reg_addr; + u32 src_enable_val; bool exists[MAX_CHIP_IDS]; - u32 unreset_val[MAX_CHIP_IDS]; }; struct grc_param_defs { @@ -257,7 +270,7 @@ struct grc_param_defs { bool is_preset; bool is_persistent; u32 exclude_all_preset_val; - u32 crash_preset_val; + u32 crash_preset_val[MAX_CHIP_IDS]; }; /* Address is in 128b units. Width is in bits. */ @@ -314,15 +327,7 @@ struct split_type_defs { /******************************** Constants **********************************/ -#define MAX_LCIDS 320 -#define MAX_LTIDS 320 - -#define NUM_IOR_SETS 2 -#define IORS_PER_SET 176 -#define IOR_SET_OFFSET(set_id) ((set_id) * 256) - #define BYTES_IN_DWORD sizeof(u32) - /* In the macros below, size and offset are specified in bits */ #define CEIL_DWORDS(size) DIV_ROUND_UP(size, 32) #define FIELD_BIT_OFFSET(type, field) type ## _ ## field ## _ ## OFFSET @@ -348,20 +353,17 @@ struct split_type_defs { qed_wr(dev, ptt, addr, (arr)[i]); \ } while (0) -#define ARR_REG_RD(dev, ptt, addr, arr, arr_size) \ - do { \ - for (i = 0; i < (arr_size); i++) \ - (arr)[i] = qed_rd(dev, ptt, addr); \ - } while (0) - #define DWORDS_TO_BYTES(dwords) ((dwords) * BYTES_IN_DWORD) #define BYTES_TO_DWORDS(bytes) ((bytes) / BYTES_IN_DWORD) -/* Extra lines include a signature line + optional latency events line */ -#define NUM_EXTRA_DBG_LINES(block_desc) \ - (1 + ((block_desc)->has_latency_events ? 1 : 0)) -#define NUM_DBG_LINES(block_desc) \ - ((block_desc)->num_of_lines + NUM_EXTRA_DBG_LINES(block_desc)) +/* extra lines include a signature line + optional latency events line */ +#define NUM_EXTRA_DBG_LINES(block) \ + (GET_FIELD((block)->flags, DBG_BLOCK_CHIP_HAS_LATENCY_EVENTS) ? 
2 : 1) +#define NUM_DBG_LINES(block) \ + ((block)->num_of_dbg_bus_lines + NUM_EXTRA_DBG_LINES(block)) + +#define USE_DMAE true +#define PROTECT_WIDE_BUS true #define RAM_LINES_TO_DWORDS(lines) ((lines) * 2) #define RAM_LINES_TO_BYTES(lines) \ @@ -380,6 +382,9 @@ struct split_type_defs { #define IDLE_CHK_RESULT_REG_HDR_DWORDS \ BYTES_TO_DWORDS(sizeof(struct dbg_idle_chk_result_reg_hdr)) +#define PAGE_MEM_DESC_SIZE_DWORDS \ + BYTES_TO_DWORDS(sizeof(struct phys_mem_desc)) + #define IDLE_CHK_MAX_ENTRIES_SIZE 32 /* The sizes and offsets below are specified in bits */ @@ -425,7 +430,9 @@ struct split_type_defs { #define STATIC_DEBUG_LINE_DWORDS 9 -#define NUM_COMMON_GLOBAL_PARAMS 8 +#define NUM_COMMON_GLOBAL_PARAMS 9 + +#define MAX_RECURSION_DEPTH 10 #define FW_IMG_MAIN 1 @@ -449,1054 +456,121 @@ struct split_type_defs { (MCP_REG_SCRATCH + \ offsetof(struct static_init, sections[SPAD_SECTION_TRACE])) +#define MAX_SW_PLTAFORM_STR_SIZE 64 + #define EMPTY_FW_VERSION_STR "???_???_???_???" #define EMPTY_FW_IMAGE_STR "???????????????" /***************************** Constant Arrays *******************************/ -struct dbg_array { - const u32 *ptr; - u32 size_in_dwords; -}; - -/* Debug arrays */ -static struct dbg_array s_dbg_arrays[MAX_BIN_DBG_BUFFER_TYPE] = { {NULL} }; - /* Chip constant definitions array */ static struct chip_defs s_chip_defs[MAX_CHIP_IDS] = { - {"bb"}, - {"ah"}, - {"reserved"}, + {"bb", PSWRQ2_REG_ILT_MEMORY_SIZE_BB / 2}, + {"ah", PSWRQ2_REG_ILT_MEMORY_SIZE_K2 / 2} }; /* Storm constant definitions array */ static struct storm_defs s_storm_defs[] = { /* Tstorm */ {'T', BLOCK_TSEM, - {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, - DBG_BUS_CLIENT_RBCT}, true, - TSEM_REG_FAST_MEMORY, - TSEM_REG_DBG_FRAME_MODE_BB_K2, TSEM_REG_SLOW_DBG_ACTIVE_BB_K2, - TSEM_REG_SLOW_DBG_MODE_BB_K2, TSEM_REG_DBG_MODE1_CFG_BB_K2, - TSEM_REG_SYNC_DBG_EMPTY, TSEM_REG_SLOW_DBG_EMPTY_BB_K2, - TCM_REG_CTX_RBC_ACCS, - 4, TCM_REG_AGG_CON_CTX, - 16, TCM_REG_SM_CON_CTX, - 2, TCM_REG_AGG_TASK_CTX, - 4, TCM_REG_SM_TASK_CTX}, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT}, + true, + TSEM_REG_FAST_MEMORY, + TSEM_REG_DBG_FRAME_MODE_BB_K2, TSEM_REG_SLOW_DBG_ACTIVE_BB_K2, + TSEM_REG_SLOW_DBG_MODE_BB_K2, TSEM_REG_DBG_MODE1_CFG_BB_K2, + TSEM_REG_SYNC_DBG_EMPTY, TSEM_REG_DBG_GPRE_VECT, + TCM_REG_CTX_RBC_ACCS, + {TCM_REG_AGG_CON_CTX, TCM_REG_SM_CON_CTX, TCM_REG_AGG_TASK_CTX, + TCM_REG_SM_TASK_CTX}, + {{4, 16, 2, 4}, {4, 16, 2, 4}} /* {bb} {k2} */ + }, /* Mstorm */ {'M', BLOCK_MSEM, - {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM, - DBG_BUS_CLIENT_RBCM}, false, - MSEM_REG_FAST_MEMORY, - MSEM_REG_DBG_FRAME_MODE_BB_K2, MSEM_REG_SLOW_DBG_ACTIVE_BB_K2, - MSEM_REG_SLOW_DBG_MODE_BB_K2, MSEM_REG_DBG_MODE1_CFG_BB_K2, - MSEM_REG_SYNC_DBG_EMPTY, MSEM_REG_SLOW_DBG_EMPTY_BB_K2, - MCM_REG_CTX_RBC_ACCS, - 1, MCM_REG_AGG_CON_CTX, - 10, MCM_REG_SM_CON_CTX, - 2, MCM_REG_AGG_TASK_CTX, - 7, MCM_REG_SM_TASK_CTX}, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM}, + false, + MSEM_REG_FAST_MEMORY, + MSEM_REG_DBG_FRAME_MODE_BB_K2, + MSEM_REG_SLOW_DBG_ACTIVE_BB_K2, + MSEM_REG_SLOW_DBG_MODE_BB_K2, + MSEM_REG_DBG_MODE1_CFG_BB_K2, + MSEM_REG_SYNC_DBG_EMPTY, + MSEM_REG_DBG_GPRE_VECT, + MCM_REG_CTX_RBC_ACCS, + {MCM_REG_AGG_CON_CTX, MCM_REG_SM_CON_CTX, MCM_REG_AGG_TASK_CTX, + MCM_REG_SM_TASK_CTX }, + {{1, 10, 2, 7}, {1, 10, 2, 7}} /* {bb} {k2}*/ + }, /* Ustorm */ {'U', BLOCK_USEM, - {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, - DBG_BUS_CLIENT_RBCU}, false, - USEM_REG_FAST_MEMORY, - USEM_REG_DBG_FRAME_MODE_BB_K2, USEM_REG_SLOW_DBG_ACTIVE_BB_K2, - 
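The reworked NUM_EXTRA_DBG_LINES macro reads a packed per-chip flags word rather than the old has_latency_events boolean. The standalone sketch below shows that bitfield pattern in isolation; the GET_FIELD definition, the shift/mask values and the sample line count are illustrative assumptions, not the driver's actual layout.

#include <stdio.h>

/* Illustrative field layout: bit 0 = has latency events (assumed). */
#define DBG_BLOCK_CHIP_HAS_LATENCY_EVENTS_SHIFT	0
#define DBG_BLOCK_CHIP_HAS_LATENCY_EVENTS_MASK	0x1

/* Simplified GET_FIELD-style extraction of a named bitfield. */
#define GET_FIELD(value, name) \
	(((value) >> name##_SHIFT) & name##_MASK)

struct dbg_block_chip_sketch {
	unsigned int flags;                /* packed capability bits */
	unsigned int num_of_dbg_bus_lines; /* HW debug lines in this block */
};

/* Extra lines: one signature line, plus a latency-events line if present. */
static unsigned int num_extra_dbg_lines(const struct dbg_block_chip_sketch *b)
{
	return GET_FIELD(b->flags, DBG_BLOCK_CHIP_HAS_LATENCY_EVENTS) ? 2 : 1;
}

static unsigned int num_dbg_lines(const struct dbg_block_chip_sketch *b)
{
	return b->num_of_dbg_bus_lines + num_extra_dbg_lines(b);
}

int main(void)
{
	struct dbg_block_chip_sketch blk = { .flags = 0x1,
					     .num_of_dbg_bus_lines = 32 };

	printf("total debug lines: %u\n", num_dbg_lines(&blk)); /* 34 */
	return 0;
}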
USEM_REG_SLOW_DBG_MODE_BB_K2, USEM_REG_DBG_MODE1_CFG_BB_K2, - USEM_REG_SYNC_DBG_EMPTY, USEM_REG_SLOW_DBG_EMPTY_BB_K2, - UCM_REG_CTX_RBC_ACCS, - 2, UCM_REG_AGG_CON_CTX, - 13, UCM_REG_SM_CON_CTX, - 3, UCM_REG_AGG_TASK_CTX, - 3, UCM_REG_SM_TASK_CTX}, + {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU}, + false, + USEM_REG_FAST_MEMORY, + USEM_REG_DBG_FRAME_MODE_BB_K2, + USEM_REG_SLOW_DBG_ACTIVE_BB_K2, + USEM_REG_SLOW_DBG_MODE_BB_K2, + USEM_REG_DBG_MODE1_CFG_BB_K2, + USEM_REG_SYNC_DBG_EMPTY, + USEM_REG_DBG_GPRE_VECT, + UCM_REG_CTX_RBC_ACCS, + {UCM_REG_AGG_CON_CTX, UCM_REG_SM_CON_CTX, UCM_REG_AGG_TASK_CTX, + UCM_REG_SM_TASK_CTX}, + {{2, 13, 3, 3}, {2, 13, 3, 3}} /* {bb} {k2} */ + }, /* Xstorm */ {'X', BLOCK_XSEM, - {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, - DBG_BUS_CLIENT_RBCX}, false, - XSEM_REG_FAST_MEMORY, - XSEM_REG_DBG_FRAME_MODE_BB_K2, XSEM_REG_SLOW_DBG_ACTIVE_BB_K2, - XSEM_REG_SLOW_DBG_MODE_BB_K2, XSEM_REG_DBG_MODE1_CFG_BB_K2, - XSEM_REG_SYNC_DBG_EMPTY, XSEM_REG_SLOW_DBG_EMPTY_BB_K2, - XCM_REG_CTX_RBC_ACCS, - 9, XCM_REG_AGG_CON_CTX, - 15, XCM_REG_SM_CON_CTX, - 0, 0, - 0, 0}, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX}, + false, + XSEM_REG_FAST_MEMORY, + XSEM_REG_DBG_FRAME_MODE_BB_K2, + XSEM_REG_SLOW_DBG_ACTIVE_BB_K2, + XSEM_REG_SLOW_DBG_MODE_BB_K2, + XSEM_REG_DBG_MODE1_CFG_BB_K2, + XSEM_REG_SYNC_DBG_EMPTY, + XSEM_REG_DBG_GPRE_VECT, + XCM_REG_CTX_RBC_ACCS, + {XCM_REG_AGG_CON_CTX, XCM_REG_SM_CON_CTX, 0, 0}, + {{9, 15, 0, 0}, {9, 15, 0, 0}} /* {bb} {k2} */ + }, /* Ystorm */ {'Y', BLOCK_YSEM, - {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY, - DBG_BUS_CLIENT_RBCY}, false, - YSEM_REG_FAST_MEMORY, - YSEM_REG_DBG_FRAME_MODE_BB_K2, YSEM_REG_SLOW_DBG_ACTIVE_BB_K2, - YSEM_REG_SLOW_DBG_MODE_BB_K2, YSEM_REG_DBG_MODE1_CFG_BB_K2, - YSEM_REG_SYNC_DBG_EMPTY, TSEM_REG_SLOW_DBG_EMPTY_BB_K2, - YCM_REG_CTX_RBC_ACCS, - 2, YCM_REG_AGG_CON_CTX, - 3, YCM_REG_SM_CON_CTX, - 2, YCM_REG_AGG_TASK_CTX, - 12, YCM_REG_SM_TASK_CTX}, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY}, + false, + YSEM_REG_FAST_MEMORY, + YSEM_REG_DBG_FRAME_MODE_BB_K2, + YSEM_REG_SLOW_DBG_ACTIVE_BB_K2, + YSEM_REG_SLOW_DBG_MODE_BB_K2, + YSEM_REG_DBG_MODE1_CFG_BB_K2, + YSEM_REG_SYNC_DBG_EMPTY, + YSEM_REG_DBG_GPRE_VECT, + YCM_REG_CTX_RBC_ACCS, + {YCM_REG_AGG_CON_CTX, YCM_REG_SM_CON_CTX, YCM_REG_AGG_TASK_CTX, + YCM_REG_SM_TASK_CTX}, + {{2, 3, 2, 12}, {2, 3, 2, 12}} /* {bb} {k2} */ + }, /* Pstorm */ {'P', BLOCK_PSEM, - {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, - DBG_BUS_CLIENT_RBCS}, true, - PSEM_REG_FAST_MEMORY, - PSEM_REG_DBG_FRAME_MODE_BB_K2, PSEM_REG_SLOW_DBG_ACTIVE_BB_K2, - PSEM_REG_SLOW_DBG_MODE_BB_K2, PSEM_REG_DBG_MODE1_CFG_BB_K2, - PSEM_REG_SYNC_DBG_EMPTY, PSEM_REG_SLOW_DBG_EMPTY_BB_K2, - PCM_REG_CTX_RBC_ACCS, - 0, 0, - 10, PCM_REG_SM_CON_CTX, - 0, 0, - 0, 0} -}; - -/* Block definitions array */ - -static struct block_defs block_grc_defs = { - "grc", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN}, - GRC_REG_DBG_SELECT, GRC_REG_DBG_DWORD_ENABLE, - GRC_REG_DBG_SHIFT, GRC_REG_DBG_FORCE_VALID, - GRC_REG_DBG_FORCE_FRAME, - true, false, DBG_RESET_REG_MISC_PL_UA, 1 -}; - -static struct block_defs block_miscs_defs = { - "miscs", {true, true, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, - 0, 0, 0, 0, 0, - false, false, MAX_DBG_RESET_REGS, 0 -}; - -static struct block_defs block_misc_defs = { - "misc", {true, true, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, - 0, 0, 0, 0, 0, - false, false, MAX_DBG_RESET_REGS, 0 -}; - -static struct 
block_defs block_dbu_defs = { - "dbu", {true, true, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, - 0, 0, 0, 0, 0, - false, false, MAX_DBG_RESET_REGS, 0 -}; - -static struct block_defs block_pglue_b_defs = { - "pglue_b", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCH, DBG_BUS_CLIENT_RBCH, DBG_BUS_CLIENT_RBCH}, - PGLUE_B_REG_DBG_SELECT, PGLUE_B_REG_DBG_DWORD_ENABLE, - PGLUE_B_REG_DBG_SHIFT, PGLUE_B_REG_DBG_FORCE_VALID, - PGLUE_B_REG_DBG_FORCE_FRAME, - true, false, DBG_RESET_REG_MISCS_PL_HV, 1 -}; - -static struct block_defs block_cnig_defs = { - "cnig", - {true, true, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCW, - DBG_BUS_CLIENT_RBCW}, - CNIG_REG_DBG_SELECT_K2_E5, CNIG_REG_DBG_DWORD_ENABLE_K2_E5, - CNIG_REG_DBG_SHIFT_K2_E5, CNIG_REG_DBG_FORCE_VALID_K2_E5, - CNIG_REG_DBG_FORCE_FRAME_K2_E5, - true, false, DBG_RESET_REG_MISCS_PL_HV, 0 -}; - -static struct block_defs block_cpmu_defs = { - "cpmu", {true, true, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, - 0, 0, 0, 0, 0, - true, false, DBG_RESET_REG_MISCS_PL_HV, 8 -}; - -static struct block_defs block_ncsi_defs = { - "ncsi", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ}, - NCSI_REG_DBG_SELECT, NCSI_REG_DBG_DWORD_ENABLE, - NCSI_REG_DBG_SHIFT, NCSI_REG_DBG_FORCE_VALID, - NCSI_REG_DBG_FORCE_FRAME, - true, false, DBG_RESET_REG_MISCS_PL_HV, 5 -}; - -static struct block_defs block_opte_defs = { - "opte", {true, true, false}, false, 0, - {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, - 0, 0, 0, 0, 0, - true, false, DBG_RESET_REG_MISCS_PL_HV, 4 -}; - -static struct block_defs block_bmb_defs = { - "bmb", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCB, DBG_BUS_CLIENT_RBCB}, - BMB_REG_DBG_SELECT, BMB_REG_DBG_DWORD_ENABLE, - BMB_REG_DBG_SHIFT, BMB_REG_DBG_FORCE_VALID, - BMB_REG_DBG_FORCE_FRAME, - true, false, DBG_RESET_REG_MISCS_PL_UA, 7 -}; - -static struct block_defs block_pcie_defs = { - "pcie", - {true, true, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH, - DBG_BUS_CLIENT_RBCH}, - PCIE_REG_DBG_COMMON_SELECT_K2_E5, - PCIE_REG_DBG_COMMON_DWORD_ENABLE_K2_E5, - PCIE_REG_DBG_COMMON_SHIFT_K2_E5, - PCIE_REG_DBG_COMMON_FORCE_VALID_K2_E5, - PCIE_REG_DBG_COMMON_FORCE_FRAME_K2_E5, - false, false, MAX_DBG_RESET_REGS, 0 -}; - -static struct block_defs block_mcp_defs = { - "mcp", {true, true, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, - 0, 0, 0, 0, 0, - false, false, MAX_DBG_RESET_REGS, 0 -}; - -static struct block_defs block_mcp2_defs = { - "mcp2", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ}, - MCP2_REG_DBG_SELECT, MCP2_REG_DBG_DWORD_ENABLE, - MCP2_REG_DBG_SHIFT, MCP2_REG_DBG_FORCE_VALID, - MCP2_REG_DBG_FORCE_FRAME, - false, false, MAX_DBG_RESET_REGS, 0 -}; - -static struct block_defs block_pswhst_defs = { - "pswhst", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, - PSWHST_REG_DBG_SELECT, PSWHST_REG_DBG_DWORD_ENABLE, - PSWHST_REG_DBG_SHIFT, PSWHST_REG_DBG_FORCE_VALID, - PSWHST_REG_DBG_FORCE_FRAME, - true, false, DBG_RESET_REG_MISC_PL_HV, 0 -}; - -static struct block_defs block_pswhst2_defs = { - "pswhst2", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, - PSWHST2_REG_DBG_SELECT, PSWHST2_REG_DBG_DWORD_ENABLE, - PSWHST2_REG_DBG_SHIFT, 
PSWHST2_REG_DBG_FORCE_VALID, - PSWHST2_REG_DBG_FORCE_FRAME, - true, false, DBG_RESET_REG_MISC_PL_HV, 0 -}; - -static struct block_defs block_pswrd_defs = { - "pswrd", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, - PSWRD_REG_DBG_SELECT, PSWRD_REG_DBG_DWORD_ENABLE, - PSWRD_REG_DBG_SHIFT, PSWRD_REG_DBG_FORCE_VALID, - PSWRD_REG_DBG_FORCE_FRAME, - true, false, DBG_RESET_REG_MISC_PL_HV, 2 -}; - -static struct block_defs block_pswrd2_defs = { - "pswrd2", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, - PSWRD2_REG_DBG_SELECT, PSWRD2_REG_DBG_DWORD_ENABLE, - PSWRD2_REG_DBG_SHIFT, PSWRD2_REG_DBG_FORCE_VALID, - PSWRD2_REG_DBG_FORCE_FRAME, - true, false, DBG_RESET_REG_MISC_PL_HV, 2 -}; - -static struct block_defs block_pswwr_defs = { - "pswwr", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, - PSWWR_REG_DBG_SELECT, PSWWR_REG_DBG_DWORD_ENABLE, - PSWWR_REG_DBG_SHIFT, PSWWR_REG_DBG_FORCE_VALID, - PSWWR_REG_DBG_FORCE_FRAME, - true, false, DBG_RESET_REG_MISC_PL_HV, 3 -}; - -static struct block_defs block_pswwr2_defs = { - "pswwr2", {true, true, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, - 0, 0, 0, 0, 0, - true, false, DBG_RESET_REG_MISC_PL_HV, 3 -}; - -static struct block_defs block_pswrq_defs = { - "pswrq", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, - PSWRQ_REG_DBG_SELECT, PSWRQ_REG_DBG_DWORD_ENABLE, - PSWRQ_REG_DBG_SHIFT, PSWRQ_REG_DBG_FORCE_VALID, - PSWRQ_REG_DBG_FORCE_FRAME, - true, false, DBG_RESET_REG_MISC_PL_HV, 1 -}; - -static struct block_defs block_pswrq2_defs = { - "pswrq2", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, - PSWRQ2_REG_DBG_SELECT, PSWRQ2_REG_DBG_DWORD_ENABLE, - PSWRQ2_REG_DBG_SHIFT, PSWRQ2_REG_DBG_FORCE_VALID, - PSWRQ2_REG_DBG_FORCE_FRAME, - true, false, DBG_RESET_REG_MISC_PL_HV, 1 -}; - -static struct block_defs block_pglcs_defs = { - "pglcs", - {true, true, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH, - DBG_BUS_CLIENT_RBCH}, - PGLCS_REG_DBG_SELECT_K2_E5, PGLCS_REG_DBG_DWORD_ENABLE_K2_E5, - PGLCS_REG_DBG_SHIFT_K2_E5, PGLCS_REG_DBG_FORCE_VALID_K2_E5, - PGLCS_REG_DBG_FORCE_FRAME_K2_E5, - true, false, DBG_RESET_REG_MISCS_PL_HV, 2 -}; - -static struct block_defs block_ptu_defs = { - "ptu", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, - PTU_REG_DBG_SELECT, PTU_REG_DBG_DWORD_ENABLE, - PTU_REG_DBG_SHIFT, PTU_REG_DBG_FORCE_VALID, - PTU_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 20 -}; - -static struct block_defs block_dmae_defs = { - "dmae", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, - DMAE_REG_DBG_SELECT, DMAE_REG_DBG_DWORD_ENABLE, - DMAE_REG_DBG_SHIFT, DMAE_REG_DBG_FORCE_VALID, - DMAE_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 28 -}; - -static struct block_defs block_tcm_defs = { - "tcm", - {true, true, true}, true, DBG_TSTORM_ID, - {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT}, - TCM_REG_DBG_SELECT, TCM_REG_DBG_DWORD_ENABLE, - TCM_REG_DBG_SHIFT, TCM_REG_DBG_FORCE_VALID, - TCM_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 5 -}; - -static struct block_defs block_mcm_defs = { - "mcm", - {true, true, true}, true, DBG_MSTORM_ID, - {DBG_BUS_CLIENT_RBCT, 
DBG_BUS_CLIENT_RBCM, DBG_BUS_CLIENT_RBCM}, - MCM_REG_DBG_SELECT, MCM_REG_DBG_DWORD_ENABLE, - MCM_REG_DBG_SHIFT, MCM_REG_DBG_FORCE_VALID, - MCM_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 3 -}; - -static struct block_defs block_ucm_defs = { - "ucm", - {true, true, true}, true, DBG_USTORM_ID, - {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU}, - UCM_REG_DBG_SELECT, UCM_REG_DBG_DWORD_ENABLE, - UCM_REG_DBG_SHIFT, UCM_REG_DBG_FORCE_VALID, - UCM_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 8 -}; - -static struct block_defs block_xcm_defs = { - "xcm", - {true, true, true}, true, DBG_XSTORM_ID, - {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX}, - XCM_REG_DBG_SELECT, XCM_REG_DBG_DWORD_ENABLE, - XCM_REG_DBG_SHIFT, XCM_REG_DBG_FORCE_VALID, - XCM_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 19 -}; - -static struct block_defs block_ycm_defs = { - "ycm", - {true, true, true}, true, DBG_YSTORM_ID, - {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY, DBG_BUS_CLIENT_RBCY}, - YCM_REG_DBG_SELECT, YCM_REG_DBG_DWORD_ENABLE, - YCM_REG_DBG_SHIFT, YCM_REG_DBG_FORCE_VALID, - YCM_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 5 -}; - -static struct block_defs block_pcm_defs = { - "pcm", - {true, true, true}, true, DBG_PSTORM_ID, - {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS}, - PCM_REG_DBG_SELECT, PCM_REG_DBG_DWORD_ENABLE, - PCM_REG_DBG_SHIFT, PCM_REG_DBG_FORCE_VALID, - PCM_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 4 -}; - -static struct block_defs block_qm_defs = { - "qm", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCQ, DBG_BUS_CLIENT_RBCQ}, - QM_REG_DBG_SELECT, QM_REG_DBG_DWORD_ENABLE, - QM_REG_DBG_SHIFT, QM_REG_DBG_FORCE_VALID, - QM_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 16 -}; - -static struct block_defs block_tm_defs = { - "tm", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS}, - TM_REG_DBG_SELECT, TM_REG_DBG_DWORD_ENABLE, - TM_REG_DBG_SHIFT, TM_REG_DBG_FORCE_VALID, - TM_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 17 -}; - -static struct block_defs block_dorq_defs = { - "dorq", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY, DBG_BUS_CLIENT_RBCY}, - DORQ_REG_DBG_SELECT, DORQ_REG_DBG_DWORD_ENABLE, - DORQ_REG_DBG_SHIFT, DORQ_REG_DBG_FORCE_VALID, - DORQ_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 18 -}; - -static struct block_defs block_brb_defs = { - "brb", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR}, - BRB_REG_DBG_SELECT, BRB_REG_DBG_DWORD_ENABLE, - BRB_REG_DBG_SHIFT, BRB_REG_DBG_FORCE_VALID, - BRB_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 0 -}; - -static struct block_defs block_src_defs = { - "src", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF}, - SRC_REG_DBG_SELECT, SRC_REG_DBG_DWORD_ENABLE, - SRC_REG_DBG_SHIFT, SRC_REG_DBG_FORCE_VALID, - SRC_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 2 -}; - -static struct block_defs block_prs_defs = { - "prs", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR}, - PRS_REG_DBG_SELECT, PRS_REG_DBG_DWORD_ENABLE, - PRS_REG_DBG_SHIFT, PRS_REG_DBG_FORCE_VALID, - PRS_REG_DBG_FORCE_FRAME, - true, true, 
DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 1 -}; - -static struct block_defs block_tsdm_defs = { - "tsdm", - {true, true, true}, true, DBG_TSTORM_ID, - {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT}, - TSDM_REG_DBG_SELECT, TSDM_REG_DBG_DWORD_ENABLE, - TSDM_REG_DBG_SHIFT, TSDM_REG_DBG_FORCE_VALID, - TSDM_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 3 -}; - -static struct block_defs block_msdm_defs = { - "msdm", - {true, true, true}, true, DBG_MSTORM_ID, - {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM, DBG_BUS_CLIENT_RBCM}, - MSDM_REG_DBG_SELECT, MSDM_REG_DBG_DWORD_ENABLE, - MSDM_REG_DBG_SHIFT, MSDM_REG_DBG_FORCE_VALID, - MSDM_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 6 -}; - -static struct block_defs block_usdm_defs = { - "usdm", - {true, true, true}, true, DBG_USTORM_ID, - {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU}, - USDM_REG_DBG_SELECT, USDM_REG_DBG_DWORD_ENABLE, - USDM_REG_DBG_SHIFT, USDM_REG_DBG_FORCE_VALID, - USDM_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 7 -}; - -static struct block_defs block_xsdm_defs = { - "xsdm", - {true, true, true}, true, DBG_XSTORM_ID, - {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX}, - XSDM_REG_DBG_SELECT, XSDM_REG_DBG_DWORD_ENABLE, - XSDM_REG_DBG_SHIFT, XSDM_REG_DBG_FORCE_VALID, - XSDM_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 20 -}; - -static struct block_defs block_ysdm_defs = { - "ysdm", - {true, true, true}, true, DBG_YSTORM_ID, - {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY, DBG_BUS_CLIENT_RBCY}, - YSDM_REG_DBG_SELECT, YSDM_REG_DBG_DWORD_ENABLE, - YSDM_REG_DBG_SHIFT, YSDM_REG_DBG_FORCE_VALID, - YSDM_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 8 -}; - -static struct block_defs block_psdm_defs = { - "psdm", - {true, true, true}, true, DBG_PSTORM_ID, - {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS}, - PSDM_REG_DBG_SELECT, PSDM_REG_DBG_DWORD_ENABLE, - PSDM_REG_DBG_SHIFT, PSDM_REG_DBG_FORCE_VALID, - PSDM_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 7 -}; - -static struct block_defs block_tsem_defs = { - "tsem", - {true, true, true}, true, DBG_TSTORM_ID, - {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT}, - TSEM_REG_DBG_SELECT, TSEM_REG_DBG_DWORD_ENABLE, - TSEM_REG_DBG_SHIFT, TSEM_REG_DBG_FORCE_VALID, - TSEM_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 4 -}; - -static struct block_defs block_msem_defs = { - "msem", - {true, true, true}, true, DBG_MSTORM_ID, - {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM, DBG_BUS_CLIENT_RBCM}, - MSEM_REG_DBG_SELECT, MSEM_REG_DBG_DWORD_ENABLE, - MSEM_REG_DBG_SHIFT, MSEM_REG_DBG_FORCE_VALID, - MSEM_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 9 -}; - -static struct block_defs block_usem_defs = { - "usem", - {true, true, true}, true, DBG_USTORM_ID, - {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU}, - USEM_REG_DBG_SELECT, USEM_REG_DBG_DWORD_ENABLE, - USEM_REG_DBG_SHIFT, USEM_REG_DBG_FORCE_VALID, - USEM_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 9 -}; - -static struct block_defs block_xsem_defs = { - "xsem", - {true, true, true}, true, DBG_XSTORM_ID, - {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX}, - XSEM_REG_DBG_SELECT, XSEM_REG_DBG_DWORD_ENABLE, - XSEM_REG_DBG_SHIFT, XSEM_REG_DBG_FORCE_VALID, - XSEM_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 21 -}; - -static struct 
block_defs block_ysem_defs = { - "ysem", - {true, true, true}, true, DBG_YSTORM_ID, - {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY, DBG_BUS_CLIENT_RBCY}, - YSEM_REG_DBG_SELECT, YSEM_REG_DBG_DWORD_ENABLE, - YSEM_REG_DBG_SHIFT, YSEM_REG_DBG_FORCE_VALID, - YSEM_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 11 -}; - -static struct block_defs block_psem_defs = { - "psem", - {true, true, true}, true, DBG_PSTORM_ID, - {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS}, - PSEM_REG_DBG_SELECT, PSEM_REG_DBG_DWORD_ENABLE, - PSEM_REG_DBG_SHIFT, PSEM_REG_DBG_FORCE_VALID, - PSEM_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 10 -}; - -static struct block_defs block_rss_defs = { - "rss", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT}, - RSS_REG_DBG_SELECT, RSS_REG_DBG_DWORD_ENABLE, - RSS_REG_DBG_SHIFT, RSS_REG_DBG_FORCE_VALID, - RSS_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 18 -}; - -static struct block_defs block_tmld_defs = { - "tmld", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM, DBG_BUS_CLIENT_RBCM}, - TMLD_REG_DBG_SELECT, TMLD_REG_DBG_DWORD_ENABLE, - TMLD_REG_DBG_SHIFT, TMLD_REG_DBG_FORCE_VALID, - TMLD_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 13 -}; - -static struct block_defs block_muld_defs = { - "muld", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU}, - MULD_REG_DBG_SELECT, MULD_REG_DBG_DWORD_ENABLE, - MULD_REG_DBG_SHIFT, MULD_REG_DBG_FORCE_VALID, - MULD_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 14 -}; - -static struct block_defs block_yuld_defs = { - "yuld", - {true, true, false}, false, 0, - {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, - MAX_DBG_BUS_CLIENTS}, - YULD_REG_DBG_SELECT_BB_K2, YULD_REG_DBG_DWORD_ENABLE_BB_K2, - YULD_REG_DBG_SHIFT_BB_K2, YULD_REG_DBG_FORCE_VALID_BB_K2, - YULD_REG_DBG_FORCE_FRAME_BB_K2, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, - 15 -}; - -static struct block_defs block_xyld_defs = { - "xyld", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX}, - XYLD_REG_DBG_SELECT, XYLD_REG_DBG_DWORD_ENABLE, - XYLD_REG_DBG_SHIFT, XYLD_REG_DBG_FORCE_VALID, - XYLD_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 12 -}; - -static struct block_defs block_ptld_defs = { - "ptld", - {false, false, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCT}, - PTLD_REG_DBG_SELECT_E5, PTLD_REG_DBG_DWORD_ENABLE_E5, - PTLD_REG_DBG_SHIFT_E5, PTLD_REG_DBG_FORCE_VALID_E5, - PTLD_REG_DBG_FORCE_FRAME_E5, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, - 28 -}; - -static struct block_defs block_ypld_defs = { - "ypld", - {false, false, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCS}, - YPLD_REG_DBG_SELECT_E5, YPLD_REG_DBG_DWORD_ENABLE_E5, - YPLD_REG_DBG_SHIFT_E5, YPLD_REG_DBG_FORCE_VALID_E5, - YPLD_REG_DBG_FORCE_FRAME_E5, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, - 27 -}; - -static struct block_defs block_prm_defs = { - "prm", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM, DBG_BUS_CLIENT_RBCM}, - PRM_REG_DBG_SELECT, PRM_REG_DBG_DWORD_ENABLE, - PRM_REG_DBG_SHIFT, PRM_REG_DBG_FORCE_VALID, - PRM_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 21 -}; - -static struct block_defs block_pbf_pb1_defs = { - "pbf_pb1", - {true, true, true}, false, 0, - 
{DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCV, DBG_BUS_CLIENT_RBCV}, - PBF_PB1_REG_DBG_SELECT, PBF_PB1_REG_DBG_DWORD_ENABLE, - PBF_PB1_REG_DBG_SHIFT, PBF_PB1_REG_DBG_FORCE_VALID, - PBF_PB1_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, - 11 -}; - -static struct block_defs block_pbf_pb2_defs = { - "pbf_pb2", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCV, DBG_BUS_CLIENT_RBCV}, - PBF_PB2_REG_DBG_SELECT, PBF_PB2_REG_DBG_DWORD_ENABLE, - PBF_PB2_REG_DBG_SHIFT, PBF_PB2_REG_DBG_FORCE_VALID, - PBF_PB2_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, - 12 -}; - -static struct block_defs block_rpb_defs = { - "rpb", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM, DBG_BUS_CLIENT_RBCM}, - RPB_REG_DBG_SELECT, RPB_REG_DBG_DWORD_ENABLE, - RPB_REG_DBG_SHIFT, RPB_REG_DBG_FORCE_VALID, - RPB_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 13 -}; - -static struct block_defs block_btb_defs = { - "btb", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCV, DBG_BUS_CLIENT_RBCV}, - BTB_REG_DBG_SELECT, BTB_REG_DBG_DWORD_ENABLE, - BTB_REG_DBG_SHIFT, BTB_REG_DBG_FORCE_VALID, - BTB_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 10 -}; - -static struct block_defs block_pbf_defs = { - "pbf", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCV, DBG_BUS_CLIENT_RBCV}, - PBF_REG_DBG_SELECT, PBF_REG_DBG_DWORD_ENABLE, - PBF_REG_DBG_SHIFT, PBF_REG_DBG_FORCE_VALID, - PBF_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 15 -}; - -static struct block_defs block_rdif_defs = { - "rdif", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM, DBG_BUS_CLIENT_RBCM}, - RDIF_REG_DBG_SELECT, RDIF_REG_DBG_DWORD_ENABLE, - RDIF_REG_DBG_SHIFT, RDIF_REG_DBG_FORCE_VALID, - RDIF_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 16 -}; - -static struct block_defs block_tdif_defs = { - "tdif", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS}, - TDIF_REG_DBG_SELECT, TDIF_REG_DBG_DWORD_ENABLE, - TDIF_REG_DBG_SHIFT, TDIF_REG_DBG_FORCE_VALID, - TDIF_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 17 -}; - -static struct block_defs block_cdu_defs = { - "cdu", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF}, - CDU_REG_DBG_SELECT, CDU_REG_DBG_DWORD_ENABLE, - CDU_REG_DBG_SHIFT, CDU_REG_DBG_FORCE_VALID, - CDU_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 23 -}; - -static struct block_defs block_ccfc_defs = { - "ccfc", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF}, - CCFC_REG_DBG_SELECT, CCFC_REG_DBG_DWORD_ENABLE, - CCFC_REG_DBG_SHIFT, CCFC_REG_DBG_FORCE_VALID, - CCFC_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 24 -}; - -static struct block_defs block_tcfc_defs = { - "tcfc", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF}, - TCFC_REG_DBG_SELECT, TCFC_REG_DBG_DWORD_ENABLE, - TCFC_REG_DBG_SHIFT, TCFC_REG_DBG_FORCE_VALID, - TCFC_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 25 -}; - -static struct block_defs block_igu_defs = { - "igu", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, - IGU_REG_DBG_SELECT, IGU_REG_DBG_DWORD_ENABLE, - IGU_REG_DBG_SHIFT, 
IGU_REG_DBG_FORCE_VALID, - IGU_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 27 -}; - -static struct block_defs block_cau_defs = { - "cau", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, - CAU_REG_DBG_SELECT, CAU_REG_DBG_DWORD_ENABLE, - CAU_REG_DBG_SHIFT, CAU_REG_DBG_FORCE_VALID, - CAU_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 19 -}; - -static struct block_defs block_rgfs_defs = { - "rgfs", {false, false, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, - 0, 0, 0, 0, 0, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 29 -}; - -static struct block_defs block_rgsrc_defs = { - "rgsrc", - {false, false, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH}, - RGSRC_REG_DBG_SELECT_E5, RGSRC_REG_DBG_DWORD_ENABLE_E5, - RGSRC_REG_DBG_SHIFT_E5, RGSRC_REG_DBG_FORCE_VALID_E5, - RGSRC_REG_DBG_FORCE_FRAME_E5, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, - 30 -}; - -static struct block_defs block_tgfs_defs = { - "tgfs", {false, false, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, - 0, 0, 0, 0, 0, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 30 -}; - -static struct block_defs block_tgsrc_defs = { - "tgsrc", - {false, false, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCV}, - TGSRC_REG_DBG_SELECT_E5, TGSRC_REG_DBG_DWORD_ENABLE_E5, - TGSRC_REG_DBG_SHIFT_E5, TGSRC_REG_DBG_FORCE_VALID_E5, - TGSRC_REG_DBG_FORCE_FRAME_E5, - true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, - 31 -}; - -static struct block_defs block_umac_defs = { - "umac", - {true, true, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCZ, - DBG_BUS_CLIENT_RBCZ}, - UMAC_REG_DBG_SELECT_K2_E5, UMAC_REG_DBG_DWORD_ENABLE_K2_E5, - UMAC_REG_DBG_SHIFT_K2_E5, UMAC_REG_DBG_FORCE_VALID_K2_E5, - UMAC_REG_DBG_FORCE_FRAME_K2_E5, - true, false, DBG_RESET_REG_MISCS_PL_HV, 6 -}; - -static struct block_defs block_xmac_defs = { - "xmac", {true, false, false}, false, 0, - {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, - 0, 0, 0, 0, 0, - false, false, MAX_DBG_RESET_REGS, 0 -}; - -static struct block_defs block_dbg_defs = { - "dbg", {true, true, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, - 0, 0, 0, 0, 0, - true, true, DBG_RESET_REG_MISC_PL_PDA_VAUX, 3 -}; - -static struct block_defs block_nig_defs = { - "nig", - {true, true, true}, false, 0, - {DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN}, - NIG_REG_DBG_SELECT, NIG_REG_DBG_DWORD_ENABLE, - NIG_REG_DBG_SHIFT, NIG_REG_DBG_FORCE_VALID, - NIG_REG_DBG_FORCE_FRAME, - true, true, DBG_RESET_REG_MISC_PL_PDA_VAUX, 0 -}; - -static struct block_defs block_wol_defs = { - "wol", - {false, true, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ}, - WOL_REG_DBG_SELECT_K2_E5, WOL_REG_DBG_DWORD_ENABLE_K2_E5, - WOL_REG_DBG_SHIFT_K2_E5, WOL_REG_DBG_FORCE_VALID_K2_E5, - WOL_REG_DBG_FORCE_FRAME_K2_E5, - true, true, DBG_RESET_REG_MISC_PL_PDA_VAUX, 7 -}; - -static struct block_defs block_bmbn_defs = { - "bmbn", - {false, true, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCB, - DBG_BUS_CLIENT_RBCB}, - BMBN_REG_DBG_SELECT_K2_E5, BMBN_REG_DBG_DWORD_ENABLE_K2_E5, - BMBN_REG_DBG_SHIFT_K2_E5, BMBN_REG_DBG_FORCE_VALID_K2_E5, - BMBN_REG_DBG_FORCE_FRAME_K2_E5, - false, false, MAX_DBG_RESET_REGS, 0 -}; - -static struct block_defs block_ipc_defs = { - "ipc", {true, true, true}, 
false, 0, - {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, - 0, 0, 0, 0, 0, - true, false, DBG_RESET_REG_MISCS_PL_UA, 8 -}; - -static struct block_defs block_nwm_defs = { - "nwm", - {false, true, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCW, DBG_BUS_CLIENT_RBCW}, - NWM_REG_DBG_SELECT_K2_E5, NWM_REG_DBG_DWORD_ENABLE_K2_E5, - NWM_REG_DBG_SHIFT_K2_E5, NWM_REG_DBG_FORCE_VALID_K2_E5, - NWM_REG_DBG_FORCE_FRAME_K2_E5, - true, false, DBG_RESET_REG_MISCS_PL_HV_2, 0 -}; - -static struct block_defs block_nws_defs = { - "nws", - {false, true, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCW, DBG_BUS_CLIENT_RBCW}, - NWS_REG_DBG_SELECT_K2_E5, NWS_REG_DBG_DWORD_ENABLE_K2_E5, - NWS_REG_DBG_SHIFT_K2_E5, NWS_REG_DBG_FORCE_VALID_K2_E5, - NWS_REG_DBG_FORCE_FRAME_K2_E5, - true, false, DBG_RESET_REG_MISCS_PL_HV, 12 -}; - -static struct block_defs block_ms_defs = { - "ms", - {false, true, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ}, - MS_REG_DBG_SELECT_K2_E5, MS_REG_DBG_DWORD_ENABLE_K2_E5, - MS_REG_DBG_SHIFT_K2_E5, MS_REG_DBG_FORCE_VALID_K2_E5, - MS_REG_DBG_FORCE_FRAME_K2_E5, - true, false, DBG_RESET_REG_MISCS_PL_HV, 13 -}; - -static struct block_defs block_phy_pcie_defs = { - "phy_pcie", - {false, true, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH, - DBG_BUS_CLIENT_RBCH}, - PCIE_REG_DBG_COMMON_SELECT_K2_E5, - PCIE_REG_DBG_COMMON_DWORD_ENABLE_K2_E5, - PCIE_REG_DBG_COMMON_SHIFT_K2_E5, - PCIE_REG_DBG_COMMON_FORCE_VALID_K2_E5, - PCIE_REG_DBG_COMMON_FORCE_FRAME_K2_E5, - false, false, MAX_DBG_RESET_REGS, 0 -}; - -static struct block_defs block_led_defs = { - "led", {false, true, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, - 0, 0, 0, 0, 0, - true, false, DBG_RESET_REG_MISCS_PL_HV, 14 -}; - -static struct block_defs block_avs_wrap_defs = { - "avs_wrap", {false, true, false}, false, 0, - {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, - 0, 0, 0, 0, 0, - true, false, DBG_RESET_REG_MISCS_PL_UA, 11 -}; - -static struct block_defs block_pxpreqbus_defs = { - "pxpreqbus", {false, false, false}, false, 0, - {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, - 0, 0, 0, 0, 0, - false, false, MAX_DBG_RESET_REGS, 0 -}; - -static struct block_defs block_misc_aeu_defs = { - "misc_aeu", {true, true, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, - 0, 0, 0, 0, 0, - false, false, MAX_DBG_RESET_REGS, 0 -}; - -static struct block_defs block_bar0_map_defs = { - "bar0_map", {true, true, true}, false, 0, - {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, - 0, 0, 0, 0, 0, - false, false, MAX_DBG_RESET_REGS, 0 -}; - -static struct block_defs *s_block_defs[MAX_BLOCK_ID] = { - &block_grc_defs, - &block_miscs_defs, - &block_misc_defs, - &block_dbu_defs, - &block_pglue_b_defs, - &block_cnig_defs, - &block_cpmu_defs, - &block_ncsi_defs, - &block_opte_defs, - &block_bmb_defs, - &block_pcie_defs, - &block_mcp_defs, - &block_mcp2_defs, - &block_pswhst_defs, - &block_pswhst2_defs, - &block_pswrd_defs, - &block_pswrd2_defs, - &block_pswwr_defs, - &block_pswwr2_defs, - &block_pswrq_defs, - &block_pswrq2_defs, - &block_pglcs_defs, - &block_dmae_defs, - &block_ptu_defs, - &block_tcm_defs, - &block_mcm_defs, - &block_ucm_defs, - &block_xcm_defs, - &block_ycm_defs, - &block_pcm_defs, - &block_qm_defs, - &block_tm_defs, - &block_dorq_defs, - &block_brb_defs, - &block_src_defs, - &block_prs_defs, - &block_tsdm_defs, - &block_msdm_defs, - 
&block_usdm_defs, - &block_xsdm_defs, - &block_ysdm_defs, - &block_psdm_defs, - &block_tsem_defs, - &block_msem_defs, - &block_usem_defs, - &block_xsem_defs, - &block_ysem_defs, - &block_psem_defs, - &block_rss_defs, - &block_tmld_defs, - &block_muld_defs, - &block_yuld_defs, - &block_xyld_defs, - &block_ptld_defs, - &block_ypld_defs, - &block_prm_defs, - &block_pbf_pb1_defs, - &block_pbf_pb2_defs, - &block_rpb_defs, - &block_btb_defs, - &block_pbf_defs, - &block_rdif_defs, - &block_tdif_defs, - &block_cdu_defs, - &block_ccfc_defs, - &block_tcfc_defs, - &block_igu_defs, - &block_cau_defs, - &block_rgfs_defs, - &block_rgsrc_defs, - &block_tgfs_defs, - &block_tgsrc_defs, - &block_umac_defs, - &block_xmac_defs, - &block_dbg_defs, - &block_nig_defs, - &block_wol_defs, - &block_bmbn_defs, - &block_ipc_defs, - &block_nwm_defs, - &block_nws_defs, - &block_ms_defs, - &block_phy_pcie_defs, - &block_led_defs, - &block_avs_wrap_defs, - &block_pxpreqbus_defs, - &block_misc_aeu_defs, - &block_bar0_map_defs, -}; - -static struct platform_defs s_platform_defs[] = { + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS}, + true, + PSEM_REG_FAST_MEMORY, + PSEM_REG_DBG_FRAME_MODE_BB_K2, + PSEM_REG_SLOW_DBG_ACTIVE_BB_K2, + PSEM_REG_SLOW_DBG_MODE_BB_K2, + PSEM_REG_DBG_MODE1_CFG_BB_K2, + PSEM_REG_SYNC_DBG_EMPTY, + PSEM_REG_DBG_GPRE_VECT, + PCM_REG_CTX_RBC_ACCS, + {0, PCM_REG_SM_CON_CTX, 0, 0}, + {{0, 10, 0, 0}, {0, 10, 0, 0}} /* {bb} {k2} */ + }, +}; + +static struct hw_type_defs s_hw_type_defs[] = { + /* HW_TYPE_ASIC */ {"asic", 1, 256, 32768}, {"reserved", 0, 0, 0}, {"reserved2", 0, 0, 0}, @@ -1505,146 +579,159 @@ static struct platform_defs s_platform_defs[] = { static struct grc_param_defs s_grc_param_defs[] = { /* DBG_GRC_PARAM_DUMP_TSTORM */ - {{1, 1, 1}, 0, 1, false, false, 1, 1}, + {{1, 1}, 0, 1, false, false, 1, {1, 1}}, /* DBG_GRC_PARAM_DUMP_MSTORM */ - {{1, 1, 1}, 0, 1, false, false, 1, 1}, + {{1, 1}, 0, 1, false, false, 1, {1, 1}}, /* DBG_GRC_PARAM_DUMP_USTORM */ - {{1, 1, 1}, 0, 1, false, false, 1, 1}, + {{1, 1}, 0, 1, false, false, 1, {1, 1}}, /* DBG_GRC_PARAM_DUMP_XSTORM */ - {{1, 1, 1}, 0, 1, false, false, 1, 1}, + {{1, 1}, 0, 1, false, false, 1, {1, 1}}, /* DBG_GRC_PARAM_DUMP_YSTORM */ - {{1, 1, 1}, 0, 1, false, false, 1, 1}, + {{1, 1}, 0, 1, false, false, 1, {1, 1}}, /* DBG_GRC_PARAM_DUMP_PSTORM */ - {{1, 1, 1}, 0, 1, false, false, 1, 1}, + {{1, 1}, 0, 1, false, false, 1, {1, 1}}, /* DBG_GRC_PARAM_DUMP_REGS */ - {{1, 1, 1}, 0, 1, false, false, 0, 1}, + {{1, 1}, 0, 1, false, false, 0, {1, 1}}, /* DBG_GRC_PARAM_DUMP_RAM */ - {{1, 1, 1}, 0, 1, false, false, 0, 1}, + {{1, 1}, 0, 1, false, false, 0, {1, 1}}, /* DBG_GRC_PARAM_DUMP_PBUF */ - {{1, 1, 1}, 0, 1, false, false, 0, 1}, + {{1, 1}, 0, 1, false, false, 0, {1, 1}}, /* DBG_GRC_PARAM_DUMP_IOR */ - {{0, 0, 0}, 0, 1, false, false, 0, 1}, + {{0, 0}, 0, 1, false, false, 0, {1, 1}}, /* DBG_GRC_PARAM_DUMP_VFC */ - {{0, 0, 0}, 0, 1, false, false, 0, 1}, + {{0, 0}, 0, 1, false, false, 0, {1, 1}}, /* DBG_GRC_PARAM_DUMP_CM_CTX */ - {{1, 1, 1}, 0, 1, false, false, 0, 1}, + {{1, 1}, 0, 1, false, false, 0, {1, 1}}, /* DBG_GRC_PARAM_DUMP_ILT */ - {{1, 1, 1}, 0, 1, false, false, 0, 1}, + {{1, 1}, 0, 1, false, false, 0, {1, 1}}, /* DBG_GRC_PARAM_DUMP_RSS */ - {{1, 1, 1}, 0, 1, false, false, 0, 1}, + {{1, 1}, 0, 1, false, false, 0, {1, 1}}, /* DBG_GRC_PARAM_DUMP_CAU */ - {{1, 1, 1}, 0, 1, false, false, 0, 1}, + {{1, 1}, 0, 1, false, false, 0, {1, 1}}, /* DBG_GRC_PARAM_DUMP_QM */ - {{1, 1, 1}, 0, 1, false, false, 0, 1}, + {{1, 1}, 0, 1, false, false, 0, {1, 1}}, /* 
DBG_GRC_PARAM_DUMP_MCP */ - {{1, 1, 1}, 0, 1, false, false, 0, 1}, + {{1, 1}, 0, 1, false, false, 0, {1, 1}}, - /* DBG_GRC_PARAM_MCP_TRACE_META_SIZE */ - {{1, 1, 1}, 1, 0xffffffff, false, true, 0, 1}, + /* DBG_GRC_PARAM_DUMP_DORQ */ + {{1, 1}, 0, 1, false, false, 0, {1, 1}}, /* DBG_GRC_PARAM_DUMP_CFC */ - {{1, 1, 1}, 0, 1, false, false, 0, 1}, + {{1, 1}, 0, 1, false, false, 0, {1, 1}}, /* DBG_GRC_PARAM_DUMP_IGU */ - {{1, 1, 1}, 0, 1, false, false, 0, 1}, + {{1, 1}, 0, 1, false, false, 0, {1, 1}}, /* DBG_GRC_PARAM_DUMP_BRB */ - {{0, 0, 0}, 0, 1, false, false, 0, 1}, + {{0, 0}, 0, 1, false, false, 0, {1, 1}}, /* DBG_GRC_PARAM_DUMP_BTB */ - {{0, 0, 0}, 0, 1, false, false, 0, 1}, + {{0, 0}, 0, 1, false, false, 0, {1, 1}}, /* DBG_GRC_PARAM_DUMP_BMB */ - {{0, 0, 0}, 0, 1, false, false, 0, 0}, + {{0, 0}, 0, 1, false, false, 0, {0, 0}}, - /* DBG_GRC_PARAM_DUMP_NIG */ - {{1, 1, 1}, 0, 1, false, false, 0, 1}, + /* DBG_GRC_PARAM_RESERVED1 */ + {{0, 0}, 0, 1, false, false, 0, {0, 0}}, /* DBG_GRC_PARAM_DUMP_MULD */ - {{1, 1, 1}, 0, 1, false, false, 0, 1}, + {{1, 1}, 0, 1, false, false, 0, {1, 1}}, /* DBG_GRC_PARAM_DUMP_PRS */ - {{1, 1, 1}, 0, 1, false, false, 0, 1}, + {{1, 1}, 0, 1, false, false, 0, {1, 1}}, /* DBG_GRC_PARAM_DUMP_DMAE */ - {{1, 1, 1}, 0, 1, false, false, 0, 1}, + {{1, 1}, 0, 1, false, false, 0, {1, 1}}, /* DBG_GRC_PARAM_DUMP_TM */ - {{1, 1, 1}, 0, 1, false, false, 0, 1}, + {{1, 1}, 0, 1, false, false, 0, {1, 1}}, /* DBG_GRC_PARAM_DUMP_SDM */ - {{1, 1, 1}, 0, 1, false, false, 0, 1}, + {{1, 1}, 0, 1, false, false, 0, {1, 1}}, /* DBG_GRC_PARAM_DUMP_DIF */ - {{1, 1, 1}, 0, 1, false, false, 0, 1}, + {{1, 1}, 0, 1, false, false, 0, {1, 1}}, /* DBG_GRC_PARAM_DUMP_STATIC */ - {{1, 1, 1}, 0, 1, false, false, 0, 1}, + {{1, 1}, 0, 1, false, false, 0, {1, 1}}, /* DBG_GRC_PARAM_UNSTALL */ - {{0, 0, 0}, 0, 1, false, false, 0, 0}, + {{0, 0}, 0, 1, false, false, 0, {0, 0}}, - /* DBG_GRC_PARAM_NUM_LCIDS */ - {{MAX_LCIDS, MAX_LCIDS, MAX_LCIDS}, 1, MAX_LCIDS, false, false, - MAX_LCIDS, MAX_LCIDS}, + /* DBG_GRC_PARAM_RESERVED2 */ + {{0, 0}, 0, 1, false, false, 0, {0, 0}}, - /* DBG_GRC_PARAM_NUM_LTIDS */ - {{MAX_LTIDS, MAX_LTIDS, MAX_LTIDS}, 1, MAX_LTIDS, false, false, - MAX_LTIDS, MAX_LTIDS}, + /* DBG_GRC_PARAM_MCP_TRACE_META_SIZE */ + {{0, 0}, 1, 0xffffffff, false, true, 0, {0, 0}}, /* DBG_GRC_PARAM_EXCLUDE_ALL */ - {{0, 0, 0}, 0, 1, true, false, 0, 0}, + {{0, 0}, 0, 1, true, false, 0, {0, 0}}, /* DBG_GRC_PARAM_CRASH */ - {{0, 0, 0}, 0, 1, true, false, 0, 0}, + {{0, 0}, 0, 1, true, false, 0, {0, 0}}, /* DBG_GRC_PARAM_PARITY_SAFE */ - {{0, 0, 0}, 0, 1, false, false, 1, 0}, + {{0, 0}, 0, 1, false, false, 0, {0, 0}}, /* DBG_GRC_PARAM_DUMP_CM */ - {{1, 1, 1}, 0, 1, false, false, 0, 1}, + {{1, 1}, 0, 1, false, false, 0, {1, 1}}, /* DBG_GRC_PARAM_DUMP_PHY */ - {{1, 1, 1}, 0, 1, false, false, 0, 1}, + {{0, 0}, 0, 1, false, false, 0, {0, 0}}, /* DBG_GRC_PARAM_NO_MCP */ - {{0, 0, 0}, 0, 1, false, false, 0, 0}, + {{0, 0}, 0, 1, false, false, 0, {0, 0}}, /* DBG_GRC_PARAM_NO_FW_VER */ - {{0, 0, 0}, 0, 1, false, false, 0, 0} + {{0, 0}, 0, 1, false, false, 0, {0, 0}}, + + /* DBG_GRC_PARAM_RESERVED3 */ + {{0, 0}, 0, 1, false, false, 0, {0, 0}}, + + /* DBG_GRC_PARAM_DUMP_MCP_HW_DUMP */ + {{0, 1}, 0, 1, false, false, 0, {0, 1}}, + + /* DBG_GRC_PARAM_DUMP_ILT_CDUC */ + {{1, 1}, 0, 1, false, false, 0, {0, 0}}, + + /* DBG_GRC_PARAM_DUMP_ILT_CDUT */ + {{1, 1}, 0, 1, false, false, 0, {0, 0}}, + + /* DBG_GRC_PARAM_DUMP_CAU_EXT */ + {{0, 0}, 0, 1, false, false, 0, {1, 1}} }; static struct rss_mem_defs s_rss_mem_defs[] = { - { 
"rss_mem_cid", "rss_cid", 0, 32, - {256, 320, 512} }, + {"rss_mem_cid", "rss_cid", 0, 32, + {256, 320}}, - { "rss_mem_key_msb", "rss_key", 1024, 256, - {128, 208, 257} }, + {"rss_mem_key_msb", "rss_key", 1024, 256, + {128, 208}}, - { "rss_mem_key_lsb", "rss_key", 2048, 64, - {128, 208, 257} }, + {"rss_mem_key_lsb", "rss_key", 2048, 64, + {128, 208}}, - { "rss_mem_info", "rss_info", 3072, 16, - {128, 208, 256} }, + {"rss_mem_info", "rss_info", 3072, 16, + {128, 208}}, - { "rss_mem_ind", "rss_ind", 4096, 16, - {16384, 26624, 32768} } + {"rss_mem_ind", "rss_ind", 4096, 16, + {16384, 26624}} }; static struct vfc_ram_defs s_vfc_ram_defs[] = { @@ -1655,54 +742,31 @@ static struct vfc_ram_defs s_vfc_ram_defs[] = { }; static struct big_ram_defs s_big_ram_defs[] = { - { "BRB", MEM_GROUP_BRB_MEM, MEM_GROUP_BRB_RAM, DBG_GRC_PARAM_DUMP_BRB, - BRB_REG_BIG_RAM_ADDRESS, BRB_REG_BIG_RAM_DATA, - MISC_REG_BLOCK_256B_EN, {0, 0, 0}, - {153600, 180224, 282624} }, - - { "BTB", MEM_GROUP_BTB_MEM, MEM_GROUP_BTB_RAM, DBG_GRC_PARAM_DUMP_BTB, - BTB_REG_BIG_RAM_ADDRESS, BTB_REG_BIG_RAM_DATA, - MISC_REG_BLOCK_256B_EN, {0, 1, 1}, - {92160, 117760, 168960} }, - - { "BMB", MEM_GROUP_BMB_MEM, MEM_GROUP_BMB_RAM, DBG_GRC_PARAM_DUMP_BMB, - BMB_REG_BIG_RAM_ADDRESS, BMB_REG_BIG_RAM_DATA, - MISCS_REG_BLOCK_256B_EN, {0, 0, 0}, - {36864, 36864, 36864} } -}; - -static struct reset_reg_defs s_reset_regs_defs[] = { - /* DBG_RESET_REG_MISCS_PL_UA */ - { MISCS_REG_RESET_PL_UA, - {true, true, true}, {0x0, 0x0, 0x0} }, - - /* DBG_RESET_REG_MISCS_PL_HV */ - { MISCS_REG_RESET_PL_HV, - {true, true, true}, {0x0, 0x400, 0x600} }, - - /* DBG_RESET_REG_MISCS_PL_HV_2 */ - { MISCS_REG_RESET_PL_HV_2_K2_E5, - {false, true, true}, {0x0, 0x0, 0x0} }, - - /* DBG_RESET_REG_MISC_PL_UA */ - { MISC_REG_RESET_PL_UA, - {true, true, true}, {0x0, 0x0, 0x0} }, - - /* DBG_RESET_REG_MISC_PL_HV */ - { MISC_REG_RESET_PL_HV, - {true, true, true}, {0x0, 0x0, 0x0} }, + {"BRB", MEM_GROUP_BRB_MEM, MEM_GROUP_BRB_RAM, DBG_GRC_PARAM_DUMP_BRB, + BRB_REG_BIG_RAM_ADDRESS, BRB_REG_BIG_RAM_DATA, + MISC_REG_BLOCK_256B_EN, {0, 0}, + {153600, 180224}}, - /* DBG_RESET_REG_MISC_PL_PDA_VMAIN_1 */ - { MISC_REG_RESET_PL_PDA_VMAIN_1, - {true, true, true}, {0x4404040, 0x4404040, 0x404040} }, + {"BTB", MEM_GROUP_BTB_MEM, MEM_GROUP_BTB_RAM, DBG_GRC_PARAM_DUMP_BTB, + BTB_REG_BIG_RAM_ADDRESS, BTB_REG_BIG_RAM_DATA, + MISC_REG_BLOCK_256B_EN, {0, 1}, + {92160, 117760}}, - /* DBG_RESET_REG_MISC_PL_PDA_VMAIN_2 */ - { MISC_REG_RESET_PL_PDA_VMAIN_2, - {true, true, true}, {0x7, 0x7c00007, 0x5c08007} }, + {"BMB", MEM_GROUP_BMB_MEM, MEM_GROUP_BMB_RAM, DBG_GRC_PARAM_DUMP_BMB, + BMB_REG_BIG_RAM_ADDRESS, BMB_REG_BIG_RAM_DATA, + MISCS_REG_BLOCK_256B_EN, {0, 0}, + {36864, 36864}} +}; - /* DBG_RESET_REG_MISC_PL_PDA_VAUX */ - { MISC_REG_RESET_PL_PDA_VAUX, - {true, true, true}, {0x2, 0x2, 0x2} }, +static struct rbc_reset_defs s_rbc_reset_defs[] = { + {MISCS_REG_RESET_PL_HV, + {0x0, 0x400}}, + {MISC_REG_RESET_PL_PDA_VMAIN_1, + {0x4404040, 0x4404040}}, + {MISC_REG_RESET_PL_PDA_VMAIN_2, + {0x7, 0x7c00007}}, + {MISC_REG_RESET_PL_PDA_VAUX, + {0x2, 0x2}}, }; static struct phy_defs s_phy_defs[] = { @@ -1785,9 +849,19 @@ static void qed_dbg_grc_init_params(struct qed_hwfn *p_hwfn) } } +/* Sets pointer and size for the specified binary buffer type */ +static void qed_set_dbg_bin_buf(struct qed_hwfn *p_hwfn, + enum bin_dbg_buffer_type buf_type, + const u32 *ptr, u32 size) +{ + struct virt_mem_desc *buf = &p_hwfn->dbg_arrays[buf_type]; + + buf->ptr = (void *)ptr; + buf->size = size; +} + /* Initializes debug data for 
the specified device */ -static enum dbg_status qed_dbg_dev_init(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +static enum dbg_status qed_dbg_dev_init(struct qed_hwfn *p_hwfn) { struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; u8 num_pfs = 0, max_pfs_per_port = 0; @@ -1812,26 +886,25 @@ static enum dbg_status qed_dbg_dev_init(struct qed_hwfn *p_hwfn, return DBG_STATUS_UNKNOWN_CHIP; } - /* Set platofrm */ - dev_data->platform_id = PLATFORM_ASIC; + /* Set HW type */ + dev_data->hw_type = HW_TYPE_ASIC; dev_data->mode_enable[MODE_ASIC] = 1; /* Set port mode */ - switch (qed_rd(p_hwfn, p_ptt, MISC_REG_PORT_MODE)) { - case 0: + switch (p_hwfn->cdev->num_ports_in_engine) { + case 1: dev_data->mode_enable[MODE_PORTS_PER_ENG_1] = 1; break; - case 1: + case 2: dev_data->mode_enable[MODE_PORTS_PER_ENG_2] = 1; break; - case 2: + case 4: dev_data->mode_enable[MODE_PORTS_PER_ENG_4] = 1; break; } /* Set 100G mode */ - if (dev_data->chip_id == CHIP_BB && - qed_rd(p_hwfn, p_ptt, CNIG_REG_NW_PORT_MODE_BB) == 2) + if (QED_IS_CMT(p_hwfn->cdev)) dev_data->mode_enable[MODE_100G] = 1; /* Set number of ports */ @@ -1857,14 +930,36 @@ static enum dbg_status qed_dbg_dev_init(struct qed_hwfn *p_hwfn, return DBG_STATUS_OK; } -static struct dbg_bus_block *get_dbg_bus_block_desc(struct qed_hwfn *p_hwfn, - enum block_id block_id) +static const struct dbg_block *get_dbg_block(struct qed_hwfn *p_hwfn, + enum block_id block_id) +{ + const struct dbg_block *dbg_block; + + dbg_block = p_hwfn->dbg_arrays[BIN_BUF_DBG_BLOCKS].ptr; + return dbg_block + block_id; +} + +static const struct dbg_block_chip *qed_get_dbg_block_per_chip(struct qed_hwfn + *p_hwfn, + enum block_id + block_id) { struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; - return (struct dbg_bus_block *)&dbg_bus_blocks[block_id * - MAX_CHIP_IDS + - dev_data->chip_id]; + return (const struct dbg_block_chip *) + p_hwfn->dbg_arrays[BIN_BUF_DBG_BLOCKS_CHIP_DATA].ptr + + block_id * MAX_CHIP_IDS + dev_data->chip_id; +} + +static const struct dbg_reset_reg *qed_get_dbg_reset_reg(struct qed_hwfn + *p_hwfn, + u8 reset_reg_id) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + + return (const struct dbg_reset_reg *) + p_hwfn->dbg_arrays[BIN_BUF_DBG_RESET_REGS].ptr + + reset_reg_id * MAX_CHIP_IDS + dev_data->chip_id; } /* Reads the FW info structure for the specified Storm from the chip, @@ -1885,8 +980,9 @@ static void qed_read_storm_fw_info(struct qed_hwfn *p_hwfn, * The address is located in the last line of the Storm RAM. */ addr = storm->sem_fast_mem_addr + SEM_FAST_REG_INT_RAM + - DWORDS_TO_BYTES(SEM_FAST_REG_INT_RAM_SIZE_BB_K2) - - sizeof(fw_info_location); + DWORDS_TO_BYTES(SEM_FAST_REG_INT_RAM_SIZE) - + sizeof(fw_info_location); + dest = (u32 *)&fw_info_location; for (i = 0; i < BYTES_TO_DWORDS(sizeof(fw_info_location)); @@ -2081,6 +1177,29 @@ static u32 qed_dump_mfw_ver_param(struct qed_hwfn *p_hwfn, return qed_dump_str_param(dump_buf, dump, "mfw-version", mfw_ver_str); } +/* Reads the chip revision from the chip and writes it as a param to the + * specified buffer. Returns the dumped size in dwords. 
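The FW info pointer is read from the last line of the Storm's internal RAM, and the patch switches that computation to a single SEM_FAST_REG_INT_RAM_SIZE constant. A standalone sketch of the address arithmetic follows; the base, offset and size values are placeholders, not real register constants.

#include <stdio.h>

/* Address of the last RAM line: fast-memory base + INT_RAM offset +
 * RAM size (converted from dwords to bytes) - sizeof(pointer struct).
 */
#define BYTES_IN_DWORD 4
#define DWORDS_TO_BYTES(dwords) ((dwords) * BYTES_IN_DWORD)

struct fw_info_location_sketch {
	unsigned int grc_addr;
	unsigned int size;
};

int main(void)
{
	unsigned int sem_fast_mem_addr = 0x1700000; /* placeholder base */
	unsigned int int_ram_offset = 0x40000;      /* placeholder offset */
	unsigned int int_ram_size_dwords = 20480;   /* placeholder size */
	unsigned int addr;

	addr = sem_fast_mem_addr + int_ram_offset +
	       DWORDS_TO_BYTES(int_ram_size_dwords) -
	       sizeof(struct fw_info_location_sketch);

	printf("fw_info_location at 0x%x\n", addr);
	return 0;
}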
+ */ +static u32 qed_dump_chip_revision_param(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + char param_str[3] = "??"; + + if (dev_data->hw_type == HW_TYPE_ASIC) { + u32 chip_rev, chip_metal; + + chip_rev = qed_rd(p_hwfn, p_ptt, MISCS_REG_CHIP_REV); + chip_metal = qed_rd(p_hwfn, p_ptt, MISCS_REG_CHIP_METAL); + + param_str[0] = 'a' + (u8)chip_rev; + param_str[1] = '0' + (u8)chip_metal; + } + + return qed_dump_str_param(dump_buf, dump, "chip-revision", param_str); +} + /* Writes a section header to the specified buffer. * Returns the dumped size in dwords. */ @@ -2104,7 +1223,8 @@ static u32 qed_dump_common_global_params(struct qed_hwfn *p_hwfn, u8 num_params; /* Dump global params section header */ - num_params = NUM_COMMON_GLOBAL_PARAMS + num_specific_global_params; + num_params = NUM_COMMON_GLOBAL_PARAMS + num_specific_global_params + + (dev_data->chip_id == CHIP_BB ? 1 : 0); offset += qed_dump_section_hdr(dump_buf + offset, dump, "global_params", num_params); @@ -2112,6 +1232,8 @@ static u32 qed_dump_common_global_params(struct qed_hwfn *p_hwfn, offset += qed_dump_fw_ver_param(p_hwfn, p_ptt, dump_buf + offset, dump); offset += qed_dump_mfw_ver_param(p_hwfn, p_ptt, dump_buf + offset, dump); + offset += qed_dump_chip_revision_param(p_hwfn, + p_ptt, dump_buf + offset, dump); offset += qed_dump_num_param(dump_buf + offset, dump, "tools-version", TOOLS_VERSION); offset += qed_dump_str_param(dump_buf + offset, @@ -2121,11 +1243,12 @@ static u32 qed_dump_common_global_params(struct qed_hwfn *p_hwfn, offset += qed_dump_str_param(dump_buf + offset, dump, "platform", - s_platform_defs[dev_data->platform_id]. - name); - offset += - qed_dump_num_param(dump_buf + offset, dump, "pci-func", - p_hwfn->abs_pf_id); + s_hw_type_defs[dev_data->hw_type].name); + offset += qed_dump_num_param(dump_buf + offset, + dump, "pci-func", p_hwfn->abs_pf_id); + if (dev_data->chip_id == CHIP_BB) + offset += qed_dump_num_param(dump_buf + offset, + dump, "path", QED_PATH_ID(p_hwfn)); return offset; } @@ -2156,24 +1279,87 @@ static void qed_update_blocks_reset_state(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; - u32 reg_val[MAX_DBG_RESET_REGS] = { 0 }; - u32 i; + u32 reg_val[NUM_DBG_RESET_REGS] = { 0 }; + u8 rst_reg_id; + u32 blk_id; /* Read reset registers */ - for (i = 0; i < MAX_DBG_RESET_REGS; i++) - if (s_reset_regs_defs[i].exists[dev_data->chip_id]) - reg_val[i] = qed_rd(p_hwfn, - p_ptt, s_reset_regs_defs[i].addr); + for (rst_reg_id = 0; rst_reg_id < NUM_DBG_RESET_REGS; rst_reg_id++) { + const struct dbg_reset_reg *rst_reg; + bool rst_reg_removed; + u32 rst_reg_addr; + + rst_reg = qed_get_dbg_reset_reg(p_hwfn, rst_reg_id); + rst_reg_removed = GET_FIELD(rst_reg->data, + DBG_RESET_REG_IS_REMOVED); + rst_reg_addr = DWORDS_TO_BYTES(GET_FIELD(rst_reg->data, + DBG_RESET_REG_ADDR)); + + if (!rst_reg_removed) + reg_val[rst_reg_id] = qed_rd(p_hwfn, p_ptt, + rst_reg_addr); + } /* Check if blocks are in reset */ - for (i = 0; i < MAX_BLOCK_ID; i++) { - struct block_defs *block = s_block_defs[i]; + for (blk_id = 0; blk_id < NUM_PHYS_BLOCKS; blk_id++) { + const struct dbg_block_chip *blk; + bool has_rst_reg; + bool is_removed; + + blk = qed_get_dbg_block_per_chip(p_hwfn, (enum block_id)blk_id); + is_removed = GET_FIELD(blk->flags, DBG_BLOCK_CHIP_IS_REMOVED); + has_rst_reg = GET_FIELD(blk->flags, + DBG_BLOCK_CHIP_HAS_RESET_REG); + + if (!is_removed && has_rst_reg) + 
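qed_dump_chip_revision_param() turns the revision/metal register values into a two-character "chip-revision" string. The sketch below reproduces only that formatting step, with hard-coded inputs standing in for the MISCS register reads.

#include <stdio.h>

/* Revision number maps to a letter and metal number to a digit,
 * e.g. rev 0 / metal 0 -> "a0", rev 1 / metal 0 -> "b0".
 * The input values below are made up, not reads from hardware.
 */
static void format_chip_revision(unsigned int chip_rev,
				 unsigned int chip_metal, char out[3])
{
	out[0] = 'a' + (char)chip_rev;
	out[1] = '0' + (char)chip_metal;
	out[2] = '\0';
}

int main(void)
{
	char rev_str[3];

	format_chip_revision(1, 0, rev_str);
	printf("chip-revision: %s\n", rev_str); /* prints "b0" */
	return 0;
}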
dev_data->block_in_reset[blk_id] = + !(reg_val[blk->reset_reg_id] & + BIT(blk->reset_reg_bit_offset)); + } +} + +/* is_mode_match recursive function */ +static bool qed_is_mode_match_rec(struct qed_hwfn *p_hwfn, + u16 *modes_buf_offset, u8 rec_depth) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u8 *dbg_array; + bool arg1, arg2; + u8 tree_val; + + if (rec_depth > MAX_RECURSION_DEPTH) { + DP_NOTICE(p_hwfn, + "Unexpected error: is_mode_match_rec exceeded the max recursion depth. This is probably due to a corrupt init/debug buffer.\n"); + return false; + } - dev_data->block_in_reset[i] = block->has_reset_bit && - !(reg_val[block->reset_reg] & BIT(block->reset_bit_offset)); + /* Get next element from modes tree buffer */ + dbg_array = p_hwfn->dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr; + tree_val = dbg_array[(*modes_buf_offset)++]; + + switch (tree_val) { + case INIT_MODE_OP_NOT: + return !qed_is_mode_match_rec(p_hwfn, + modes_buf_offset, rec_depth + 1); + case INIT_MODE_OP_OR: + case INIT_MODE_OP_AND: + arg1 = qed_is_mode_match_rec(p_hwfn, + modes_buf_offset, rec_depth + 1); + arg2 = qed_is_mode_match_rec(p_hwfn, + modes_buf_offset, rec_depth + 1); + return (tree_val == INIT_MODE_OP_OR) ? (arg1 || + arg2) : (arg1 && arg2); + default: + return dev_data->mode_enable[tree_val - MAX_INIT_MODE_OPS] > 0; } } +/* Returns true if the mode (specified using modes_buf_offset) is enabled */ +static bool qed_is_mode_match(struct qed_hwfn *p_hwfn, u16 *modes_buf_offset) +{ + return qed_is_mode_match_rec(p_hwfn, modes_buf_offset, 0); +} + /* Enable / disable the Debug block */ static void qed_bus_enable_dbg_block(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, bool enable) @@ -2185,23 +1371,21 @@ static void qed_bus_enable_dbg_block(struct qed_hwfn *p_hwfn, static void qed_bus_reset_dbg_block(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { - u32 dbg_reset_reg_addr, old_reset_reg_val, new_reset_reg_val; - struct block_defs *dbg_block = s_block_defs[BLOCK_DBG]; + u32 reset_reg_addr, old_reset_reg_val, new_reset_reg_val; + const struct dbg_reset_reg *reset_reg; + const struct dbg_block_chip *block; - dbg_reset_reg_addr = s_reset_regs_defs[dbg_block->reset_reg].addr; - old_reset_reg_val = qed_rd(p_hwfn, p_ptt, dbg_reset_reg_addr); - new_reset_reg_val = - old_reset_reg_val & ~BIT(dbg_block->reset_bit_offset); + block = qed_get_dbg_block_per_chip(p_hwfn, BLOCK_DBG); + reset_reg = qed_get_dbg_reset_reg(p_hwfn, block->reset_reg_id); + reset_reg_addr = + DWORDS_TO_BYTES(GET_FIELD(reset_reg->data, DBG_RESET_REG_ADDR)); - qed_wr(p_hwfn, p_ptt, dbg_reset_reg_addr, new_reset_reg_val); - qed_wr(p_hwfn, p_ptt, dbg_reset_reg_addr, old_reset_reg_val); -} + old_reset_reg_val = qed_rd(p_hwfn, p_ptt, reset_reg_addr); + new_reset_reg_val = + old_reset_reg_val & ~BIT(block->reset_reg_bit_offset); -static void qed_bus_set_framing_mode(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - enum dbg_bus_frame_modes mode) -{ - qed_wr(p_hwfn, p_ptt, DBG_REG_FRAMING_MODE, (u8)mode); + qed_wr(p_hwfn, p_ptt, reset_reg_addr, new_reset_reg_val); + qed_wr(p_hwfn, p_ptt, reset_reg_addr, old_reset_reg_val); } /* Enable / disable Debug Bus clients according to the specified mask @@ -2213,28 +1397,65 @@ static void qed_bus_enable_clients(struct qed_hwfn *p_hwfn, qed_wr(p_hwfn, p_ptt, DBG_REG_CLIENT_ENABLE, client_mask); } -static bool qed_is_mode_match(struct qed_hwfn *p_hwfn, u16 *modes_buf_offset) +static void qed_bus_config_dbg_line(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum block_id block_id, + u8 line_id, + u8 
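qed_is_mode_match_rec() walks a byte-encoded modes tree in prefix order and refuses to recurse past a maximum depth, guarding against a corrupt debug buffer. The standalone evaluator below illustrates the same idea; the opcode values, depth limit and mode table are assumptions for the example only, not the driver's binary format.

#include <stdbool.h>
#include <stdio.h>

/* Prefix-encoded boolean tree, one byte per node: operators first, then
 * their operands. Leaf values >= NUM_OPS index into a mode_enable[] table.
 */
enum { OP_NOT, OP_OR, OP_AND, NUM_OPS };

#define MAX_DEPTH 10

static const bool mode_enable[] = { true, false, true };

static bool eval_mode_tree(const unsigned char *tree, unsigned int *off,
			   unsigned int depth)
{
	unsigned char node;
	bool a, b;

	if (depth > MAX_DEPTH)
		return false;	/* corrupt buffer: refuse to recurse further */

	node = tree[(*off)++];

	switch (node) {
	case OP_NOT:
		return !eval_mode_tree(tree, off, depth + 1);
	case OP_OR:
	case OP_AND:
		a = eval_mode_tree(tree, off, depth + 1);
		b = eval_mode_tree(tree, off, depth + 1);
		return node == OP_OR ? (a || b) : (a && b);
	default:
		return mode_enable[node - NUM_OPS];
	}
}

int main(void)
{
	/* AND(mode0, NOT(mode1))  ->  true && !false  ->  true */
	const unsigned char tree[] = { OP_AND, NUM_OPS + 0, OP_NOT, NUM_OPS + 1 };
	unsigned int off = 0;

	printf("match: %d\n", eval_mode_tree(tree, &off, 0));
	return 0;
}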
enable_mask, + u8 right_shift, + u8 force_valid_mask, u8 force_frame_mask) +{ + const struct dbg_block_chip *block = + qed_get_dbg_block_per_chip(p_hwfn, block_id); + + qed_wr(p_hwfn, p_ptt, DWORDS_TO_BYTES(block->dbg_select_reg_addr), + line_id); + qed_wr(p_hwfn, p_ptt, DWORDS_TO_BYTES(block->dbg_dword_enable_reg_addr), + enable_mask); + qed_wr(p_hwfn, p_ptt, DWORDS_TO_BYTES(block->dbg_shift_reg_addr), + right_shift); + qed_wr(p_hwfn, p_ptt, DWORDS_TO_BYTES(block->dbg_force_valid_reg_addr), + force_valid_mask); + qed_wr(p_hwfn, p_ptt, DWORDS_TO_BYTES(block->dbg_force_frame_reg_addr), + force_frame_mask); +} + +/* Disable debug bus in all blocks */ +static void qed_bus_disable_blocks(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) { struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; - bool arg1, arg2; - const u32 *ptr; - u8 tree_val; + u32 block_id; - /* Get next element from modes tree buffer */ - ptr = s_dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr; - tree_val = ((u8 *)ptr)[(*modes_buf_offset)++]; + /* Disable all blocks */ + for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) { + const struct dbg_block_chip *block_per_chip = + qed_get_dbg_block_per_chip(p_hwfn, + (enum block_id)block_id); - switch (tree_val) { - case INIT_MODE_OP_NOT: - return !qed_is_mode_match(p_hwfn, modes_buf_offset); - case INIT_MODE_OP_OR: - case INIT_MODE_OP_AND: - arg1 = qed_is_mode_match(p_hwfn, modes_buf_offset); - arg2 = qed_is_mode_match(p_hwfn, modes_buf_offset); - return (tree_val == INIT_MODE_OP_OR) ? (arg1 || - arg2) : (arg1 && arg2); - default: - return dev_data->mode_enable[tree_val - MAX_INIT_MODE_OPS] > 0; + if (GET_FIELD(block_per_chip->flags, + DBG_BLOCK_CHIP_IS_REMOVED) || + dev_data->block_in_reset[block_id]) + continue; + + /* Disable debug bus */ + if (GET_FIELD(block_per_chip->flags, + DBG_BLOCK_CHIP_HAS_DBG_BUS)) { + u32 dbg_en_addr = + block_per_chip->dbg_dword_enable_reg_addr; + u16 modes_buf_offset = + GET_FIELD(block_per_chip->dbg_bus_mode.data, + DBG_MODE_HDR_MODES_BUF_OFFSET); + bool eval_mode = + GET_FIELD(block_per_chip->dbg_bus_mode.data, + DBG_MODE_HDR_EVAL_MODE) > 0; + + if (!eval_mode || + qed_is_mode_match(p_hwfn, &modes_buf_offset)) + qed_wr(p_hwfn, p_ptt, + DWORDS_TO_BYTES(dbg_en_addr), + 0); + } } } @@ -2247,6 +1468,20 @@ static bool qed_grc_is_included(struct qed_hwfn *p_hwfn, return qed_grc_get_param(p_hwfn, grc_param) > 0; } +/* Returns the storm_id that matches the specified Storm letter, + * or MAX_DBG_STORMS if invalid storm letter. + */ +static enum dbg_storms qed_get_id_from_letter(char storm_letter) +{ + u8 storm_id; + + for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) + if (s_storm_defs[storm_id].letter == storm_letter) + return (enum dbg_storms)storm_id; + + return MAX_DBG_STORMS; +} + /* Returns true of the specified Storm should be included in the dump, false * otherwise. 
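qed_get_id_from_letter() replaces the old per-block storm_id field with a lookup by Storm letter. A simplified version of that lookup, using the Storm letters from the definitions above and returning the table size as the "invalid" sentinel:

#include <stdio.h>

static const char storm_letters[] = { 'T', 'M', 'U', 'X', 'Y', 'P' };
#define NUM_STORMS (sizeof(storm_letters) / sizeof(storm_letters[0]))

static unsigned int storm_id_from_letter(char letter)
{
	unsigned int id;

	for (id = 0; id < NUM_STORMS; id++)
		if (storm_letters[id] == letter)
			return id;

	return NUM_STORMS;	/* invalid letter */
}

int main(void)
{
	printf("'Y' -> %u\n", storm_id_from_letter('Y'));  /* 4 */
	printf("'Z' -> %u\n", storm_id_from_letter('Z'));  /* 6 = invalid */
	return 0;
}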
*/ @@ -2262,14 +1497,20 @@ static bool qed_grc_is_storm_included(struct qed_hwfn *p_hwfn, static bool qed_grc_is_mem_included(struct qed_hwfn *p_hwfn, enum block_id block_id, u8 mem_group_id) { - struct block_defs *block = s_block_defs[block_id]; + const struct dbg_block *block; u8 i; - /* Check Storm match */ - if (block->associated_to_storm && - !qed_grc_is_storm_included(p_hwfn, - (enum dbg_storms)block->storm_id)) - return false; + block = get_dbg_block(p_hwfn, block_id); + + /* If the block is associated with a Storm, check Storm match */ + if (block->associated_storm_letter) { + enum dbg_storms associated_storm_id = + qed_get_id_from_letter(block->associated_storm_letter); + + if (associated_storm_id == MAX_DBG_STORMS || + !qed_grc_is_storm_included(p_hwfn, associated_storm_id)) + return false; + } for (i = 0; i < NUM_BIG_RAM_TYPES; i++) { struct big_ram_defs *big_ram = &s_big_ram_defs[i]; @@ -2291,6 +1532,8 @@ static bool qed_grc_is_mem_included(struct qed_hwfn *p_hwfn, case MEM_GROUP_CAU_SB: case MEM_GROUP_CAU_PI: return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CAU); + case MEM_GROUP_CAU_MEM_EXT: + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CAU_EXT); case MEM_GROUP_QM_MEM: return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_QM); case MEM_GROUP_CFC_MEM: @@ -2298,6 +1541,8 @@ static bool qed_grc_is_mem_included(struct qed_hwfn *p_hwfn, case MEM_GROUP_TASK_CFC_MEM: return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CFC) || qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CM_CTX); + case MEM_GROUP_DORQ_MEM: + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_DORQ); case MEM_GROUP_IGU_MEM: case MEM_GROUP_IGU_MSIX: return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_IGU); @@ -2343,64 +1588,104 @@ static void qed_grc_stall_storms(struct qed_hwfn *p_hwfn, msleep(STALL_DELAY_MS); } -/* Takes all blocks out of reset */ +/* Takes all blocks out of reset. If rbc_only is true, only RBC clients are + * taken out of reset. 
+ */ static void qed_grc_unreset_blocks(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) + struct qed_ptt *p_ptt, bool rbc_only) { struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; - u32 reg_val[MAX_DBG_RESET_REGS] = { 0 }; - u32 block_id, i; + u8 chip_id = dev_data->chip_id; + u32 i; - /* Fill reset regs values */ - for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) { - struct block_defs *block = s_block_defs[block_id]; + /* Take RBCs out of reset */ + for (i = 0; i < ARRAY_SIZE(s_rbc_reset_defs); i++) + if (s_rbc_reset_defs[i].reset_val[dev_data->chip_id]) + qed_wr(p_hwfn, + p_ptt, + s_rbc_reset_defs[i].reset_reg_addr + + RESET_REG_UNRESET_OFFSET, + s_rbc_reset_defs[i].reset_val[chip_id]); - if (block->exists[dev_data->chip_id] && block->has_reset_bit && - block->unreset) - reg_val[block->reset_reg] |= - BIT(block->reset_bit_offset); - } + if (!rbc_only) { + u32 reg_val[NUM_DBG_RESET_REGS] = { 0 }; + u8 reset_reg_id; + u32 block_id; - /* Write reset registers */ - for (i = 0; i < MAX_DBG_RESET_REGS; i++) { - if (!s_reset_regs_defs[i].exists[dev_data->chip_id]) - continue; + /* Fill reset regs values */ + for (block_id = 0; block_id < NUM_PHYS_BLOCKS; block_id++) { + bool is_removed, has_reset_reg, unreset_before_dump; + const struct dbg_block_chip *block; + + block = qed_get_dbg_block_per_chip(p_hwfn, + (enum block_id) + block_id); + is_removed = + GET_FIELD(block->flags, DBG_BLOCK_CHIP_IS_REMOVED); + has_reset_reg = + GET_FIELD(block->flags, + DBG_BLOCK_CHIP_HAS_RESET_REG); + unreset_before_dump = + GET_FIELD(block->flags, + DBG_BLOCK_CHIP_UNRESET_BEFORE_DUMP); + + if (!is_removed && has_reset_reg && unreset_before_dump) + reg_val[block->reset_reg_id] |= + BIT(block->reset_reg_bit_offset); + } - reg_val[i] |= - s_reset_regs_defs[i].unreset_val[dev_data->chip_id]; + /* Write reset registers */ + for (reset_reg_id = 0; reset_reg_id < NUM_DBG_RESET_REGS; + reset_reg_id++) { + const struct dbg_reset_reg *reset_reg; + u32 reset_reg_addr; - if (reg_val[i]) - qed_wr(p_hwfn, - p_ptt, - s_reset_regs_defs[i].addr + - RESET_REG_UNRESET_OFFSET, reg_val[i]); + reset_reg = qed_get_dbg_reset_reg(p_hwfn, reset_reg_id); + + if (GET_FIELD + (reset_reg->data, DBG_RESET_REG_IS_REMOVED)) + continue; + + if (reg_val[reset_reg_id]) { + reset_reg_addr = + GET_FIELD(reset_reg->data, + DBG_RESET_REG_ADDR); + qed_wr(p_hwfn, + p_ptt, + DWORDS_TO_BYTES(reset_reg_addr) + + RESET_REG_UNRESET_OFFSET, + reg_val[reset_reg_id]); + } + } } } /* Returns the attention block data of the specified block */ static const struct dbg_attn_block_type_data * -qed_get_block_attn_data(enum block_id block_id, enum dbg_attn_type attn_type) +qed_get_block_attn_data(struct qed_hwfn *p_hwfn, + enum block_id block_id, enum dbg_attn_type attn_type) { const struct dbg_attn_block *base_attn_block_arr = - (const struct dbg_attn_block *) - s_dbg_arrays[BIN_BUF_DBG_ATTN_BLOCKS].ptr; + (const struct dbg_attn_block *) + p_hwfn->dbg_arrays[BIN_BUF_DBG_ATTN_BLOCKS].ptr; return &base_attn_block_arr[block_id].per_type_data[attn_type]; } /* Returns the attention registers of the specified block */ static const struct dbg_attn_reg * -qed_get_block_attn_regs(enum block_id block_id, enum dbg_attn_type attn_type, +qed_get_block_attn_regs(struct qed_hwfn *p_hwfn, + enum block_id block_id, enum dbg_attn_type attn_type, u8 *num_attn_regs) { const struct dbg_attn_block_type_data *block_type_data = - qed_get_block_attn_data(block_id, attn_type); + qed_get_block_attn_data(p_hwfn, block_id, attn_type); *num_attn_regs = block_type_data->num_regs; - return 
&((const struct dbg_attn_reg *) - s_dbg_arrays[BIN_BUF_DBG_ATTN_REGS].ptr)[block_type_data-> - regs_offset]; + return (const struct dbg_attn_reg *) + p_hwfn->dbg_arrays[BIN_BUF_DBG_ATTN_REGS].ptr + + block_type_data->regs_offset; } /* For each block, clear the status of all parities */ @@ -2412,11 +1697,12 @@ static void qed_grc_clear_all_prty(struct qed_hwfn *p_hwfn, u8 reg_idx, num_attn_regs; u32 block_id; - for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) { + for (block_id = 0; block_id < NUM_PHYS_BLOCKS; block_id++) { if (dev_data->block_in_reset[block_id]) continue; - attn_reg_arr = qed_get_block_attn_regs((enum block_id)block_id, + attn_reg_arr = qed_get_block_attn_regs(p_hwfn, + (enum block_id)block_id, ATTN_TYPE_PARITY, &num_attn_regs); @@ -2444,22 +1730,20 @@ static void qed_grc_clear_all_prty(struct qed_hwfn *p_hwfn, } /* Dumps GRC registers section header. Returns the dumped size in dwords. - * The following parameters are dumped: + * the following parameters are dumped: * - count: no. of dumped entries * - split_type: split type * - split_id: split ID (dumped only if split_id != SPLIT_TYPE_NONE) - * - param_name: user parameter value (dumped only if param_name != NULL - * and param_val != NULL). + * - reg_type_name: register type name (dumped only if reg_type_name != NULL) */ static u32 qed_grc_dump_regs_hdr(u32 *dump_buf, bool dump, u32 num_reg_entries, enum init_split_types split_type, - u8 split_id, - const char *param_name, const char *param_val) + u8 split_id, const char *reg_type_name) { u8 num_params = 2 + - (split_type != SPLIT_TYPE_NONE ? 1 : 0) + (param_name ? 1 : 0); + (split_type != SPLIT_TYPE_NONE ? 1 : 0) + (reg_type_name ? 1 : 0); u32 offset = 0; offset += qed_dump_section_hdr(dump_buf + offset, @@ -2472,9 +1756,9 @@ static u32 qed_grc_dump_regs_hdr(u32 *dump_buf, if (split_type != SPLIT_TYPE_NONE) offset += qed_dump_num_param(dump_buf + offset, dump, "id", split_id); - if (param_name && param_val) + if (reg_type_name) offset += qed_dump_str_param(dump_buf + offset, - dump, param_name, param_val); + dump, "type", reg_type_name); return offset; } @@ -2504,21 +1788,12 @@ static u32 qed_grc_dump_addr_range(struct qed_hwfn *p_hwfn, { struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; u8 port_id = 0, pf_id = 0, vf_id = 0, fid = 0; + bool read_using_dmae = false; + u32 thresh; if (!dump) return len; - /* Print log if needed */ - dev_data->num_regs_read += len; - if (dev_data->num_regs_read >= - s_platform_defs[dev_data->platform_id].log_thresh) { - DP_VERBOSE(p_hwfn, - QED_MSG_DEBUG, - "Dumping %d registers...\n", - dev_data->num_regs_read); - dev_data->num_regs_read = 0; - } - switch (split_type) { case SPLIT_TYPE_PORT: port_id = split_id; @@ -2539,38 +1814,77 @@ static u32 qed_grc_dump_addr_range(struct qed_hwfn *p_hwfn, } /* Try reading using DMAE */ - if (dev_data->use_dmae && split_type == SPLIT_TYPE_NONE && - (len >= s_platform_defs[dev_data->platform_id].dmae_thresh || - wide_bus)) { - if (!qed_dmae_grc2host(p_hwfn, p_ptt, DWORDS_TO_BYTES(addr), - (u64)(uintptr_t)(dump_buf), len, NULL)) - return len; - dev_data->use_dmae = 0; - DP_VERBOSE(p_hwfn, - QED_MSG_DEBUG, - "Failed reading from chip using DMAE, using GRC instead\n"); + if (dev_data->use_dmae && split_type != SPLIT_TYPE_VF && + (len >= s_hw_type_defs[dev_data->hw_type].dmae_thresh || + (PROTECT_WIDE_BUS && wide_bus))) { + struct qed_dmae_params dmae_params; + + /* Set DMAE params */ + memset(&dmae_params, 0, sizeof(dmae_params)); + SET_FIELD(dmae_params.flags, QED_DMAE_PARAMS_COMPLETION_DST, 1); 
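	/* The switch below hands the DMAE engine the same port/PF identity
	 * that the GRC fallback further down establishes through the pretend
	 * registers, so a split dump reads the intended instance either way.
	 * SPLIT_TYPE_VF never reaches this path (it is excluded in the
	 * condition above) and is always read via GRC with a VF pretend FID.
	 */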
+ switch (split_type) { + case SPLIT_TYPE_PORT: + SET_FIELD(dmae_params.flags, QED_DMAE_PARAMS_PORT_VALID, + 1); + dmae_params.port_id = port_id; + break; + case SPLIT_TYPE_PF: + SET_FIELD(dmae_params.flags, + QED_DMAE_PARAMS_SRC_PF_VALID, 1); + dmae_params.src_pfid = pf_id; + break; + case SPLIT_TYPE_PORT_PF: + SET_FIELD(dmae_params.flags, QED_DMAE_PARAMS_PORT_VALID, + 1); + SET_FIELD(dmae_params.flags, + QED_DMAE_PARAMS_SRC_PF_VALID, 1); + dmae_params.port_id = port_id; + dmae_params.src_pfid = pf_id; + break; + default: + break; + } + + /* Execute DMAE command */ + read_using_dmae = !qed_dmae_grc2host(p_hwfn, + p_ptt, + DWORDS_TO_BYTES(addr), + (u64)(uintptr_t)(dump_buf), + len, &dmae_params); + if (!read_using_dmae) { + dev_data->use_dmae = 0; + DP_VERBOSE(p_hwfn, + QED_MSG_DEBUG, + "Failed reading from chip using DMAE, using GRC instead\n"); + } } + if (read_using_dmae) + goto print_log; + /* If not read using DMAE, read using GRC */ /* Set pretend */ - if (split_type != dev_data->pretend.split_type || split_id != - dev_data->pretend.split_id) { + if (split_type != dev_data->pretend.split_type || + split_id != dev_data->pretend.split_id) { switch (split_type) { case SPLIT_TYPE_PORT: qed_port_pretend(p_hwfn, p_ptt, port_id); break; case SPLIT_TYPE_PF: - fid = pf_id << PXP_PRETEND_CONCRETE_FID_PFID_SHIFT; + fid = FIELD_VALUE(PXP_PRETEND_CONCRETE_FID_PFID, + pf_id); qed_fid_pretend(p_hwfn, p_ptt, fid); break; case SPLIT_TYPE_PORT_PF: - fid = pf_id << PXP_PRETEND_CONCRETE_FID_PFID_SHIFT; + fid = FIELD_VALUE(PXP_PRETEND_CONCRETE_FID_PFID, + pf_id); qed_port_fid_pretend(p_hwfn, p_ptt, port_id, fid); break; case SPLIT_TYPE_VF: - fid = BIT(PXP_PRETEND_CONCRETE_FID_VFVALID_SHIFT) | - (vf_id << PXP_PRETEND_CONCRETE_FID_VFID_SHIFT); + fid = FIELD_VALUE(PXP_PRETEND_CONCRETE_FID_VFVALID, 1) + | FIELD_VALUE(PXP_PRETEND_CONCRETE_FID_VFID, + vf_id); qed_fid_pretend(p_hwfn, p_ptt, fid); break; default: @@ -2584,6 +1898,16 @@ static u32 qed_grc_dump_addr_range(struct qed_hwfn *p_hwfn, /* Read registers using GRC */ qed_read_regs(p_hwfn, p_ptt, dump_buf, addr, len); +print_log: + /* Print log */ + dev_data->num_regs_read += len; + thresh = s_hw_type_defs[dev_data->hw_type].log_thresh; + if ((dev_data->num_regs_read / thresh) > + ((dev_data->num_regs_read - len) / thresh)) + DP_VERBOSE(p_hwfn, + QED_MSG_DEBUG, + "Dumped %d registers...\n", dev_data->num_regs_read); + return len; } @@ -2668,7 +1992,7 @@ static u32 qed_grc_dump_reg_entry_skip(struct qed_hwfn *p_hwfn, /* Dumps GRC registers entries. Returns the dumped size in dwords. 
*/ static u32 qed_grc_dump_regs_entries(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - struct dbg_array input_regs_arr, + struct virt_mem_desc input_regs_arr, u32 *dump_buf, bool dump, enum init_split_types split_type, @@ -2681,10 +2005,10 @@ static u32 qed_grc_dump_regs_entries(struct qed_hwfn *p_hwfn, *num_dumped_reg_entries = 0; - while (input_offset < input_regs_arr.size_in_dwords) { + while (input_offset < BYTES_TO_DWORDS(input_regs_arr.size)) { const struct dbg_dump_cond_hdr *cond_hdr = (const struct dbg_dump_cond_hdr *) - &input_regs_arr.ptr[input_offset++]; + input_regs_arr.ptr + input_offset++; u16 modes_buf_offset; bool eval_mode; @@ -2707,7 +2031,7 @@ static u32 qed_grc_dump_regs_entries(struct qed_hwfn *p_hwfn, for (i = 0; i < cond_hdr->data_size; i++, input_offset++) { const struct dbg_dump_reg *reg = (const struct dbg_dump_reg *) - &input_regs_arr.ptr[input_offset]; + input_regs_arr.ptr + input_offset; u32 addr, len; bool wide_bus; @@ -2732,14 +2056,12 @@ static u32 qed_grc_dump_regs_entries(struct qed_hwfn *p_hwfn, /* Dumps GRC registers entries. Returns the dumped size in dwords. */ static u32 qed_grc_dump_split_data(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - struct dbg_array input_regs_arr, + struct virt_mem_desc input_regs_arr, u32 *dump_buf, bool dump, bool block_enable[MAX_BLOCK_ID], enum init_split_types split_type, - u8 split_id, - const char *param_name, - const char *param_val) + u8 split_id, const char *reg_type_name) { struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; enum init_split_types hdr_split_type = split_type; @@ -2757,7 +2079,7 @@ static u32 qed_grc_dump_split_data(struct qed_hwfn *p_hwfn, false, 0, hdr_split_type, - hdr_split_id, param_name, param_val); + hdr_split_id, reg_type_name); /* Dump registers */ offset += qed_grc_dump_regs_entries(p_hwfn, @@ -2776,7 +2098,7 @@ static u32 qed_grc_dump_split_data(struct qed_hwfn *p_hwfn, dump, num_dumped_reg_entries, hdr_split_type, - hdr_split_id, param_name, param_val); + hdr_split_id, reg_type_name); return num_dumped_reg_entries > 0 ? 
offset : 0; } @@ -2789,32 +2111,33 @@ static u32 qed_grc_dump_registers(struct qed_hwfn *p_hwfn, u32 *dump_buf, bool dump, bool block_enable[MAX_BLOCK_ID], - const char *param_name, const char *param_val) + const char *reg_type_name) { + struct virt_mem_desc *dbg_buf = + &p_hwfn->dbg_arrays[BIN_BUF_DBG_DUMP_REG]; struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; u32 offset = 0, input_offset = 0; - u16 fid; - while (input_offset < - s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].size_in_dwords) { + + while (input_offset < BYTES_TO_DWORDS(dbg_buf->size)) { const struct dbg_dump_split_hdr *split_hdr; - struct dbg_array curr_input_regs_arr; + struct virt_mem_desc curr_input_regs_arr; enum init_split_types split_type; u16 split_count = 0; u32 split_data_size; u8 split_id; split_hdr = - (const struct dbg_dump_split_hdr *) - &s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].ptr[input_offset++]; + (const struct dbg_dump_split_hdr *) + dbg_buf->ptr + input_offset++; split_type = - GET_FIELD(split_hdr->hdr, - DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID); - split_data_size = - GET_FIELD(split_hdr->hdr, - DBG_DUMP_SPLIT_HDR_DATA_SIZE); + GET_FIELD(split_hdr->hdr, + DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID); + split_data_size = GET_FIELD(split_hdr->hdr, + DBG_DUMP_SPLIT_HDR_DATA_SIZE); curr_input_regs_arr.ptr = - &s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].ptr[input_offset]; - curr_input_regs_arr.size_in_dwords = split_data_size; + (u32 *)p_hwfn->dbg_arrays[BIN_BUF_DBG_DUMP_REG].ptr + + input_offset; + curr_input_regs_arr.size = DWORDS_TO_BYTES(split_data_size); switch (split_type) { case SPLIT_TYPE_NONE: @@ -2842,16 +2165,16 @@ static u32 qed_grc_dump_registers(struct qed_hwfn *p_hwfn, dump, block_enable, split_type, split_id, - param_name, - param_val); + reg_type_name); input_offset += split_data_size; } /* Cancel pretends (pretend to original PF) */ if (dump) { - fid = p_hwfn->rel_pf_id << PXP_PRETEND_CONCRETE_FID_PFID_SHIFT; - qed_fid_pretend(p_hwfn, p_ptt, fid); + qed_fid_pretend(p_hwfn, p_ptt, + FIELD_VALUE(PXP_PRETEND_CONCRETE_FID_PFID, + p_hwfn->rel_pf_id)); dev_data->pretend.split_type = SPLIT_TYPE_NONE; dev_data->pretend.split_id = 0; } @@ -2864,26 +2187,32 @@ static u32 qed_grc_dump_reset_regs(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) { - struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; - u32 i, offset = 0, num_regs = 0; + u32 offset = 0, num_regs = 0; + u8 reset_reg_id; /* Calculate header size */ offset += qed_grc_dump_regs_hdr(dump_buf, - false, 0, - SPLIT_TYPE_NONE, 0, NULL, NULL); + false, + 0, SPLIT_TYPE_NONE, 0, "RESET_REGS"); /* Write reset registers */ - for (i = 0; i < MAX_DBG_RESET_REGS; i++) { - if (!s_reset_regs_defs[i].exists[dev_data->chip_id]) + for (reset_reg_id = 0; reset_reg_id < NUM_DBG_RESET_REGS; + reset_reg_id++) { + const struct dbg_reset_reg *reset_reg; + u32 reset_reg_addr; + + reset_reg = qed_get_dbg_reset_reg(p_hwfn, reset_reg_id); + + if (GET_FIELD(reset_reg->data, DBG_RESET_REG_IS_REMOVED)) continue; + reset_reg_addr = GET_FIELD(reset_reg->data, DBG_RESET_REG_ADDR); offset += qed_grc_dump_reg_entry(p_hwfn, p_ptt, dump_buf + offset, dump, - BYTES_TO_DWORDS - (s_reset_regs_defs[i].addr), 1, - false, SPLIT_TYPE_NONE, 0); + reset_reg_addr, + 1, false, SPLIT_TYPE_NONE, 0); num_regs++; } @@ -2891,7 +2220,7 @@ static u32 qed_grc_dump_reset_regs(struct qed_hwfn *p_hwfn, if (dump) qed_grc_dump_regs_hdr(dump_buf, true, num_regs, SPLIT_TYPE_NONE, - 0, NULL, NULL); + 0, "RESET_REGS"); return offset; } @@ -2904,21 +2233,23 @@ static u32 qed_grc_dump_modified_regs(struct qed_hwfn *p_hwfn, 
u32 *dump_buf, bool dump) { struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; - u32 block_id, offset = 0, num_reg_entries = 0; + u32 block_id, offset = 0, stall_regs_offset; const struct dbg_attn_reg *attn_reg_arr; u8 storm_id, reg_idx, num_attn_regs; + u32 num_reg_entries = 0; - /* Calculate header size */ + /* Write empty header for attention registers */ offset += qed_grc_dump_regs_hdr(dump_buf, - false, 0, SPLIT_TYPE_NONE, - 0, NULL, NULL); + false, + 0, SPLIT_TYPE_NONE, 0, "ATTN_REGS"); /* Write parity registers */ - for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) { + for (block_id = 0; block_id < NUM_PHYS_BLOCKS; block_id++) { if (dev_data->block_in_reset[block_id] && dump) continue; - attn_reg_arr = qed_get_block_attn_regs((enum block_id)block_id, + attn_reg_arr = qed_get_block_attn_regs(p_hwfn, + (enum block_id)block_id, ATTN_TYPE_PARITY, &num_attn_regs); @@ -2961,16 +2292,29 @@ static u32 qed_grc_dump_modified_regs(struct qed_hwfn *p_hwfn, } } + /* Overwrite header for attention registers */ + if (dump) + qed_grc_dump_regs_hdr(dump_buf, + true, + num_reg_entries, + SPLIT_TYPE_NONE, 0, "ATTN_REGS"); + + /* Write empty header for stall registers */ + stall_regs_offset = offset; + offset += qed_grc_dump_regs_hdr(dump_buf, + false, 0, SPLIT_TYPE_NONE, 0, "REGS"); + /* Write Storm stall status registers */ - for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) { + for (storm_id = 0, num_reg_entries = 0; storm_id < MAX_DBG_STORMS; + storm_id++) { struct storm_defs *storm = &s_storm_defs[storm_id]; u32 addr; - if (dev_data->block_in_reset[storm->block_id] && dump) + if (dev_data->block_in_reset[storm->sem_block_id] && dump) continue; addr = - BYTES_TO_DWORDS(s_storm_defs[storm_id].sem_fast_mem_addr + + BYTES_TO_DWORDS(storm->sem_fast_mem_addr + SEM_FAST_REG_STALLED); offset += qed_grc_dump_reg_entry(p_hwfn, p_ptt, @@ -2982,12 +2326,12 @@ static u32 qed_grc_dump_modified_regs(struct qed_hwfn *p_hwfn, num_reg_entries++; } - /* Write header */ + /* Overwrite header for stall registers */ if (dump) - qed_grc_dump_regs_hdr(dump_buf, + qed_grc_dump_regs_hdr(dump_buf + stall_regs_offset, true, - num_reg_entries, SPLIT_TYPE_NONE, - 0, NULL, NULL); + num_reg_entries, + SPLIT_TYPE_NONE, 0, "REGS"); return offset; } @@ -3000,8 +2344,7 @@ static u32 qed_grc_dump_special_regs(struct qed_hwfn *p_hwfn, u32 offset = 0, addr; offset += qed_grc_dump_regs_hdr(dump_buf, - dump, 2, SPLIT_TYPE_NONE, 0, - NULL, NULL); + dump, 2, SPLIT_TYPE_NONE, 0, "REGS"); /* Dump R/TDIF_REG_DEBUG_ERROR_INFO_SIZE (every 8'th register should be * skipped). 
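The modified-registers dump above emits each section header twice: once as a placeholder with a zero count, and again (via the saved stall_regs_offset) once the number of dumped entries is known, while dump=false serves as a pure sizing pass. Below is a minimal, self-contained sketch of that two-pass pattern; emit_hdr/dump_section and the even-value filter are illustrative only and not part of the qed driver.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t u32;

/* One-dword section header holding the entry count. */
static u32 emit_hdr(u32 *buf, bool dump, u32 num_entries)
{
	if (dump)
		*buf = num_entries;
	return 1;
}

static u32 dump_section(u32 *buf, bool dump, const u32 *entries, u32 n)
{
	u32 hdr_offset, offset = 0, num_entries = 0, i;

	/* Pass 1: reserve the header with a zero count. */
	hdr_offset = offset;
	offset += emit_hdr(buf + offset, false, 0);

	/* Append entries; some may be skipped, so the final count is not
	 * known up front (as with registers of blocks held in reset).
	 */
	for (i = 0; i < n; i++) {
		if (entries[i] % 2)
			continue;
		if (dump)
			buf[offset] = entries[i];
		offset++;
		num_entries++;
	}

	/* Pass 2: overwrite the header in place with the real count. */
	if (dump)
		emit_hdr(buf + hdr_offset, true, num_entries);

	return offset;
}

int main(void)
{
	u32 src[5] = { 1, 2, 3, 4, 6 }, buf[8];
	u32 len = dump_section(buf, true, src, 5);

	printf("dwords=%u, entries=%u\n", len, buf[0]);
	return 0;
}

Running this prints dwords=4, entries=3: the same single pass that copies the data also produces a correctly counted section, which is why the driver prefers overwriting headers over walking the hardware twice.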
@@ -3049,8 +2392,7 @@ static u32 qed_grc_dump_mem_hdr(struct qed_hwfn *p_hwfn, u32 len, u32 bit_width, bool packed, - const char *mem_group, - bool is_storm, char storm_letter) + const char *mem_group, char storm_letter) { u8 num_params = 3; u32 offset = 0; @@ -3071,7 +2413,7 @@ static u32 qed_grc_dump_mem_hdr(struct qed_hwfn *p_hwfn, if (name) { /* Dump name */ - if (is_storm) { + if (storm_letter) { strcpy(buf, "?STORM_"); buf[0] = storm_letter; strcpy(buf + strlen(buf), name); @@ -3103,7 +2445,7 @@ static u32 qed_grc_dump_mem_hdr(struct qed_hwfn *p_hwfn, dump, "packed", 1); /* Dump reg type */ - if (is_storm) { + if (storm_letter) { strcpy(buf, "?STORM_"); buf[0] = storm_letter; strcpy(buf + strlen(buf), mem_group); @@ -3130,8 +2472,7 @@ static u32 qed_grc_dump_mem(struct qed_hwfn *p_hwfn, bool wide_bus, u32 bit_width, bool packed, - const char *mem_group, - bool is_storm, char storm_letter) + const char *mem_group, char storm_letter) { u32 offset = 0; @@ -3142,8 +2483,7 @@ static u32 qed_grc_dump_mem(struct qed_hwfn *p_hwfn, addr, len, bit_width, - packed, - mem_group, is_storm, storm_letter); + packed, mem_group, storm_letter); offset += qed_grc_dump_addr_range(p_hwfn, p_ptt, dump_buf + offset, @@ -3156,20 +2496,21 @@ static u32 qed_grc_dump_mem(struct qed_hwfn *p_hwfn, /* Dumps GRC memories entries. Returns the dumped size in dwords. */ static u32 qed_grc_dump_mem_entries(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - struct dbg_array input_mems_arr, + struct virt_mem_desc input_mems_arr, u32 *dump_buf, bool dump) { u32 i, offset = 0, input_offset = 0; bool mode_match = true; - while (input_offset < input_mems_arr.size_in_dwords) { + while (input_offset < BYTES_TO_DWORDS(input_mems_arr.size)) { const struct dbg_dump_cond_hdr *cond_hdr; u16 modes_buf_offset; u32 num_entries; bool eval_mode; - cond_hdr = (const struct dbg_dump_cond_hdr *) - &input_mems_arr.ptr[input_offset++]; + cond_hdr = + (const struct dbg_dump_cond_hdr *)input_mems_arr.ptr + + input_offset++; num_entries = cond_hdr->data_size / MEM_DUMP_ENTRY_SIZE_DWORDS; /* Check required mode */ @@ -3191,24 +2532,25 @@ static u32 qed_grc_dump_mem_entries(struct qed_hwfn *p_hwfn, for (i = 0; i < num_entries; i++, input_offset += MEM_DUMP_ENTRY_SIZE_DWORDS) { const struct dbg_dump_mem *mem = - (const struct dbg_dump_mem *) - &input_mems_arr.ptr[input_offset]; - u8 mem_group_id = GET_FIELD(mem->dword0, - DBG_DUMP_MEM_MEM_GROUP_ID); - bool is_storm = false, mem_wide_bus; - enum dbg_grc_params grc_param; - char storm_letter = 'a'; - enum block_id block_id; + (const struct dbg_dump_mem *)((u32 *) + input_mems_arr.ptr + + input_offset); + const struct dbg_block *block; + char storm_letter = 0; u32 mem_addr, mem_len; + bool mem_wide_bus; + u8 mem_group_id; + mem_group_id = GET_FIELD(mem->dword0, + DBG_DUMP_MEM_MEM_GROUP_ID); if (mem_group_id >= MEM_GROUPS_NUM) { DP_NOTICE(p_hwfn, "Invalid mem_group_id\n"); return 0; } - block_id = (enum block_id)cond_hdr->block_id; if (!qed_grc_is_mem_included(p_hwfn, - block_id, + (enum block_id) + cond_hdr->block_id, mem_group_id)) continue; @@ -3217,42 +2559,14 @@ static u32 qed_grc_dump_mem_entries(struct qed_hwfn *p_hwfn, mem_wide_bus = GET_FIELD(mem->dword1, DBG_DUMP_MEM_WIDE_BUS); - /* Update memory length for CCFC/TCFC memories - * according to number of LCIDs/LTIDs. 
- */ - if (mem_group_id == MEM_GROUP_CONN_CFC_MEM) { - if (mem_len % MAX_LCIDS) { - DP_NOTICE(p_hwfn, - "Invalid CCFC connection memory size\n"); - return 0; - } - - grc_param = DBG_GRC_PARAM_NUM_LCIDS; - mem_len = qed_grc_get_param(p_hwfn, grc_param) * - (mem_len / MAX_LCIDS); - } else if (mem_group_id == MEM_GROUP_TASK_CFC_MEM) { - if (mem_len % MAX_LTIDS) { - DP_NOTICE(p_hwfn, - "Invalid TCFC task memory size\n"); - return 0; - } - - grc_param = DBG_GRC_PARAM_NUM_LTIDS; - mem_len = qed_grc_get_param(p_hwfn, grc_param) * - (mem_len / MAX_LTIDS); - } + block = get_dbg_block(p_hwfn, + cond_hdr->block_id); - /* If memory is associated with Storm, update Storm - * details. + /* If memory is associated with Storm, + * update storm details */ - if (s_block_defs - [cond_hdr->block_id]->associated_to_storm) { - is_storm = true; - storm_letter = - s_storm_defs[s_block_defs - [cond_hdr->block_id]-> - storm_id].letter; - } + if (block->associated_storm_letter) + storm_letter = block->associated_storm_letter; /* Dump memory */ offset += qed_grc_dump_mem(p_hwfn, @@ -3266,7 +2580,6 @@ static u32 qed_grc_dump_mem_entries(struct qed_hwfn *p_hwfn, 0, false, s_mem_group_names[mem_group_id], - is_storm, storm_letter); } } @@ -3281,26 +2594,25 @@ static u32 qed_grc_dump_memories(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) { + struct virt_mem_desc *dbg_buf = + &p_hwfn->dbg_arrays[BIN_BUF_DBG_DUMP_MEM]; u32 offset = 0, input_offset = 0; - while (input_offset < - s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].size_in_dwords) { + while (input_offset < BYTES_TO_DWORDS(dbg_buf->size)) { const struct dbg_dump_split_hdr *split_hdr; - struct dbg_array curr_input_mems_arr; + struct virt_mem_desc curr_input_mems_arr; enum init_split_types split_type; u32 split_data_size; - split_hdr = (const struct dbg_dump_split_hdr *) - &s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].ptr[input_offset++]; - split_type = - GET_FIELD(split_hdr->hdr, - DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID); - split_data_size = - GET_FIELD(split_hdr->hdr, - DBG_DUMP_SPLIT_HDR_DATA_SIZE); - curr_input_mems_arr.ptr = - &s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].ptr[input_offset]; - curr_input_mems_arr.size_in_dwords = split_data_size; + split_hdr = + (const struct dbg_dump_split_hdr *)dbg_buf->ptr + + input_offset++; + split_type = GET_FIELD(split_hdr->hdr, + DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID); + split_data_size = GET_FIELD(split_hdr->hdr, + DBG_DUMP_SPLIT_HDR_DATA_SIZE); + curr_input_mems_arr.ptr = (u32 *)dbg_buf->ptr + input_offset; + curr_input_mems_arr.size = DWORDS_TO_BYTES(split_data_size); if (split_type == SPLIT_TYPE_NONE) offset += qed_grc_dump_mem_entries(p_hwfn, @@ -3328,17 +2640,19 @@ static u32 qed_grc_dump_ctx_data(struct qed_hwfn *p_hwfn, bool dump, const char *name, u32 num_lids, - u32 lid_size, - u32 rd_reg_addr, - u8 storm_id) + enum cm_ctx_types ctx_type, u8 storm_id) { + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; struct storm_defs *storm = &s_storm_defs[storm_id]; - u32 i, lid, total_size, offset = 0; + u32 i, lid, lid_size, total_size; + u32 rd_reg_addr, offset = 0; + + /* Convert quad-regs to dwords */ + lid_size = storm->cm_ctx_lid_sizes[dev_data->chip_id][ctx_type] * 4; if (!lid_size) return 0; - lid_size *= BYTES_IN_DWORD; total_size = num_lids * lid_size; offset += qed_grc_dump_mem_hdr(p_hwfn, @@ -3348,18 +2662,26 @@ static u32 qed_grc_dump_ctx_data(struct qed_hwfn *p_hwfn, 0, total_size, lid_size * 32, - false, name, true, storm->letter); + false, name, storm->letter); if (!dump) return offset + total_size; + rd_reg_addr = 
BYTES_TO_DWORDS(storm->cm_ctx_rd_addr[ctx_type]); + /* Dump context data */ for (lid = 0; lid < num_lids; lid++) { - for (i = 0; i < lid_size; i++, offset++) { + for (i = 0; i < lid_size; i++) { qed_wr(p_hwfn, p_ptt, storm->cm_ctx_wr_addr, (i << 9) | lid); - *(dump_buf + offset) = qed_rd(p_hwfn, - p_ptt, rd_reg_addr); + offset += qed_grc_dump_addr_range(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + rd_reg_addr, + 1, + false, + SPLIT_TYPE_NONE, 0); } } @@ -3370,115 +2692,126 @@ static u32 qed_grc_dump_ctx_data(struct qed_hwfn *p_hwfn, static u32 qed_grc_dump_ctx(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) { - enum dbg_grc_params grc_param; u32 offset = 0; u8 storm_id; for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) { - struct storm_defs *storm = &s_storm_defs[storm_id]; - if (!qed_grc_is_storm_included(p_hwfn, (enum dbg_storms)storm_id)) continue; /* Dump Conn AG context size */ - grc_param = DBG_GRC_PARAM_NUM_LCIDS; - offset += - qed_grc_dump_ctx_data(p_hwfn, - p_ptt, - dump_buf + offset, - dump, - "CONN_AG_CTX", - qed_grc_get_param(p_hwfn, - grc_param), - storm->cm_conn_ag_ctx_lid_size, - storm->cm_conn_ag_ctx_rd_addr, - storm_id); + offset += qed_grc_dump_ctx_data(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + "CONN_AG_CTX", + NUM_OF_LCIDS, + CM_CTX_CONN_AG, storm_id); /* Dump Conn ST context size */ - grc_param = DBG_GRC_PARAM_NUM_LCIDS; - offset += - qed_grc_dump_ctx_data(p_hwfn, - p_ptt, - dump_buf + offset, - dump, - "CONN_ST_CTX", - qed_grc_get_param(p_hwfn, - grc_param), - storm->cm_conn_st_ctx_lid_size, - storm->cm_conn_st_ctx_rd_addr, - storm_id); + offset += qed_grc_dump_ctx_data(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + "CONN_ST_CTX", + NUM_OF_LCIDS, + CM_CTX_CONN_ST, storm_id); /* Dump Task AG context size */ - grc_param = DBG_GRC_PARAM_NUM_LTIDS; - offset += - qed_grc_dump_ctx_data(p_hwfn, - p_ptt, - dump_buf + offset, - dump, - "TASK_AG_CTX", - qed_grc_get_param(p_hwfn, - grc_param), - storm->cm_task_ag_ctx_lid_size, - storm->cm_task_ag_ctx_rd_addr, - storm_id); + offset += qed_grc_dump_ctx_data(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + "TASK_AG_CTX", + NUM_OF_LTIDS, + CM_CTX_TASK_AG, storm_id); /* Dump Task ST context size */ - grc_param = DBG_GRC_PARAM_NUM_LTIDS; - offset += - qed_grc_dump_ctx_data(p_hwfn, - p_ptt, - dump_buf + offset, - dump, - "TASK_ST_CTX", - qed_grc_get_param(p_hwfn, - grc_param), - storm->cm_task_st_ctx_lid_size, - storm->cm_task_st_ctx_rd_addr, - storm_id); + offset += qed_grc_dump_ctx_data(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + "TASK_ST_CTX", + NUM_OF_LTIDS, + CM_CTX_TASK_ST, storm_id); } return offset; } -/* Dumps GRC IORs data. Returns the dumped size in dwords. */ -static u32 qed_grc_dump_iors(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) -{ - char buf[10] = "IOR_SET_?"; - u32 addr, offset = 0; - u8 storm_id, set_id; +#define VFC_STATUS_RESP_READY_BIT 0 +#define VFC_STATUS_BUSY_BIT 1 +#define VFC_STATUS_SENDING_CMD_BIT 2 - for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) { - struct storm_defs *storm = &s_storm_defs[storm_id]; +#define VFC_POLLING_DELAY_MS 1 +#define VFC_POLLING_COUNT 20 - if (!qed_grc_is_storm_included(p_hwfn, - (enum dbg_storms)storm_id)) - continue; +/* Reads data from VFC. Returns the number of dwords read (0 on error). + * Sizes are specified in dwords. 
+ */ +static u32 qed_grc_dump_read_from_vfc(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct storm_defs *storm, + u32 *cmd_data, + u32 cmd_size, + u32 *addr_data, + u32 addr_size, + u32 resp_size, u32 *dump_buf) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 vfc_status, polling_ms, polling_count = 0, i; + u32 reg_addr, sem_base; + bool is_ready = false; + + sem_base = storm->sem_fast_mem_addr; + polling_ms = VFC_POLLING_DELAY_MS * + s_hw_type_defs[dev_data->hw_type].delay_factor; + + /* Write VFC command */ + ARR_REG_WR(p_hwfn, + p_ptt, + sem_base + SEM_FAST_REG_VFC_DATA_WR, + cmd_data, cmd_size); + + /* Write VFC address */ + ARR_REG_WR(p_hwfn, + p_ptt, + sem_base + SEM_FAST_REG_VFC_ADDR, + addr_data, addr_size); + + /* Read response */ + for (i = 0; i < resp_size; i++) { + /* Poll until ready */ + do { + reg_addr = sem_base + SEM_FAST_REG_VFC_STATUS; + qed_grc_dump_addr_range(p_hwfn, + p_ptt, + &vfc_status, + true, + BYTES_TO_DWORDS(reg_addr), + 1, + false, SPLIT_TYPE_NONE, 0); + is_ready = vfc_status & BIT(VFC_STATUS_RESP_READY_BIT); + + if (!is_ready) { + if (polling_count++ == VFC_POLLING_COUNT) + return 0; - for (set_id = 0; set_id < NUM_IOR_SETS; set_id++) { - addr = BYTES_TO_DWORDS(storm->sem_fast_mem_addr + - SEM_FAST_REG_STORM_REG_FILE) + - IOR_SET_OFFSET(set_id); - if (strlen(buf) > 0) - buf[strlen(buf) - 1] = '0' + set_id; - offset += qed_grc_dump_mem(p_hwfn, - p_ptt, - dump_buf + offset, - dump, - buf, - addr, - IORS_PER_SET, - false, - 32, - false, - "ior", - true, - storm->letter); - } + msleep(polling_ms); + } + } while (!is_ready); + + reg_addr = sem_base + SEM_FAST_REG_VFC_DATA_RD; + qed_grc_dump_addr_range(p_hwfn, + p_ptt, + dump_buf + i, + true, + BYTES_TO_DWORDS(reg_addr), + 1, false, SPLIT_TYPE_NONE, 0); } - return offset; + return resp_size; } /* Dump VFC CAM. Returns the dumped size in dwords. 
*/ @@ -3490,7 +2823,7 @@ static u32 qed_grc_dump_vfc_cam(struct qed_hwfn *p_hwfn, struct storm_defs *storm = &s_storm_defs[storm_id]; u32 cam_addr[VFC_CAM_ADDR_DWORDS] = { 0 }; u32 cam_cmd[VFC_CAM_CMD_DWORDS] = { 0 }; - u32 row, i, offset = 0; + u32 row, offset = 0; offset += qed_grc_dump_mem_hdr(p_hwfn, dump_buf + offset, @@ -3499,7 +2832,7 @@ static u32 qed_grc_dump_vfc_cam(struct qed_hwfn *p_hwfn, 0, total_size, 256, - false, "vfc_cam", true, storm->letter); + false, "vfc_cam", storm->letter); if (!dump) return offset + total_size; @@ -3507,26 +2840,18 @@ static u32 qed_grc_dump_vfc_cam(struct qed_hwfn *p_hwfn, /* Prepare CAM address */ SET_VAR_FIELD(cam_addr, VFC_CAM_ADDR, OP, VFC_OPCODE_CAM_RD); - for (row = 0; row < VFC_CAM_NUM_ROWS; - row++, offset += VFC_CAM_RESP_DWORDS) { - /* Write VFC CAM command */ + /* Read VFC CAM data */ + for (row = 0; row < VFC_CAM_NUM_ROWS; row++) { SET_VAR_FIELD(cam_cmd, VFC_CAM_CMD, ROW, row); - ARR_REG_WR(p_hwfn, - p_ptt, - storm->sem_fast_mem_addr + SEM_FAST_REG_VFC_DATA_WR, - cam_cmd, VFC_CAM_CMD_DWORDS); - - /* Write VFC CAM address */ - ARR_REG_WR(p_hwfn, - p_ptt, - storm->sem_fast_mem_addr + SEM_FAST_REG_VFC_ADDR, - cam_addr, VFC_CAM_ADDR_DWORDS); - - /* Read VFC CAM read response */ - ARR_REG_RD(p_hwfn, - p_ptt, - storm->sem_fast_mem_addr + SEM_FAST_REG_VFC_DATA_RD, - dump_buf + offset, VFC_CAM_RESP_DWORDS); + offset += qed_grc_dump_read_from_vfc(p_hwfn, + p_ptt, + storm, + cam_cmd, + VFC_CAM_CMD_DWORDS, + cam_addr, + VFC_CAM_ADDR_DWORDS, + VFC_CAM_RESP_DWORDS, + dump_buf + offset); } return offset; @@ -3543,7 +2868,7 @@ static u32 qed_grc_dump_vfc_ram(struct qed_hwfn *p_hwfn, struct storm_defs *storm = &s_storm_defs[storm_id]; u32 ram_addr[VFC_RAM_ADDR_DWORDS] = { 0 }; u32 ram_cmd[VFC_RAM_CMD_DWORDS] = { 0 }; - u32 row, i, offset = 0; + u32 row, offset = 0; offset += qed_grc_dump_mem_hdr(p_hwfn, dump_buf + offset, @@ -3554,35 +2879,27 @@ static u32 qed_grc_dump_vfc_ram(struct qed_hwfn *p_hwfn, 256, false, ram_defs->type_name, - true, storm->letter); - - /* Prepare RAM address */ - SET_VAR_FIELD(ram_addr, VFC_RAM_ADDR, OP, VFC_OPCODE_RAM_RD); + storm->letter); if (!dump) return offset + total_size; + /* Prepare RAM address */ + SET_VAR_FIELD(ram_addr, VFC_RAM_ADDR, OP, VFC_OPCODE_RAM_RD); + + /* Read VFC RAM data */ for (row = ram_defs->base_row; - row < ram_defs->base_row + ram_defs->num_rows; - row++, offset += VFC_RAM_RESP_DWORDS) { - /* Write VFC RAM command */ - ARR_REG_WR(p_hwfn, - p_ptt, - storm->sem_fast_mem_addr + SEM_FAST_REG_VFC_DATA_WR, - ram_cmd, VFC_RAM_CMD_DWORDS); - - /* Write VFC RAM address */ + row < ram_defs->base_row + ram_defs->num_rows; row++) { SET_VAR_FIELD(ram_addr, VFC_RAM_ADDR, ROW, row); - ARR_REG_WR(p_hwfn, - p_ptt, - storm->sem_fast_mem_addr + SEM_FAST_REG_VFC_ADDR, - ram_addr, VFC_RAM_ADDR_DWORDS); - - /* Read VFC RAM read response */ - ARR_REG_RD(p_hwfn, - p_ptt, - storm->sem_fast_mem_addr + SEM_FAST_REG_VFC_DATA_RD, - dump_buf + offset, VFC_RAM_RESP_DWORDS); + offset += qed_grc_dump_read_from_vfc(p_hwfn, + p_ptt, + storm, + ram_cmd, + VFC_RAM_CMD_DWORDS, + ram_addr, + VFC_RAM_ADDR_DWORDS, + VFC_RAM_RESP_DWORDS, + dump_buf + offset); } return offset; @@ -3592,16 +2909,13 @@ static u32 qed_grc_dump_vfc_ram(struct qed_hwfn *p_hwfn, static u32 qed_grc_dump_vfc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) { - struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; u8 storm_id, i; u32 offset = 0; for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) { if (!qed_grc_is_storm_included(p_hwfn, 
(enum dbg_storms)storm_id) || - !s_storm_defs[storm_id].has_vfc || - (storm_id == DBG_PSTORM_ID && dev_data->platform_id != - PLATFORM_ASIC)) + !s_storm_defs[storm_id].has_vfc) continue; /* Read CAM */ @@ -3651,7 +2965,7 @@ static u32 qed_grc_dump_rss(struct qed_hwfn *p_hwfn, total_dwords, rss_defs->entry_width, packed, - rss_defs->type_name, false, 0); + rss_defs->type_name, 0); /* Dump RSS data */ if (!dump) { @@ -3711,7 +3025,7 @@ static u32 qed_grc_dump_big_ram(struct qed_hwfn *p_hwfn, 0, ram_size, block_size * 8, - false, type_name, false, 0); + false, type_name, 0); /* Read and dump Big RAM data */ if (!dump) @@ -3737,6 +3051,7 @@ static u32 qed_grc_dump_big_ram(struct qed_hwfn *p_hwfn, return offset; } +/* Dumps MCP scratchpad. Returns the dumped size in dwords. */ static u32 qed_grc_dump_mcp(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) { @@ -3758,8 +3073,8 @@ static u32 qed_grc_dump_mcp(struct qed_hwfn *p_hwfn, dump, NULL, BYTES_TO_DWORDS(MCP_REG_SCRATCH), - MCP_REG_SCRATCH_SIZE_BB_K2, - false, 0, false, "MCP", false, 0); + MCP_REG_SCRATCH_SIZE, + false, 0, false, "MCP", 0); /* Dump MCP cpu_reg_file */ offset += qed_grc_dump_mem(p_hwfn, @@ -3769,19 +3084,19 @@ static u32 qed_grc_dump_mcp(struct qed_hwfn *p_hwfn, NULL, BYTES_TO_DWORDS(MCP_REG_CPU_REG_FILE), MCP_REG_CPU_REG_FILE_SIZE, - false, 0, false, "MCP", false, 0); + false, 0, false, "MCP", 0); /* Dump MCP registers */ block_enable[BLOCK_MCP] = true; offset += qed_grc_dump_registers(p_hwfn, p_ptt, dump_buf + offset, - dump, block_enable, "block", "MCP"); + dump, block_enable, "MCP"); /* Dump required non-MCP registers */ offset += qed_grc_dump_regs_hdr(dump_buf + offset, dump, 1, SPLIT_TYPE_NONE, 0, - "block", "MCP"); + "MCP"); addr = BYTES_TO_DWORDS(MISC_REG_SHARED_MEM_ADDR); offset += qed_grc_dump_reg_entry(p_hwfn, p_ptt, @@ -3798,7 +3113,9 @@ static u32 qed_grc_dump_mcp(struct qed_hwfn *p_hwfn, return offset; } -/* Dumps the tbus indirect memory for all PHYs. */ +/* Dumps the tbus indirect memory for all PHYs. + * Returns the dumped size in dwords. + */ static u32 qed_grc_dump_phy(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) { @@ -3832,7 +3149,7 @@ static u32 qed_grc_dump_phy(struct qed_hwfn *p_hwfn, mem_name, 0, PHY_DUMP_SIZE_DWORDS, - 16, true, mem_name, false, 0); + 16, true, mem_name, 0); if (!dump) { offset += PHY_DUMP_SIZE_DWORDS; @@ -3863,21 +3180,58 @@ static u32 qed_grc_dump_phy(struct qed_hwfn *p_hwfn, return offset; } -static void qed_config_dbg_line(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - enum block_id block_id, - u8 line_id, - u8 enable_mask, - u8 right_shift, - u8 force_valid_mask, u8 force_frame_mask) +static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 image_type, + u32 *nvram_offset_bytes, + u32 *nvram_size_bytes); + +static enum dbg_status qed_nvram_read(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 nvram_offset_bytes, + u32 nvram_size_bytes, u32 *ret_buf); + +/* Dumps the MCP HW dump from NVRAM. Returns the dumped size in dwords. 
*/ +static u32 qed_grc_dump_mcp_hw_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump) { - struct block_defs *block = s_block_defs[block_id]; + u32 hw_dump_offset_bytes = 0, hw_dump_size_bytes = 0; + u32 hw_dump_size_dwords = 0, offset = 0; + enum dbg_status status; - qed_wr(p_hwfn, p_ptt, block->dbg_select_addr, line_id); - qed_wr(p_hwfn, p_ptt, block->dbg_enable_addr, enable_mask); - qed_wr(p_hwfn, p_ptt, block->dbg_shift_addr, right_shift); - qed_wr(p_hwfn, p_ptt, block->dbg_force_valid_addr, force_valid_mask); - qed_wr(p_hwfn, p_ptt, block->dbg_force_frame_addr, force_frame_mask); + /* Read HW dump image from NVRAM */ + status = qed_find_nvram_image(p_hwfn, + p_ptt, + NVM_TYPE_HW_DUMP_OUT, + &hw_dump_offset_bytes, + &hw_dump_size_bytes); + if (status != DBG_STATUS_OK) + return 0; + + hw_dump_size_dwords = BYTES_TO_DWORDS(hw_dump_size_bytes); + + /* Dump HW dump image section */ + offset += qed_dump_section_hdr(dump_buf + offset, + dump, "mcp_hw_dump", 1); + offset += qed_dump_num_param(dump_buf + offset, + dump, "size", hw_dump_size_dwords); + + /* Read MCP HW dump image into dump buffer */ + if (dump && hw_dump_size_dwords) { + status = qed_nvram_read(p_hwfn, + p_ptt, + hw_dump_offset_bytes, + hw_dump_size_bytes, dump_buf + offset); + if (status != DBG_STATUS_OK) { + DP_NOTICE(p_hwfn, + "Failed to read MCP HW Dump image from NVRAM\n"); + return 0; + } + } + offset += hw_dump_size_dwords; + + return offset; } /* Dumps Static Debug data. Returns the dumped size in dwords. */ @@ -3886,26 +3240,19 @@ static u32 qed_grc_dump_static_debug(struct qed_hwfn *p_hwfn, u32 *dump_buf, bool dump) { struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; - u32 block_id, line_id, offset = 0; + u32 block_id, line_id, offset = 0, addr, len; /* Don't dump static debug if a debug bus recording is in progress */ if (dump && qed_rd(p_hwfn, p_ptt, DBG_REG_DBG_BLOCK_ON)) return 0; if (dump) { - /* Disable all blocks debug output */ - for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) { - struct block_defs *block = s_block_defs[block_id]; - - if (block->dbg_client_id[dev_data->chip_id] != - MAX_DBG_BUS_CLIENTS) - qed_wr(p_hwfn, p_ptt, block->dbg_enable_addr, - 0); - } + /* Disable debug bus in all blocks */ + qed_bus_disable_blocks(p_hwfn, p_ptt); qed_bus_reset_dbg_block(p_hwfn, p_ptt); - qed_bus_set_framing_mode(p_hwfn, - p_ptt, DBG_BUS_FRAME_MODE_8HW_0ST); + qed_wr(p_hwfn, + p_ptt, DBG_REG_FRAMING_MODE, DBG_BUS_FRAME_MODE_8HW); qed_wr(p_hwfn, p_ptt, DBG_REG_DEBUG_TARGET, DBG_BUS_TARGET_ID_INT_BUF); qed_wr(p_hwfn, p_ptt, DBG_REG_FULL_MODE, 1); @@ -3914,28 +3261,48 @@ static u32 qed_grc_dump_static_debug(struct qed_hwfn *p_hwfn, /* Dump all static debug lines for each relevant block */ for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) { - struct block_defs *block = s_block_defs[block_id]; - struct dbg_bus_block *block_desc; - u32 block_dwords, addr, len; - u8 dbg_client_id; + const struct dbg_block_chip *block_per_chip; + const struct dbg_block *block; + bool is_removed, has_dbg_bus; + u16 modes_buf_offset; + u32 block_dwords; + + block_per_chip = + qed_get_dbg_block_per_chip(p_hwfn, (enum block_id)block_id); + is_removed = GET_FIELD(block_per_chip->flags, + DBG_BLOCK_CHIP_IS_REMOVED); + has_dbg_bus = GET_FIELD(block_per_chip->flags, + DBG_BLOCK_CHIP_HAS_DBG_BUS); - if (block->dbg_client_id[dev_data->chip_id] == - MAX_DBG_BUS_CLIENTS) + /* read+clear for NWS parity is not working, skip NWS block */ + if (block_id == BLOCK_NWS) continue; - block_desc = 
get_dbg_bus_block_desc(p_hwfn, - (enum block_id)block_id); - block_dwords = NUM_DBG_LINES(block_desc) * + if (!is_removed && has_dbg_bus && + GET_FIELD(block_per_chip->dbg_bus_mode.data, + DBG_MODE_HDR_EVAL_MODE) > 0) { + modes_buf_offset = + GET_FIELD(block_per_chip->dbg_bus_mode.data, + DBG_MODE_HDR_MODES_BUF_OFFSET); + if (!qed_is_mode_match(p_hwfn, &modes_buf_offset)) + has_dbg_bus = false; + } + + if (is_removed || !has_dbg_bus) + continue; + + block_dwords = NUM_DBG_LINES(block_per_chip) * STATIC_DEBUG_LINE_DWORDS; /* Dump static section params */ + block = get_dbg_block(p_hwfn, (enum block_id)block_id); offset += qed_grc_dump_mem_hdr(p_hwfn, dump_buf + offset, dump, block->name, 0, block_dwords, - 32, false, "STATIC", false, 0); + 32, false, "STATIC", 0); if (!dump) { offset += block_dwords; @@ -3951,20 +3318,19 @@ static u32 qed_grc_dump_static_debug(struct qed_hwfn *p_hwfn, } /* Enable block's client */ - dbg_client_id = block->dbg_client_id[dev_data->chip_id]; qed_bus_enable_clients(p_hwfn, p_ptt, - BIT(dbg_client_id)); + BIT(block_per_chip->dbg_client_id)); addr = BYTES_TO_DWORDS(DBG_REG_CALENDAR_OUT_DATA); len = STATIC_DEBUG_LINE_DWORDS; - for (line_id = 0; line_id < (u32)NUM_DBG_LINES(block_desc); + for (line_id = 0; line_id < (u32)NUM_DBG_LINES(block_per_chip); line_id++) { /* Configure debug line ID */ - qed_config_dbg_line(p_hwfn, - p_ptt, - (enum block_id)block_id, - (u8)line_id, 0xf, 0, 0, 0); + qed_bus_config_dbg_line(p_hwfn, + p_ptt, + (enum block_id)block_id, + (u8)line_id, 0xf, 0, 0, 0); /* Read debug line info */ offset += qed_grc_dump_addr_range(p_hwfn, @@ -3979,7 +3345,8 @@ static u32 qed_grc_dump_static_debug(struct qed_hwfn *p_hwfn, /* Disable block's client and debug output */ qed_bus_enable_clients(p_hwfn, p_ptt, 0); - qed_wr(p_hwfn, p_ptt, block->dbg_enable_addr, 0); + qed_bus_config_dbg_line(p_hwfn, p_ptt, + (enum block_id)block_id, 0, 0, 0, 0, 0); } if (dump) { @@ -3999,8 +3366,8 @@ static enum dbg_status qed_grc_dump(struct qed_hwfn *p_hwfn, bool dump, u32 *num_dumped_dwords) { struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 dwords_read, offset = 0; bool parities_masked = false; - u32 offset = 0; u8 i; *num_dumped_dwords = 0; @@ -4019,13 +3386,11 @@ static enum dbg_status qed_grc_dump(struct qed_hwfn *p_hwfn, offset += qed_dump_num_param(dump_buf + offset, dump, "num-lcids", - qed_grc_get_param(p_hwfn, - DBG_GRC_PARAM_NUM_LCIDS)); + NUM_OF_LCIDS); offset += qed_dump_num_param(dump_buf + offset, dump, "num-ltids", - qed_grc_get_param(p_hwfn, - DBG_GRC_PARAM_NUM_LTIDS)); + NUM_OF_LTIDS); offset += qed_dump_num_param(dump_buf + offset, dump, "num-ports", dev_data->num_ports); @@ -4037,7 +3402,7 @@ static enum dbg_status qed_grc_dump(struct qed_hwfn *p_hwfn, /* Take all blocks out of reset (using reset registers) */ if (dump) { - qed_grc_unreset_blocks(p_hwfn, p_ptt); + qed_grc_unreset_blocks(p_hwfn, p_ptt, false); qed_update_blocks_reset_state(p_hwfn, p_ptt); } @@ -4080,7 +3445,7 @@ static enum dbg_status qed_grc_dump(struct qed_hwfn *p_hwfn, dump_buf + offset, dump, - block_enable, NULL, NULL); + block_enable, NULL); /* Dump special registers */ offset += qed_grc_dump_special_regs(p_hwfn, @@ -4114,23 +3479,29 @@ static enum dbg_status qed_grc_dump(struct qed_hwfn *p_hwfn, dump_buf + offset, dump, i); - /* Dump IORs */ - if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_IOR)) - offset += qed_grc_dump_iors(p_hwfn, - p_ptt, dump_buf + offset, dump); - /* Dump VFC */ - if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_VFC)) - offset += 
qed_grc_dump_vfc(p_hwfn, - p_ptt, dump_buf + offset, dump); + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_VFC)) { + dwords_read = qed_grc_dump_vfc(p_hwfn, + p_ptt, dump_buf + offset, dump); + offset += dwords_read; + if (!dwords_read) + return DBG_STATUS_VFC_READ_ERROR; + } /* Dump PHY tbus */ if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_PHY) && dev_data->chip_id == - CHIP_K2 && dev_data->platform_id == PLATFORM_ASIC) + CHIP_K2 && dev_data->hw_type == HW_TYPE_ASIC) offset += qed_grc_dump_phy(p_hwfn, p_ptt, dump_buf + offset, dump); + /* Dump MCP HW Dump */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_MCP_HW_DUMP) && + !qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_NO_MCP) && 1) + offset += qed_grc_dump_mcp_hw_dump(p_hwfn, + p_ptt, + dump_buf + offset, dump); + /* Dump static debug data (only if not during debug bus recording) */ if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_STATIC) && @@ -4181,8 +3552,9 @@ static u32 qed_idle_chk_dump_failure(struct qed_hwfn *p_hwfn, u8 reg_id; hdr = (struct dbg_idle_chk_result_hdr *)dump_buf; - regs = &((const union dbg_idle_chk_reg *) - s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_REGS].ptr)[rule->reg_offset]; + regs = (const union dbg_idle_chk_reg *) + p_hwfn->dbg_arrays[BIN_BUF_DBG_IDLE_CHK_REGS].ptr + + rule->reg_offset; cond_regs = ®s[0].cond_reg; info_regs = ®s[rule->num_cond_regs].info_reg; @@ -4202,8 +3574,8 @@ static u32 qed_idle_chk_dump_failure(struct qed_hwfn *p_hwfn, const struct dbg_idle_chk_cond_reg *reg = &cond_regs[reg_id]; struct dbg_idle_chk_result_reg_hdr *reg_hdr; - reg_hdr = (struct dbg_idle_chk_result_reg_hdr *) - (dump_buf + offset); + reg_hdr = + (struct dbg_idle_chk_result_reg_hdr *)(dump_buf + offset); /* Write register header */ if (!dump) { @@ -4320,12 +3692,13 @@ qed_idle_chk_dump_rule_entries(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, const u32 *imm_values; rule = &input_rules[i]; - regs = &((const union dbg_idle_chk_reg *) - s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_REGS].ptr) - [rule->reg_offset]; + regs = (const union dbg_idle_chk_reg *) + p_hwfn->dbg_arrays[BIN_BUF_DBG_IDLE_CHK_REGS].ptr + + rule->reg_offset; cond_regs = ®s[0].cond_reg; - imm_values = &s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_IMMS].ptr - [rule->imm_offset]; + imm_values = + (u32 *)p_hwfn->dbg_arrays[BIN_BUF_DBG_IDLE_CHK_IMMS].ptr + + rule->imm_offset; /* Check if all condition register blocks are out of reset, and * find maximal number of entries (all condition registers that @@ -4443,10 +3816,12 @@ qed_idle_chk_dump_rule_entries(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, static u32 qed_idle_chk_dump(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) { - u32 num_failing_rules_offset, offset = 0, input_offset = 0; - u32 num_failing_rules = 0; + struct virt_mem_desc *dbg_buf = + &p_hwfn->dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES]; + u32 num_failing_rules_offset, offset = 0, + input_offset = 0, num_failing_rules = 0; - /* Dump global params */ + /* Dump global params - 1 must match below amount of params */ offset += qed_dump_common_global_params(p_hwfn, p_ptt, dump_buf + offset, dump, 1); @@ -4458,12 +3833,10 @@ static u32 qed_idle_chk_dump(struct qed_hwfn *p_hwfn, num_failing_rules_offset = offset; offset += qed_dump_num_param(dump_buf + offset, dump, "num_rules", 0); - while (input_offset < - s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES].size_in_dwords) { + while (input_offset < BYTES_TO_DWORDS(dbg_buf->size)) { const struct dbg_idle_chk_cond_hdr *cond_hdr = - (const struct dbg_idle_chk_cond_hdr *) - 
&s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES].ptr - [input_offset++]; + (const struct dbg_idle_chk_cond_hdr *)dbg_buf->ptr + + input_offset++; bool eval_mode, mode_match = true; u32 curr_failing_rules; u16 modes_buf_offset; @@ -4480,16 +3853,21 @@ static u32 qed_idle_chk_dump(struct qed_hwfn *p_hwfn, } if (mode_match) { + const struct dbg_idle_chk_rule *rule = + (const struct dbg_idle_chk_rule *)((u32 *) + dbg_buf->ptr + + input_offset); + u32 num_input_rules = + cond_hdr->data_size / IDLE_CHK_RULE_SIZE_DWORDS; offset += qed_idle_chk_dump_rule_entries(p_hwfn, - p_ptt, - dump_buf + offset, - dump, - (const struct dbg_idle_chk_rule *) - &s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES]. - ptr[input_offset], - cond_hdr->data_size / IDLE_CHK_RULE_SIZE_DWORDS, - &curr_failing_rules); + p_ptt, + dump_buf + + offset, + dump, + rule, + num_input_rules, + &curr_failing_rules); num_failing_rules += curr_failing_rules; } @@ -4556,7 +3934,7 @@ static enum dbg_status qed_nvram_read(struct qed_hwfn *p_hwfn, { u32 ret_mcp_resp, ret_mcp_param, ret_read_size, bytes_to_copy; s32 bytes_left = nvram_size_bytes; - u32 read_offset = 0; + u32 read_offset = 0, param = 0; DP_VERBOSE(p_hwfn, QED_MSG_DEBUG, @@ -4569,14 +3947,14 @@ static enum dbg_status qed_nvram_read(struct qed_hwfn *p_hwfn, MCP_DRV_NVM_BUF_LEN) ? MCP_DRV_NVM_BUF_LEN : bytes_left; /* Call NVRAM read command */ + SET_MFW_FIELD(param, + DRV_MB_PARAM_NVM_OFFSET, + nvram_offset_bytes + read_offset); + SET_MFW_FIELD(param, DRV_MB_PARAM_NVM_LEN, bytes_to_copy); if (qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt, - DRV_MSG_CODE_NVM_READ_NVRAM, - (nvram_offset_bytes + - read_offset) | - (bytes_to_copy << - DRV_MB_PARAM_NVM_LEN_OFFSET), - &ret_mcp_resp, &ret_mcp_param, - &ret_read_size, + DRV_MSG_CODE_NVM_READ_NVRAM, param, + &ret_mcp_resp, + &ret_mcp_param, &ret_read_size, (u32 *)((u8 *)ret_buf + read_offset))) return DBG_STATUS_NVRAM_READ_FAILED; @@ -4714,12 +4092,12 @@ static enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn, u32 trace_meta_size_dwords = 0, running_bundle_id, offset = 0; u32 trace_meta_offset_bytes = 0, trace_meta_size_bytes = 0; enum dbg_status status; - bool mcp_access; int halted = 0; + bool use_mfw; *num_dumped_dwords = 0; - mcp_access = !qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_NO_MCP); + use_mfw = !qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_NO_MCP); /* Get trace data info */ status = qed_mcp_trace_get_data_info(p_hwfn, @@ -4740,7 +4118,7 @@ static enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn, * consistent. if halt fails, MCP trace is taken anyway, with a small * risk that it may be corrupt. 
*/ - if (dump && mcp_access) { + if (dump && use_mfw) { halted = !qed_mcp_halt(p_hwfn, p_ptt); if (!halted) DP_NOTICE(p_hwfn, "MCP halt failed!\n"); @@ -4780,17 +4158,15 @@ static enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn, */ trace_meta_size_bytes = qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_MCP_TRACE_META_SIZE); - if ((!trace_meta_size_bytes || dump) && mcp_access) { + if ((!trace_meta_size_bytes || dump) && use_mfw) status = qed_mcp_trace_get_meta_info(p_hwfn, p_ptt, trace_data_size_bytes, &running_bundle_id, &trace_meta_offset_bytes, &trace_meta_size_bytes); - if (status == DBG_STATUS_OK) - trace_meta_size_dwords = - BYTES_TO_DWORDS(trace_meta_size_bytes); - } + if (status == DBG_STATUS_OK) + trace_meta_size_dwords = BYTES_TO_DWORDS(trace_meta_size_bytes); /* Dump trace meta size param */ offset += qed_dump_num_param(dump_buf + offset, @@ -4814,7 +4190,7 @@ static enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn, /* If no mcp access, indicate that the dump doesn't contain the meta * data from NVRAM. */ - return mcp_access ? status : DBG_STATUS_NVRAM_GET_IMAGE_FAILED; + return use_mfw ? status : DBG_STATUS_NVRAM_GET_IMAGE_FAILED; } /* Dump GRC FIFO */ @@ -4992,16 +4368,18 @@ static enum dbg_status qed_protection_override_dump(struct qed_hwfn *p_hwfn, override_window_dwords = qed_rd(p_hwfn, p_ptt, GRC_REG_NUMBER_VALID_OVERRIDE_WINDOW) * PROTECTION_OVERRIDE_ELEMENT_DWORDS; - addr = BYTES_TO_DWORDS(GRC_REG_PROTECTION_OVERRIDE_WINDOW); - offset += qed_grc_dump_addr_range(p_hwfn, - p_ptt, - dump_buf + offset, - true, - addr, - override_window_dwords, - true, SPLIT_TYPE_NONE, 0); - qed_dump_num_param(dump_buf + size_param_offset, dump, "size", - override_window_dwords); + if (override_window_dwords) { + addr = BYTES_TO_DWORDS(GRC_REG_PROTECTION_OVERRIDE_WINDOW); + offset += qed_grc_dump_addr_range(p_hwfn, + p_ptt, + dump_buf + offset, + true, + addr, + override_window_dwords, + true, SPLIT_TYPE_NONE, 0); + qed_dump_num_param(dump_buf + size_param_offset, dump, "size", + override_window_dwords); + } out: /* Dump last section */ offset += qed_dump_last_section(dump_buf, offset, dump); @@ -5037,7 +4415,7 @@ static u32 qed_fw_asserts_dump(struct qed_hwfn *p_hwfn, struct storm_defs *storm = &s_storm_defs[storm_id]; u32 last_list_idx, addr; - if (dev_data->block_in_reset[storm->block_id]) + if (dev_data->block_in_reset[storm->sem_block_id]) continue; /* Read FW info for the current Storm */ @@ -5088,20 +4466,362 @@ static u32 qed_fw_asserts_dump(struct qed_hwfn *p_hwfn, return offset; } +/* Dumps the specified ILT pages to the specified buffer. + * Returns the dumped size in dwords. 
+ */ +static u32 qed_ilt_dump_pages_range(u32 *dump_buf, + bool dump, + u32 start_page_id, + u32 num_pages, + struct phys_mem_desc *ilt_pages, + bool dump_page_ids) +{ + u32 page_id, end_page_id, offset = 0; + + if (num_pages == 0) + return offset; + + end_page_id = start_page_id + num_pages - 1; + + for (page_id = start_page_id; page_id <= end_page_id; page_id++) { + struct phys_mem_desc *mem_desc = &ilt_pages[page_id]; + + /** + * + * if (page_id >= ->p_cxt_mngr->ilt_shadow_size) + * break; + */ + + if (!ilt_pages[page_id].virt_addr) + continue; + + if (dump_page_ids) { + /* Copy page ID to dump buffer */ + if (dump) + *(dump_buf + offset) = page_id; + offset++; + } else { + /* Copy page memory to dump buffer */ + if (dump) + memcpy(dump_buf + offset, + mem_desc->virt_addr, mem_desc->size); + offset += BYTES_TO_DWORDS(mem_desc->size); + } + } + + return offset; +} + +/* Dumps a section containing the dumped ILT pages. + * Returns the dumped size in dwords. + */ +static u32 qed_ilt_dump_pages_section(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + bool dump, + u32 valid_conn_pf_pages, + u32 valid_conn_vf_pages, + struct phys_mem_desc *ilt_pages, + bool dump_page_ids) +{ + struct qed_ilt_client_cfg *clients = p_hwfn->p_cxt_mngr->clients; + u32 pf_start_line, start_page_id, offset = 0; + u32 cdut_pf_init_pages, cdut_vf_init_pages; + u32 cdut_pf_work_pages, cdut_vf_work_pages; + u32 base_data_offset, size_param_offset; + u32 cdut_pf_pages, cdut_vf_pages; + const char *section_name; + u8 i; + + section_name = dump_page_ids ? "ilt_page_ids" : "ilt_page_mem"; + cdut_pf_init_pages = qed_get_cdut_num_pf_init_pages(p_hwfn); + cdut_vf_init_pages = qed_get_cdut_num_vf_init_pages(p_hwfn); + cdut_pf_work_pages = qed_get_cdut_num_pf_work_pages(p_hwfn); + cdut_vf_work_pages = qed_get_cdut_num_vf_work_pages(p_hwfn); + cdut_pf_pages = cdut_pf_init_pages + cdut_pf_work_pages; + cdut_vf_pages = cdut_vf_init_pages + cdut_vf_work_pages; + pf_start_line = p_hwfn->p_cxt_mngr->pf_start_line; + + offset += + qed_dump_section_hdr(dump_buf + offset, dump, section_name, 1); + + /* Dump size parameter (0 for now, overwritten with real size later) */ + size_param_offset = offset; + offset += qed_dump_num_param(dump_buf + offset, dump, "size", 0); + base_data_offset = offset; + + /* CDUC pages are ordered as follows: + * - PF pages - valid section (included in PF connection type mapping) + * - PF pages - invalid section (not dumped) + * - For each VF in the PF: + * - VF pages - valid section (included in VF connection type mapping) + * - VF pages - invalid section (not dumped) + */ + if (qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_DUMP_ILT_CDUC)) { + /* Dump connection PF pages */ + start_page_id = clients[ILT_CLI_CDUC].first.val - pf_start_line; + offset += qed_ilt_dump_pages_range(dump_buf + offset, + dump, + start_page_id, + valid_conn_pf_pages, + ilt_pages, dump_page_ids); + + /* Dump connection VF pages */ + start_page_id += clients[ILT_CLI_CDUC].pf_total_lines; + for (i = 0; i < p_hwfn->p_cxt_mngr->vf_count; + i++, start_page_id += clients[ILT_CLI_CDUC].vf_total_lines) + offset += qed_ilt_dump_pages_range(dump_buf + offset, + dump, + start_page_id, + valid_conn_vf_pages, + ilt_pages, + dump_page_ids); + } + + /* CDUT pages are ordered as follows: + * - PF init pages (not dumped) + * - PF work pages + * - For each VF in the PF: + * - VF init pages (not dumped) + * - VF work pages + */ + if (qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_DUMP_ILT_CDUT)) { + /* Dump task PF pages */ + start_page_id = 
clients[ILT_CLI_CDUT].first.val + + cdut_pf_init_pages - pf_start_line; + offset += qed_ilt_dump_pages_range(dump_buf + offset, + dump, + start_page_id, + cdut_pf_work_pages, + ilt_pages, dump_page_ids); + + /* Dump task VF pages */ + start_page_id = clients[ILT_CLI_CDUT].first.val + + cdut_pf_pages + cdut_vf_init_pages - pf_start_line; + for (i = 0; i < p_hwfn->p_cxt_mngr->vf_count; + i++, start_page_id += cdut_vf_pages) + offset += qed_ilt_dump_pages_range(dump_buf + offset, + dump, + start_page_id, + cdut_vf_work_pages, + ilt_pages, + dump_page_ids); + } + + /* Overwrite size param */ + if (dump) + qed_dump_num_param(dump_buf + size_param_offset, + dump, "size", offset - base_data_offset); + + return offset; +} + +/* Performs ILT Dump to the specified buffer. + * Returns the dumped size in dwords. + */ +static u32 qed_ilt_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) +{ + struct qed_ilt_client_cfg *clients = p_hwfn->p_cxt_mngr->clients; + u32 valid_conn_vf_cids, valid_conn_vf_pages, offset = 0; + u32 valid_conn_pf_cids, valid_conn_pf_pages, num_pages; + u32 num_cids_per_page, conn_ctx_size; + u32 cduc_page_size, cdut_page_size; + struct phys_mem_desc *ilt_pages; + u8 conn_type; + + cduc_page_size = 1 << + (clients[ILT_CLI_CDUC].p_size.val + PXP_ILT_PAGE_SIZE_NUM_BITS_MIN); + cdut_page_size = 1 << + (clients[ILT_CLI_CDUT].p_size.val + PXP_ILT_PAGE_SIZE_NUM_BITS_MIN); + conn_ctx_size = p_hwfn->p_cxt_mngr->conn_ctx_size; + num_cids_per_page = (int)(cduc_page_size / conn_ctx_size); + ilt_pages = p_hwfn->p_cxt_mngr->ilt_shadow; + + /* Dump global params - 22 must match number of params below */ + offset += qed_dump_common_global_params(p_hwfn, p_ptt, + dump_buf + offset, dump, 22); + offset += qed_dump_str_param(dump_buf + offset, + dump, "dump-type", "ilt-dump"); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "cduc-page-size", cduc_page_size); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "cduc-first-page-id", + clients[ILT_CLI_CDUC].first.val); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "cduc-last-page-id", + clients[ILT_CLI_CDUC].last.val); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "cduc-num-pf-pages", + clients + [ILT_CLI_CDUC].pf_total_lines); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "cduc-num-vf-pages", + clients + [ILT_CLI_CDUC].vf_total_lines); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "max-conn-ctx-size", + conn_ctx_size); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "cdut-page-size", cdut_page_size); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "cdut-first-page-id", + clients[ILT_CLI_CDUT].first.val); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "cdut-last-page-id", + clients[ILT_CLI_CDUT].last.val); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "cdut-num-pf-init-pages", + qed_get_cdut_num_pf_init_pages(p_hwfn)); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "cdut-num-vf-init-pages", + qed_get_cdut_num_vf_init_pages(p_hwfn)); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "cdut-num-pf-work-pages", + qed_get_cdut_num_pf_work_pages(p_hwfn)); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "cdut-num-vf-work-pages", + qed_get_cdut_num_vf_work_pages(p_hwfn)); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "max-task-ctx-size", + p_hwfn->p_cxt_mngr->task_ctx_size); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "task-type-id", + 
p_hwfn->p_cxt_mngr->task_type_id); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "first-vf-id-in-pf", + p_hwfn->p_cxt_mngr->first_vf_in_pf); + offset += /* 18 */ qed_dump_num_param(dump_buf + offset, + dump, + "num-vfs-in-pf", + p_hwfn->p_cxt_mngr->vf_count); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "ptr-size-bytes", sizeof(void *)); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "pf-start-line", + p_hwfn->p_cxt_mngr->pf_start_line); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "page-mem-desc-size-dwords", + PAGE_MEM_DESC_SIZE_DWORDS); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "ilt-shadow-size", + p_hwfn->p_cxt_mngr->ilt_shadow_size); + /* Additional/Less parameters require matching of number in call to + * dump_common_global_params() + */ + + /* Dump section containing number of PF CIDs per connection type */ + offset += qed_dump_section_hdr(dump_buf + offset, + dump, "num_pf_cids_per_conn_type", 1); + offset += qed_dump_num_param(dump_buf + offset, + dump, "size", NUM_OF_CONNECTION_TYPES_E4); + for (conn_type = 0, valid_conn_pf_cids = 0; + conn_type < NUM_OF_CONNECTION_TYPES_E4; conn_type++, offset++) { + u32 num_pf_cids = + p_hwfn->p_cxt_mngr->conn_cfg[conn_type].cid_count; + + if (dump) + *(dump_buf + offset) = num_pf_cids; + valid_conn_pf_cids += num_pf_cids; + } + + /* Dump section containing number of VF CIDs per connection type */ + offset += qed_dump_section_hdr(dump_buf + offset, + dump, "num_vf_cids_per_conn_type", 1); + offset += qed_dump_num_param(dump_buf + offset, + dump, "size", NUM_OF_CONNECTION_TYPES_E4); + for (conn_type = 0, valid_conn_vf_cids = 0; + conn_type < NUM_OF_CONNECTION_TYPES_E4; conn_type++, offset++) { + u32 num_vf_cids = + p_hwfn->p_cxt_mngr->conn_cfg[conn_type].cids_per_vf; + + if (dump) + *(dump_buf + offset) = num_vf_cids; + valid_conn_vf_cids += num_vf_cids; + } + + /* Dump section containing physical memory descs for each ILT page */ + num_pages = p_hwfn->p_cxt_mngr->ilt_shadow_size; + offset += qed_dump_section_hdr(dump_buf + offset, + dump, "ilt_page_desc", 1); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "size", + num_pages * PAGE_MEM_DESC_SIZE_DWORDS); + + /* Copy memory descriptors to dump buffer */ + if (dump) { + u32 page_id; + + for (page_id = 0; page_id < num_pages; + page_id++, offset += PAGE_MEM_DESC_SIZE_DWORDS) + memcpy(dump_buf + offset, + &ilt_pages[page_id], + DWORDS_TO_BYTES(PAGE_MEM_DESC_SIZE_DWORDS)); + } else { + offset += num_pages * PAGE_MEM_DESC_SIZE_DWORDS; + } + + valid_conn_pf_pages = DIV_ROUND_UP(valid_conn_pf_cids, + num_cids_per_page); + valid_conn_vf_pages = DIV_ROUND_UP(valid_conn_vf_cids, + num_cids_per_page); + + /* Dump ILT pages IDs */ + offset += qed_ilt_dump_pages_section(p_hwfn, + dump_buf + offset, + dump, + valid_conn_pf_pages, + valid_conn_vf_pages, + ilt_pages, true); + + /* Dump ILT pages memory */ + offset += qed_ilt_dump_pages_section(p_hwfn, + dump_buf + offset, + dump, + valid_conn_pf_pages, + valid_conn_vf_pages, + ilt_pages, false); + + /* Dump last section */ + offset += qed_dump_last_section(dump_buf, offset, dump); + + return offset; +} + /***************************** Public Functions *******************************/ -enum dbg_status qed_dbg_set_bin_ptr(const u8 * const bin_ptr) +enum dbg_status qed_dbg_set_bin_ptr(struct qed_hwfn *p_hwfn, + const u8 * const bin_ptr) { - struct bin_buffer_hdr *buf_array = (struct bin_buffer_hdr *)bin_ptr; + struct bin_buffer_hdr *buf_hdrs = (struct bin_buffer_hdr *)bin_ptr; 
u8 buf_id; - /* convert binary data to debug arrays */ - for (buf_id = 0; buf_id < MAX_BIN_DBG_BUFFER_TYPE; buf_id++) { - s_dbg_arrays[buf_id].ptr = - (u32 *)(bin_ptr + buf_array[buf_id].offset); - s_dbg_arrays[buf_id].size_in_dwords = - BYTES_TO_DWORDS(buf_array[buf_id].length); - } + /* Convert binary data to debug arrays */ + for (buf_id = 0; buf_id < MAX_BIN_DBG_BUFFER_TYPE; buf_id++) + qed_set_dbg_bin_buf(p_hwfn, + buf_id, + (u32 *)(bin_ptr + buf_hdrs[buf_id].offset), + buf_hdrs[buf_id].length); return DBG_STATUS_OK; } @@ -5116,7 +4836,7 @@ bool qed_read_fw_info(struct qed_hwfn *p_hwfn, struct storm_defs *storm = &s_storm_defs[storm_id]; /* Skip Storm if it's in reset */ - if (dev_data->block_in_reset[storm->block_id]) + if (dev_data->block_in_reset[storm->sem_block_id]) continue; /* Read FW info for the current Storm */ @@ -5129,16 +4849,17 @@ bool qed_read_fw_info(struct qed_hwfn *p_hwfn, } enum dbg_status qed_dbg_grc_config(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, enum dbg_grc_params grc_param, u32 val) { + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; enum dbg_status status; int i; - DP_VERBOSE(p_hwfn, QED_MSG_DEBUG, + DP_VERBOSE(p_hwfn, + QED_MSG_DEBUG, "dbg_grc_config: paramId = %d, val = %d\n", grc_param, val); - status = qed_dbg_dev_init(p_hwfn, p_ptt); + status = qed_dbg_dev_init(p_hwfn); if (status != DBG_STATUS_OK) return status; @@ -5164,24 +4885,23 @@ enum dbg_status qed_dbg_grc_config(struct qed_hwfn *p_hwfn, /* Update all params with the preset values */ for (i = 0; i < MAX_DBG_GRC_PARAMS; i++) { + struct grc_param_defs *defs = &s_grc_param_defs[i]; u32 preset_val; - /* Skip persistent params */ - if (s_grc_param_defs[i].is_persistent) + if (defs->is_persistent) continue; /* Find preset value */ if (grc_param == DBG_GRC_PARAM_EXCLUDE_ALL) preset_val = - s_grc_param_defs[i].exclude_all_preset_val; + defs->exclude_all_preset_val; else if (grc_param == DBG_GRC_PARAM_CRASH) preset_val = - s_grc_param_defs[i].crash_preset_val; + defs->crash_preset_val[dev_data->chip_id]; else return DBG_STATUS_INVALID_ARGS; - qed_grc_set_param(p_hwfn, - (enum dbg_grc_params)i, preset_val); + qed_grc_set_param(p_hwfn, i, preset_val); } } else { /* Regular param - set its value */ @@ -5207,18 +4927,18 @@ enum dbg_status qed_dbg_grc_get_dump_buf_size(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 *buf_size) { - enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + enum dbg_status status = qed_dbg_dev_init(p_hwfn); *buf_size = 0; if (status != DBG_STATUS_OK) return status; - if (!s_dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr || - !s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].ptr || - !s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].ptr || - !s_dbg_arrays[BIN_BUF_DBG_ATTN_BLOCKS].ptr || - !s_dbg_arrays[BIN_BUF_DBG_ATTN_REGS].ptr) + if (!p_hwfn->dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr || + !p_hwfn->dbg_arrays[BIN_BUF_DBG_DUMP_REG].ptr || + !p_hwfn->dbg_arrays[BIN_BUF_DBG_DUMP_MEM].ptr || + !p_hwfn->dbg_arrays[BIN_BUF_DBG_ATTN_BLOCKS].ptr || + !p_hwfn->dbg_arrays[BIN_BUF_DBG_ATTN_REGS].ptr) return DBG_STATUS_DBG_ARRAY_NOT_SET; return qed_grc_dump(p_hwfn, p_ptt, NULL, false, buf_size); @@ -5258,20 +4978,19 @@ enum dbg_status qed_dbg_idle_chk_get_dump_buf_size(struct qed_hwfn *p_hwfn, u32 *buf_size) { struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; - struct idle_chk_data *idle_chk; + struct idle_chk_data *idle_chk = &dev_data->idle_chk; enum dbg_status status; - idle_chk = &dev_data->idle_chk; *buf_size = 0; - status = qed_dbg_dev_init(p_hwfn, p_ptt); + status = qed_dbg_dev_init(p_hwfn); if (status 
!= DBG_STATUS_OK) return status; - if (!s_dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr || - !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_REGS].ptr || - !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_IMMS].ptr || - !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES].ptr) + if (!p_hwfn->dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr || + !p_hwfn->dbg_arrays[BIN_BUF_DBG_IDLE_CHK_REGS].ptr || + !p_hwfn->dbg_arrays[BIN_BUF_DBG_IDLE_CHK_IMMS].ptr || + !p_hwfn->dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES].ptr) return DBG_STATUS_DBG_ARRAY_NOT_SET; if (!idle_chk->buf_size_set) { @@ -5306,6 +5025,7 @@ enum dbg_status qed_dbg_idle_chk_dump(struct qed_hwfn *p_hwfn, return DBG_STATUS_DUMP_BUF_TOO_SMALL; /* Update reset state */ + qed_grc_unreset_blocks(p_hwfn, p_ptt, true); qed_update_blocks_reset_state(p_hwfn, p_ptt); /* Idle Check Dump */ @@ -5321,7 +5041,7 @@ enum dbg_status qed_dbg_mcp_trace_get_dump_buf_size(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 *buf_size) { - enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + enum dbg_status status = qed_dbg_dev_init(p_hwfn); *buf_size = 0; @@ -5368,7 +5088,7 @@ enum dbg_status qed_dbg_reg_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 *buf_size) { - enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + enum dbg_status status = qed_dbg_dev_init(p_hwfn); *buf_size = 0; @@ -5414,7 +5134,7 @@ enum dbg_status qed_dbg_igu_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 *buf_size) { - enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + enum dbg_status status = qed_dbg_dev_init(p_hwfn); *buf_size = 0; @@ -5460,7 +5180,7 @@ qed_dbg_protection_override_get_dump_buf_size(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 *buf_size) { - enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + enum dbg_status status = qed_dbg_dev_init(p_hwfn); *buf_size = 0; @@ -5510,7 +5230,7 @@ enum dbg_status qed_dbg_fw_asserts_get_dump_buf_size(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 *buf_size) { - enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + enum dbg_status status = qed_dbg_dev_init(p_hwfn); *buf_size = 0; @@ -5554,6 +5274,50 @@ enum dbg_status qed_dbg_fw_asserts_dump(struct qed_hwfn *p_hwfn, return DBG_STATUS_OK; } +static enum dbg_status qed_dbg_ilt_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size) +{ + enum dbg_status status = qed_dbg_dev_init(p_hwfn); + + *buf_size = 0; + + if (status != DBG_STATUS_OK) + return status; + + *buf_size = qed_ilt_dump(p_hwfn, p_ptt, NULL, false); + + return DBG_STATUS_OK; +} + +static enum dbg_status qed_dbg_ilt_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords) +{ + u32 needed_buf_size_in_dwords; + enum dbg_status status; + + *num_dumped_dwords = 0; + + status = qed_dbg_ilt_get_dump_buf_size(p_hwfn, + p_ptt, + &needed_buf_size_in_dwords); + if (status != DBG_STATUS_OK) + return status; + + if (buf_size_in_dwords < needed_buf_size_in_dwords) + return DBG_STATUS_DUMP_BUF_TOO_SMALL; + + *num_dumped_dwords = qed_ilt_dump(p_hwfn, p_ptt, dump_buf, true); + + /* Reveret GRC params to their default */ + qed_dbg_grc_set_params_default(p_hwfn); + + return DBG_STATUS_OK; +} + enum dbg_status qed_dbg_read_attn(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, enum block_id block_id, @@ -5561,19 +5325,20 @@ enum dbg_status qed_dbg_read_attn(struct qed_hwfn *p_hwfn, bool clear_status, struct dbg_attn_block_result *results) { - enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + enum 
dbg_status status = qed_dbg_dev_init(p_hwfn); u8 reg_idx, num_attn_regs, num_result_regs = 0; const struct dbg_attn_reg *attn_reg_arr; if (status != DBG_STATUS_OK) return status; - if (!s_dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr || - !s_dbg_arrays[BIN_BUF_DBG_ATTN_BLOCKS].ptr || - !s_dbg_arrays[BIN_BUF_DBG_ATTN_REGS].ptr) + if (!p_hwfn->dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr || + !p_hwfn->dbg_arrays[BIN_BUF_DBG_ATTN_BLOCKS].ptr || + !p_hwfn->dbg_arrays[BIN_BUF_DBG_ATTN_REGS].ptr) return DBG_STATUS_DBG_ARRAY_NOT_SET; - attn_reg_arr = qed_get_block_attn_regs(block_id, + attn_reg_arr = qed_get_block_attn_regs(p_hwfn, + block_id, attn_type, &num_attn_regs); for (reg_idx = 0; reg_idx < num_attn_regs; reg_idx++) { @@ -5618,7 +5383,7 @@ enum dbg_status qed_dbg_read_attn(struct qed_hwfn *p_hwfn, results->block_id = (u8)block_id; results->names_offset = - qed_get_block_attn_data(block_id, attn_type)->names_offset; + qed_get_block_attn_data(p_hwfn, block_id, attn_type)->names_offset; SET_FIELD(results->data, DBG_ATTN_BLOCK_RESULT_ATTN_TYPE, attn_type); SET_FIELD(results->data, DBG_ATTN_BLOCK_RESULT_NUM_REGS, num_result_regs); @@ -5628,11 +5393,6 @@ enum dbg_status qed_dbg_read_attn(struct qed_hwfn *p_hwfn, /******************************* Data Types **********************************/ -struct block_info { - const char *name; - enum block_id id; -}; - /* REG fifo element */ struct reg_fifo_element { u64 data; @@ -5656,6 +5416,12 @@ struct reg_fifo_element { #define REG_FIFO_ELEMENT_ERROR_MASK 0x1f }; +/* REG fifo error element */ +struct reg_fifo_err { + u32 err_code; + const char *err_msg; +}; + /* IGU fifo element */ struct igu_fifo_element { u32 dword0; @@ -5755,20 +5521,6 @@ struct igu_fifo_addr_data { enum igu_fifo_addr_types type; }; -struct mcp_trace_meta { - u32 modules_num; - char **modules; - u32 formats_num; - struct mcp_trace_format *formats; - bool is_allocated; -}; - -/* Debug Tools user data */ -struct dbg_tools_user_data { - struct mcp_trace_meta mcp_trace_meta; - const u32 *mcp_trace_user_meta_buf; -}; - /******************************** Constants **********************************/ #define MAX_MSG_LEN 1024 @@ -5776,7 +5528,7 @@ struct dbg_tools_user_data { #define MCP_TRACE_MAX_MODULE_LEN 8 #define MCP_TRACE_FORMAT_MAX_PARAMS 3 #define MCP_TRACE_FORMAT_PARAM_WIDTH \ - (MCP_TRACE_FORMAT_P2_SIZE_SHIFT - MCP_TRACE_FORMAT_P1_SIZE_SHIFT) + (MCP_TRACE_FORMAT_P2_SIZE_OFFSET - MCP_TRACE_FORMAT_P1_SIZE_OFFSET) #define REG_FIFO_ELEMENT_ADDR_FACTOR 4 #define REG_FIFO_ELEMENT_IS_PF_VF_VAL 127 @@ -5785,107 +5537,6 @@ struct dbg_tools_user_data { /***************************** Constant Arrays *******************************/ -struct user_dbg_array { - const u32 *ptr; - u32 size_in_dwords; -}; - -/* Debug arrays */ -static struct user_dbg_array -s_user_dbg_arrays[MAX_BIN_DBG_BUFFER_TYPE] = { {NULL} }; - -/* Block names array */ -static struct block_info s_block_info_arr[] = { - {"grc", BLOCK_GRC}, - {"miscs", BLOCK_MISCS}, - {"misc", BLOCK_MISC}, - {"dbu", BLOCK_DBU}, - {"pglue_b", BLOCK_PGLUE_B}, - {"cnig", BLOCK_CNIG}, - {"cpmu", BLOCK_CPMU}, - {"ncsi", BLOCK_NCSI}, - {"opte", BLOCK_OPTE}, - {"bmb", BLOCK_BMB}, - {"pcie", BLOCK_PCIE}, - {"mcp", BLOCK_MCP}, - {"mcp2", BLOCK_MCP2}, - {"pswhst", BLOCK_PSWHST}, - {"pswhst2", BLOCK_PSWHST2}, - {"pswrd", BLOCK_PSWRD}, - {"pswrd2", BLOCK_PSWRD2}, - {"pswwr", BLOCK_PSWWR}, - {"pswwr2", BLOCK_PSWWR2}, - {"pswrq", BLOCK_PSWRQ}, - {"pswrq2", BLOCK_PSWRQ2}, - {"pglcs", BLOCK_PGLCS}, - {"ptu", BLOCK_PTU}, - {"dmae", BLOCK_DMAE}, - {"tcm", BLOCK_TCM}, - {"mcm", 
BLOCK_MCM}, - {"ucm", BLOCK_UCM}, - {"xcm", BLOCK_XCM}, - {"ycm", BLOCK_YCM}, - {"pcm", BLOCK_PCM}, - {"qm", BLOCK_QM}, - {"tm", BLOCK_TM}, - {"dorq", BLOCK_DORQ}, - {"brb", BLOCK_BRB}, - {"src", BLOCK_SRC}, - {"prs", BLOCK_PRS}, - {"tsdm", BLOCK_TSDM}, - {"msdm", BLOCK_MSDM}, - {"usdm", BLOCK_USDM}, - {"xsdm", BLOCK_XSDM}, - {"ysdm", BLOCK_YSDM}, - {"psdm", BLOCK_PSDM}, - {"tsem", BLOCK_TSEM}, - {"msem", BLOCK_MSEM}, - {"usem", BLOCK_USEM}, - {"xsem", BLOCK_XSEM}, - {"ysem", BLOCK_YSEM}, - {"psem", BLOCK_PSEM}, - {"rss", BLOCK_RSS}, - {"tmld", BLOCK_TMLD}, - {"muld", BLOCK_MULD}, - {"yuld", BLOCK_YULD}, - {"xyld", BLOCK_XYLD}, - {"ptld", BLOCK_PTLD}, - {"ypld", BLOCK_YPLD}, - {"prm", BLOCK_PRM}, - {"pbf_pb1", BLOCK_PBF_PB1}, - {"pbf_pb2", BLOCK_PBF_PB2}, - {"rpb", BLOCK_RPB}, - {"btb", BLOCK_BTB}, - {"pbf", BLOCK_PBF}, - {"rdif", BLOCK_RDIF}, - {"tdif", BLOCK_TDIF}, - {"cdu", BLOCK_CDU}, - {"ccfc", BLOCK_CCFC}, - {"tcfc", BLOCK_TCFC}, - {"igu", BLOCK_IGU}, - {"cau", BLOCK_CAU}, - {"rgfs", BLOCK_RGFS}, - {"rgsrc", BLOCK_RGSRC}, - {"tgfs", BLOCK_TGFS}, - {"tgsrc", BLOCK_TGSRC}, - {"umac", BLOCK_UMAC}, - {"xmac", BLOCK_XMAC}, - {"dbg", BLOCK_DBG}, - {"nig", BLOCK_NIG}, - {"wol", BLOCK_WOL}, - {"bmbn", BLOCK_BMBN}, - {"ipc", BLOCK_IPC}, - {"nwm", BLOCK_NWM}, - {"nws", BLOCK_NWS}, - {"ms", BLOCK_MS}, - {"phy_pcie", BLOCK_PHY_PCIE}, - {"led", BLOCK_LED}, - {"avs_wrap", BLOCK_AVS_WRAP}, - {"pxpreqbus", BLOCK_PXPREQBUS}, - {"misc_aeu", BLOCK_MISC_AEU}, - {"bar0_map", BLOCK_BAR0_MAP} -}; - /* Status string array */ static const char * const s_status_str[] = { /* DBG_STATUS_OK */ @@ -5915,14 +5566,12 @@ static const char * const s_status_str[] = { /* DBG_STATUS_PCI_BUF_NOT_ALLOCATED */ "A PCI buffer wasn't allocated", - /* DBG_STATUS_TOO_MANY_INPUTS */ - "Too many inputs were enabled. 
Enabled less inputs, or set 'unifyInputs' to true", + /* DBG_STATUS_INVALID_FILTER_TRIGGER_DWORDS */ + "The filter/trigger constraint dword offsets are not enabled for recording", - /* DBG_STATUS_INPUT_OVERLAP */ - "Overlapping debug bus inputs", - /* DBG_STATUS_HW_ONLY_RECORDING */ - "Cannot record Storm data since the entire recording cycle is used by HW", + /* DBG_STATUS_VFC_READ_ERROR */ + "Error reading from VFC", /* DBG_STATUS_STORM_ALREADY_ENABLED */ "The Storm was already enabled", @@ -5939,8 +5588,8 @@ static const char * const s_status_str[] = { /* DBG_STATUS_NO_INPUT_ENABLED */ "No input was enabled for recording", - /* DBG_STATUS_NO_FILTER_TRIGGER_64B */ - "Filters and triggers are not allowed when recording in 64b units", + /* DBG_STATUS_NO_FILTER_TRIGGER_256B */ + "Filters and triggers are not allowed in E4 256-bit mode", /* DBG_STATUS_FILTER_ALREADY_ENABLED */ "The filter was already enabled", @@ -6014,8 +5663,8 @@ static const char * const s_status_str[] = { /* DBG_STATUS_MCP_COULD_NOT_RESUME */ "Failed to resume MCP after halt", - /* DBG_STATUS_RESERVED2 */ - "Reserved debug status - shouldn't be returned", + /* DBG_STATUS_RESERVED0 */ + "", /* DBG_STATUS_SEMI_FIFO_NOT_EMPTY */ "Failed to empty SEMI sync FIFO", @@ -6038,17 +5687,32 @@ static const char * const s_status_str[] = { /* DBG_STATUS_DBG_ARRAY_NOT_SET */ "Debug arrays were not set (when using binary files, dbg_set_bin_ptr must be called)", - /* DBG_STATUS_FILTER_BUG */ - "Debug Bus filtering requires the -unifyInputs option (due to a HW bug)", + /* DBG_STATUS_RESERVED1 */ + "", /* DBG_STATUS_NON_MATCHING_LINES */ - "Non-matching debug lines - all lines must be of the same type (either 128b or 256b)", + "Non-matching debug lines - in E4, all lines must be of the same type (either 128b or 256b)", - /* DBG_STATUS_INVALID_TRIGGER_DWORD_OFFSET */ - "The selected trigger dword offset wasn't enabled in the recorded HW block", + /* DBG_STATUS_INSUFFICIENT_HW_IDS */ + "Insufficient HW IDs. 
Try to record less Storms/blocks", /* DBG_STATUS_DBG_BUS_IN_USE */ - "The debug bus is in use" + "The debug bus is in use", + + /* DBG_STATUS_INVALID_STORM_DBG_MODE */ + "The storm debug mode is not supported in the current chip", + + /* DBG_STATUS_OTHER_ENGINE_BB_ONLY */ + "Other engine is supported only in BB", + + /* DBG_STATUS_FILTER_SINGLE_HW_ID */ + "The configured filter mode requires a single Storm/block input", + + /* DBG_STATUS_TRIGGER_SINGLE_HW_ID */ + "The configured filter mode requires that all the constraints of a single trigger state will be defined on a single Storm/block input", + + /* DBG_STATUS_MISSING_TRIGGER_STATE_STORM */ + "When triggering on Storm data, the Storm to trigger on must be specified" }; /* Idle check severity names array */ @@ -6104,7 +5768,7 @@ static const char * const s_master_strs[] = { "xsdm", "dbu", "dmae", - "???", + "jdap", "???", "???", "???", @@ -6112,12 +5776,13 @@ static const char * const s_master_strs[] = { }; /* REG FIFO error messages array */ -static const char * const s_reg_fifo_error_strs[] = { - "grc timeout", - "address doesn't belong to any block", - "reserved address in block or write to read-only address", - "privilege/protection mismatch", - "path isolation error" +static struct reg_fifo_err s_reg_fifo_errors[] = { + {1, "grc timeout"}, + {2, "address doesn't belong to any block"}, + {4, "reserved address in block or write to read-only address"}, + {8, "privilege/protection mismatch"}, + {16, "path isolation error"}, + {17, "RSL error"} }; /* IGU FIFO sources array */ @@ -6357,8 +6022,21 @@ static u32 qed_print_section_params(u32 *dump_buf, return dump_offset; } -static struct dbg_tools_user_data * -qed_dbg_get_user_data(struct qed_hwfn *p_hwfn) +/* Returns the block name that matches the specified block ID, + * or NULL if not found. + */ +static const char *qed_dbg_get_block_name(struct qed_hwfn *p_hwfn, + enum block_id block_id) +{ + const struct dbg_block_user *block = + (const struct dbg_block_user *) + p_hwfn->dbg_arrays[BIN_BUF_DBG_BLOCKS_USER_DATA].ptr + block_id; + + return (const char *)block->name; +} + +static struct dbg_tools_user_data *qed_dbg_get_user_data(struct qed_hwfn + *p_hwfn) { return (struct dbg_tools_user_data *)p_hwfn->dbg_user_info; } @@ -6366,7 +6044,8 @@ qed_dbg_get_user_data(struct qed_hwfn *p_hwfn) /* Parses the idle check rules and returns the number of characters printed. * In case of parsing error, returns 0. */ -static u32 qed_parse_idle_chk_dump_rules(u32 *dump_buf, +static u32 qed_parse_idle_chk_dump_rules(struct qed_hwfn *p_hwfn, + u32 *dump_buf, u32 *dump_buf_end, u32 num_rules, bool print_fw_idle_chk, @@ -6394,19 +6073,18 @@ static u32 qed_parse_idle_chk_dump_rules(u32 *dump_buf, hdr = (struct dbg_idle_chk_result_hdr *)dump_buf; rule_parsing_data = - (const struct dbg_idle_chk_rule_parsing_data *) - &s_user_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_PARSING_DATA]. 
- ptr[hdr->rule_id]; + (const struct dbg_idle_chk_rule_parsing_data *) + p_hwfn->dbg_arrays[BIN_BUF_DBG_IDLE_CHK_PARSING_DATA].ptr + + hdr->rule_id; parsing_str_offset = - GET_FIELD(rule_parsing_data->data, - DBG_IDLE_CHK_RULE_PARSING_DATA_STR_OFFSET); + GET_FIELD(rule_parsing_data->data, + DBG_IDLE_CHK_RULE_PARSING_DATA_STR_OFFSET); has_fw_msg = - GET_FIELD(rule_parsing_data->data, - DBG_IDLE_CHK_RULE_PARSING_DATA_HAS_FW_MSG) > 0; - parsing_str = - &((const char *) - s_user_dbg_arrays[BIN_BUF_DBG_PARSING_STRINGS].ptr) - [parsing_str_offset]; + GET_FIELD(rule_parsing_data->data, + DBG_IDLE_CHK_RULE_PARSING_DATA_HAS_FW_MSG) > 0; + parsing_str = (const char *) + p_hwfn->dbg_arrays[BIN_BUF_DBG_PARSING_STRINGS].ptr + + parsing_str_offset; lsi_msg = parsing_str; curr_reg_id = 0; @@ -6510,7 +6188,8 @@ static u32 qed_parse_idle_chk_dump_rules(u32 *dump_buf, * parsed_results_bytes. * The parsing status is returned. */ -static enum dbg_status qed_parse_idle_chk_dump(u32 *dump_buf, +static enum dbg_status qed_parse_idle_chk_dump(struct qed_hwfn *p_hwfn, + u32 *dump_buf, u32 num_dumped_dwords, char *results_buf, u32 *parsed_results_bytes, @@ -6528,8 +6207,8 @@ static enum dbg_status qed_parse_idle_chk_dump(u32 *dump_buf, *num_errors = 0; *num_warnings = 0; - if (!s_user_dbg_arrays[BIN_BUF_DBG_PARSING_STRINGS].ptr || - !s_user_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_PARSING_DATA].ptr) + if (!p_hwfn->dbg_arrays[BIN_BUF_DBG_PARSING_STRINGS].ptr || + !p_hwfn->dbg_arrays[BIN_BUF_DBG_IDLE_CHK_PARSING_DATA].ptr) return DBG_STATUS_DBG_ARRAY_NOT_SET; /* Read global_params section */ @@ -6562,7 +6241,8 @@ static enum dbg_status qed_parse_idle_chk_dump(u32 *dump_buf, results_offset), "FW_IDLE_CHECK:\n"); rules_print_size = - qed_parse_idle_chk_dump_rules(dump_buf, + qed_parse_idle_chk_dump_rules(p_hwfn, + dump_buf, dump_buf_end, num_rules, true, @@ -6582,7 +6262,8 @@ static enum dbg_status qed_parse_idle_chk_dump(u32 *dump_buf, results_offset), "\nLSI_IDLE_CHECK:\n"); rules_print_size = - qed_parse_idle_chk_dump_rules(dump_buf, + qed_parse_idle_chk_dump_rules(p_hwfn, + dump_buf, dump_buf_end, num_rules, false, @@ -6694,9 +6375,8 @@ qed_mcp_trace_alloc_meta_data(struct qed_hwfn *p_hwfn, format_ptr->data = qed_read_dword_from_buf(meta_buf_bytes, &offset); - format_len = - (format_ptr->data & - MCP_TRACE_FORMAT_LEN_MASK) >> MCP_TRACE_FORMAT_LEN_SHIFT; + format_len = GET_MFW_FIELD(format_ptr->data, + MCP_TRACE_FORMAT_LEN); format_ptr->format_str = kzalloc(format_len, GFP_KERNEL); if (!format_ptr->format_str) { /* Update number of modules to be released */ @@ -6719,7 +6399,7 @@ qed_mcp_trace_alloc_meta_data(struct qed_hwfn *p_hwfn, * trace_buf - MCP trace cyclic buffer * trace_buf_size - MCP trace cyclic buffer size in bytes * data_offset - offset in bytes of the data to parse in the MCP trace cyclic - * buffer. + * buffer. * data_size - size in bytes of data to parse. * parsed_buf - destination buffer for parsed data. * parsed_results_bytes - size of parsed data in bytes. 
@@ -6764,9 +6444,8 @@ static enum dbg_status qed_parse_mcp_trace_buf(struct qed_hwfn *p_hwfn, /* Skip message if its index doesn't exist in the meta data */ if (format_idx >= meta->formats_num) { - u8 format_size = - (u8)((header & MFW_TRACE_PRM_SIZE_MASK) >> - MFW_TRACE_PRM_SIZE_SHIFT); + u8 format_size = (u8)GET_MFW_FIELD(header, + MFW_TRACE_PRM_SIZE); if (data_size < format_size) return DBG_STATUS_MCP_TRACE_BAD_DATA; @@ -6781,11 +6460,10 @@ static enum dbg_status qed_parse_mcp_trace_buf(struct qed_hwfn *p_hwfn, format_ptr = &meta->formats[format_idx]; for (i = 0, - param_mask = MCP_TRACE_FORMAT_P1_SIZE_MASK, - param_shift = MCP_TRACE_FORMAT_P1_SIZE_SHIFT; + param_mask = MCP_TRACE_FORMAT_P1_SIZE_MASK, param_shift = + MCP_TRACE_FORMAT_P1_SIZE_OFFSET; i < MCP_TRACE_FORMAT_MAX_PARAMS; - i++, - param_mask <<= MCP_TRACE_FORMAT_PARAM_WIDTH, + i++, param_mask <<= MCP_TRACE_FORMAT_PARAM_WIDTH, param_shift += MCP_TRACE_FORMAT_PARAM_WIDTH) { /* Extract param size (0..3) */ u8 param_size = (u8)((format_ptr->data & param_mask) >> @@ -6813,12 +6491,10 @@ static enum dbg_status qed_parse_mcp_trace_buf(struct qed_hwfn *p_hwfn, data_size -= param_size; } - format_level = (u8)((format_ptr->data & - MCP_TRACE_FORMAT_LEVEL_MASK) >> - MCP_TRACE_FORMAT_LEVEL_SHIFT); - format_module = (u8)((format_ptr->data & - MCP_TRACE_FORMAT_MODULE_MASK) >> - MCP_TRACE_FORMAT_MODULE_SHIFT); + format_level = (u8)GET_MFW_FIELD(format_ptr->data, + MCP_TRACE_FORMAT_LEVEL); + format_module = (u8)GET_MFW_FIELD(format_ptr->data, + MCP_TRACE_FORMAT_MODULE); if (format_level >= ARRAY_SIZE(s_mcp_trace_level_str)) return DBG_STATUS_MCP_TRACE_BAD_DATA; @@ -6960,7 +6636,7 @@ static enum dbg_status qed_parse_reg_fifo_dump(u32 *dump_buf, const char *section_name, *param_name, *param_str_val; u32 param_num_val, num_section_params, num_elements; struct reg_fifo_element *elements; - u8 i, j, err_val, vf_val; + u8 i, j, err_code, vf_val; u32 results_offset = 0; char vf_str[4]; @@ -6991,7 +6667,7 @@ static enum dbg_status qed_parse_reg_fifo_dump(u32 *dump_buf, /* Decode elements */ for (i = 0; i < num_elements; i++) { - bool err_printed = false; + const char *err_msg = NULL; /* Discover if element belongs to a VF or a PF */ vf_val = GET_FIELD(elements[i].data, REG_FIFO_ELEMENT_VF); @@ -7000,11 +6676,17 @@ static enum dbg_status qed_parse_reg_fifo_dump(u32 *dump_buf, else sprintf(vf_str, "%d", vf_val); + /* Find error message */ + err_code = GET_FIELD(elements[i].data, REG_FIFO_ELEMENT_ERROR); + for (j = 0; j < ARRAY_SIZE(s_reg_fifo_errors) && !err_msg; j++) + if (err_code == s_reg_fifo_errors[j].err_code) + err_msg = s_reg_fifo_errors[j].err_msg; + /* Add parsed element to parsed buffer */ results_offset += sprintf(qed_get_buf_ptr(results_buf, results_offset), - "raw: 0x%016llx, address: 0x%07x, access: %-5s, pf: %2d, vf: %s, port: %d, privilege: %-3s, protection: %-12s, master: %-4s, errors: ", + "raw: 0x%016llx, address: 0x%07x, access: %-5s, pf: %2d, vf: %s, port: %d, privilege: %-3s, protection: %-12s, master: %-4s, error: %s\n", elements[i].data, (u32)GET_FIELD(elements[i].data, REG_FIFO_ELEMENT_ADDRESS) * @@ -7021,30 +6703,8 @@ static enum dbg_status qed_parse_reg_fifo_dump(u32 *dump_buf, s_protection_strs[GET_FIELD(elements[i].data, REG_FIFO_ELEMENT_PROTECTION)], s_master_strs[GET_FIELD(elements[i].data, - REG_FIFO_ELEMENT_MASTER)]); - - /* Print errors */ - for (j = 0, - err_val = GET_FIELD(elements[i].data, - REG_FIFO_ELEMENT_ERROR); - j < ARRAY_SIZE(s_reg_fifo_error_strs); - j++, err_val >>= 1) { - if (err_val & 0x1) { - if 
(err_printed) - results_offset += - sprintf(qed_get_buf_ptr - (results_buf, - results_offset), ", "); - results_offset += - sprintf(qed_get_buf_ptr - (results_buf, results_offset), "%s", - s_reg_fifo_error_strs[j]); - err_printed = true; - } - } - - results_offset += - sprintf(qed_get_buf_ptr(results_buf, results_offset), "\n"); + REG_FIFO_ELEMENT_MASTER)], + err_msg ? err_msg : "unknown error code"); } results_offset += sprintf(qed_get_buf_ptr(results_buf, @@ -7398,27 +7058,28 @@ static enum dbg_status qed_parse_fw_asserts_dump(u32 *dump_buf, /***************************** Public Functions *******************************/ -enum dbg_status qed_dbg_user_set_bin_ptr(const u8 * const bin_ptr) +enum dbg_status qed_dbg_user_set_bin_ptr(struct qed_hwfn *p_hwfn, + const u8 * const bin_ptr) { - struct bin_buffer_hdr *buf_array = (struct bin_buffer_hdr *)bin_ptr; + struct bin_buffer_hdr *buf_hdrs = (struct bin_buffer_hdr *)bin_ptr; u8 buf_id; /* Convert binary data to debug arrays */ - for (buf_id = 0; buf_id < MAX_BIN_DBG_BUFFER_TYPE; buf_id++) { - s_user_dbg_arrays[buf_id].ptr = - (u32 *)(bin_ptr + buf_array[buf_id].offset); - s_user_dbg_arrays[buf_id].size_in_dwords = - BYTES_TO_DWORDS(buf_array[buf_id].length); - } + for (buf_id = 0; buf_id < MAX_BIN_DBG_BUFFER_TYPE; buf_id++) + qed_set_dbg_bin_buf(p_hwfn, + (enum bin_dbg_buffer_type)buf_id, + (u32 *)(bin_ptr + buf_hdrs[buf_id].offset), + buf_hdrs[buf_id].length); return DBG_STATUS_OK; } -enum dbg_status qed_dbg_alloc_user_data(struct qed_hwfn *p_hwfn) +enum dbg_status qed_dbg_alloc_user_data(struct qed_hwfn *p_hwfn, + void **user_data_ptr) { - p_hwfn->dbg_user_info = kzalloc(sizeof(struct dbg_tools_user_data), - GFP_KERNEL); - if (!p_hwfn->dbg_user_info) + *user_data_ptr = kzalloc(sizeof(struct dbg_tools_user_data), + GFP_KERNEL); + if (!(*user_data_ptr)) return DBG_STATUS_VIRT_MEM_ALLOC_FAILED; return DBG_STATUS_OK; @@ -7437,7 +7098,8 @@ enum dbg_status qed_get_idle_chk_results_buf_size(struct qed_hwfn *p_hwfn, { u32 num_errors, num_warnings; - return qed_parse_idle_chk_dump(dump_buf, + return qed_parse_idle_chk_dump(p_hwfn, + dump_buf, num_dumped_dwords, NULL, results_buf_size, @@ -7453,7 +7115,8 @@ enum dbg_status qed_print_idle_chk_results(struct qed_hwfn *p_hwfn, { u32 parsed_buf_size; - return qed_parse_idle_chk_dump(dump_buf, + return qed_parse_idle_chk_dump(p_hwfn, + dump_buf, num_dumped_dwords, results_buf, &parsed_buf_size, @@ -7624,25 +7287,28 @@ enum dbg_status qed_print_fw_asserts_results(struct qed_hwfn *p_hwfn, enum dbg_status qed_dbg_parse_attn(struct qed_hwfn *p_hwfn, struct dbg_attn_block_result *results) { - struct user_dbg_array *block_attn, *pstrings; const u32 *block_attn_name_offsets; - enum dbg_attn_type attn_type; + const char *attn_name_base; const char *block_name; + enum dbg_attn_type attn_type; u8 num_regs, i, j; num_regs = GET_FIELD(results->data, DBG_ATTN_BLOCK_RESULT_NUM_REGS); - attn_type = (enum dbg_attn_type) - GET_FIELD(results->data, - DBG_ATTN_BLOCK_RESULT_ATTN_TYPE); - block_name = s_block_info_arr[results->block_id].name; - - if (!s_user_dbg_arrays[BIN_BUF_DBG_ATTN_INDEXES].ptr || - !s_user_dbg_arrays[BIN_BUF_DBG_ATTN_NAME_OFFSETS].ptr || - !s_user_dbg_arrays[BIN_BUF_DBG_PARSING_STRINGS].ptr) + attn_type = GET_FIELD(results->data, DBG_ATTN_BLOCK_RESULT_ATTN_TYPE); + block_name = qed_dbg_get_block_name(p_hwfn, results->block_id); + if (!block_name) + return DBG_STATUS_INVALID_ARGS; + + if (!p_hwfn->dbg_arrays[BIN_BUF_DBG_ATTN_INDEXES].ptr || + !p_hwfn->dbg_arrays[BIN_BUF_DBG_ATTN_NAME_OFFSETS].ptr || + 
!p_hwfn->dbg_arrays[BIN_BUF_DBG_PARSING_STRINGS].ptr) return DBG_STATUS_DBG_ARRAY_NOT_SET; - block_attn = &s_user_dbg_arrays[BIN_BUF_DBG_ATTN_NAME_OFFSETS]; - block_attn_name_offsets = &block_attn->ptr[results->names_offset]; + block_attn_name_offsets = + (u32 *)p_hwfn->dbg_arrays[BIN_BUF_DBG_ATTN_NAME_OFFSETS].ptr + + results->names_offset; + + attn_name_base = p_hwfn->dbg_arrays[BIN_BUF_DBG_PARSING_STRINGS].ptr; /* Go over registers with a non-zero attention status */ for (i = 0; i < num_regs; i++) { @@ -7653,18 +7319,17 @@ enum dbg_status qed_dbg_parse_attn(struct qed_hwfn *p_hwfn, reg_result = &results->reg_results[i]; num_reg_attn = GET_FIELD(reg_result->data, DBG_ATTN_REG_RESULT_NUM_REG_ATTN); - block_attn = &s_user_dbg_arrays[BIN_BUF_DBG_ATTN_INDEXES]; - bit_mapping = &((struct dbg_attn_bit_mapping *) - block_attn->ptr)[reg_result->block_attn_offset]; - - pstrings = &s_user_dbg_arrays[BIN_BUF_DBG_PARSING_STRINGS]; + bit_mapping = (struct dbg_attn_bit_mapping *) + p_hwfn->dbg_arrays[BIN_BUF_DBG_ATTN_INDEXES].ptr + + reg_result->block_attn_offset; /* Go over attention status bits */ - for (j = 0; j < num_reg_attn; j++) { + for (j = 0; j < num_reg_attn; j++, bit_idx++) { u16 attn_idx_val = GET_FIELD(bit_mapping[j].data, DBG_ATTN_BIT_MAPPING_VAL); const char *attn_name, *attn_type_str, *masked_str; - u32 attn_name_offset, sts_addr; + u32 attn_name_offset; + u32 sts_addr; /* Check if bit mask should be advanced (due to unused * bits). @@ -7676,18 +7341,19 @@ enum dbg_status qed_dbg_parse_attn(struct qed_hwfn *p_hwfn, } /* Check current bit index */ - if (!(reg_result->sts_val & BIT(bit_idx))) { - bit_idx++; + if (!(reg_result->sts_val & BIT(bit_idx))) continue; - } - /* Find attention name */ + /* An attention bit with value=1 was found + * Find attention name + */ attn_name_offset = block_attn_name_offsets[attn_idx_val]; - attn_name = &((const char *) - pstrings->ptr)[attn_name_offset]; - attn_type_str = attn_type == ATTN_TYPE_INTERRUPT ? - "Interrupt" : "Parity"; + attn_name = attn_name_base + attn_name_offset; + attn_type_str = + (attn_type == + ATTN_TYPE_INTERRUPT ? "Interrupt" : + "Parity"); masked_str = reg_result->mask_val & BIT(bit_idx) ? " [masked]" : ""; sts_addr = GET_FIELD(reg_result->data, @@ -7695,15 +7361,15 @@ enum dbg_status qed_dbg_parse_attn(struct qed_hwfn *p_hwfn, DP_NOTICE(p_hwfn, "%s (%s) : %s [address 0x%08x, bit %d]%s\n", block_name, attn_type_str, attn_name, - sts_addr, bit_idx, masked_str); - - bit_idx++; + sts_addr * 4, bit_idx, masked_str); } } return DBG_STATUS_OK; } +static DEFINE_MUTEX(qed_dbg_lock); + /* Wrapper for unifying the idle_chk and mcp_trace api */ static enum dbg_status qed_print_idle_chk_results_wrapper(struct qed_hwfn *p_hwfn, @@ -7763,7 +7429,10 @@ static struct { qed_dbg_fw_asserts_get_dump_buf_size, qed_dbg_fw_asserts_dump, qed_print_fw_asserts_results, - qed_get_fw_asserts_results_buf_size},}; + qed_get_fw_asserts_results_buf_size}, { + "ilt", + qed_dbg_ilt_get_dump_buf_size, + qed_dbg_ilt_dump, NULL, NULL},}; static void qed_dbg_print_feature(u8 *p_text_buf, u32 text_size) { @@ -7846,6 +7515,8 @@ static enum dbg_status format_feature(struct qed_hwfn *p_hwfn, return rc; } +#define MAX_DBG_FEATURE_SIZE_DWORDS 0x3FFFFFFF + /* Generic function for performing the dump of a debug feature. 
*/ static enum dbg_status qed_dbg_dump(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, @@ -7875,6 +7546,17 @@ static enum dbg_status qed_dbg_dump(struct qed_hwfn *p_hwfn, &buf_size_dwords); if (rc != DBG_STATUS_OK && rc != DBG_STATUS_NVRAM_GET_IMAGE_FAILED) return rc; + + if (buf_size_dwords > MAX_DBG_FEATURE_SIZE_DWORDS) { + feature->buf_size = 0; + DP_NOTICE(p_hwfn->cdev, + "Debug feature [\"%s\"] size (0x%x dwords) exceeds maximum size (0x%x dwords)\n", + qed_features_lookup[feature_idx].name, + buf_size_dwords, MAX_DBG_FEATURE_SIZE_DWORDS); + + return DBG_STATUS_OK; + } + feature->buf_size = buf_size_dwords * sizeof(u32); feature->dump_buf = vmalloc(feature->buf_size); if (!feature->dump_buf) @@ -8021,6 +7703,16 @@ int qed_dbg_fw_asserts_size(struct qed_dev *cdev) return qed_dbg_feature_size(cdev, DBG_FEATURE_FW_ASSERTS); } +int qed_dbg_ilt(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes) +{ + return qed_dbg_feature(cdev, buffer, DBG_FEATURE_ILT, num_dumped_bytes); +} + +int qed_dbg_ilt_size(struct qed_dev *cdev) +{ + return qed_dbg_feature_size(cdev, DBG_FEATURE_ILT); +} + int qed_dbg_mcp_trace(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes) { @@ -8037,9 +7729,17 @@ int qed_dbg_mcp_trace_size(struct qed_dev *cdev) * feature buffer. */ #define REGDUMP_HEADER_SIZE sizeof(u32) +#define REGDUMP_HEADER_SIZE_SHIFT 0 +#define REGDUMP_HEADER_SIZE_MASK 0xffffff #define REGDUMP_HEADER_FEATURE_SHIFT 24 -#define REGDUMP_HEADER_ENGINE_SHIFT 31 +#define REGDUMP_HEADER_FEATURE_MASK 0x3f #define REGDUMP_HEADER_OMIT_ENGINE_SHIFT 30 +#define REGDUMP_HEADER_OMIT_ENGINE_MASK 0x1 +#define REGDUMP_HEADER_ENGINE_SHIFT 31 +#define REGDUMP_HEADER_ENGINE_MASK 0x1 +#define REGDUMP_MAX_SIZE 0x1000000 +#define ILT_DUMP_MAX_SIZE (1024 * 1024 * 15) + enum debug_print_features { OLD_MODE = 0, IDLE_CHK = 1, @@ -8053,17 +7753,27 @@ enum debug_print_features { NVM_CFG1 = 9, DEFAULT_CFG = 10, NVM_META = 11, + MDUMP = 12, + ILT_DUMP = 13, }; -static u32 qed_calc_regdump_header(enum debug_print_features feature, +static u32 qed_calc_regdump_header(struct qed_dev *cdev, + enum debug_print_features feature, int engine, u32 feature_size, u8 omit_engine) { - /* Insert the engine, feature and mode inside the header and combine it - * with feature size. 
- */ - return feature_size | (feature << REGDUMP_HEADER_FEATURE_SHIFT) | - (omit_engine << REGDUMP_HEADER_OMIT_ENGINE_SHIFT) | - (engine << REGDUMP_HEADER_ENGINE_SHIFT); + u32 res = 0; + + SET_FIELD(res, REGDUMP_HEADER_SIZE, feature_size); + if (res != feature_size) + DP_NOTICE(cdev, + "Feature %d is too large (size 0x%x) and will corrupt the dump\n", + feature, feature_size); + + SET_FIELD(res, REGDUMP_HEADER_FEATURE, feature); + SET_FIELD(res, REGDUMP_HEADER_OMIT_ENGINE, omit_engine); + SET_FIELD(res, REGDUMP_HEADER_ENGINE, engine); + + return res; } int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) @@ -8079,9 +7789,11 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) for (i = 0; i < MAX_DBG_GRC_PARAMS; i++) grc_params[i] = dev_data->grc.param_val[i]; - if (cdev->num_hwfns == 1) + if (!QED_IS_CMT(cdev)) omit_engine = 1; + mutex_lock(&qed_dbg_lock); + org_engine = qed_get_debug_engine(cdev); for (cur_engine = 0; cur_engine < cdev->num_hwfns; cur_engine++) { /* Collect idle_chks and grcDump for each hw function */ @@ -8094,7 +7806,7 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) REGDUMP_HEADER_SIZE, &feature_size); if (!rc) { *(u32 *)((u8 *)buffer + offset) = - qed_calc_regdump_header(IDLE_CHK, cur_engine, + qed_calc_regdump_header(cdev, IDLE_CHK, cur_engine, feature_size, omit_engine); offset += (feature_size + REGDUMP_HEADER_SIZE); } else { @@ -8106,7 +7818,7 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) REGDUMP_HEADER_SIZE, &feature_size); if (!rc) { *(u32 *)((u8 *)buffer + offset) = - qed_calc_regdump_header(IDLE_CHK, cur_engine, + qed_calc_regdump_header(cdev, IDLE_CHK, cur_engine, feature_size, omit_engine); offset += (feature_size + REGDUMP_HEADER_SIZE); } else { @@ -8118,7 +7830,7 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) REGDUMP_HEADER_SIZE, &feature_size); if (!rc) { *(u32 *)((u8 *)buffer + offset) = - qed_calc_regdump_header(REG_FIFO, cur_engine, + qed_calc_regdump_header(cdev, REG_FIFO, cur_engine, feature_size, omit_engine); offset += (feature_size + REGDUMP_HEADER_SIZE); } else { @@ -8130,7 +7842,7 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) REGDUMP_HEADER_SIZE, &feature_size); if (!rc) { *(u32 *)((u8 *)buffer + offset) = - qed_calc_regdump_header(IGU_FIFO, cur_engine, + qed_calc_regdump_header(cdev, IGU_FIFO, cur_engine, feature_size, omit_engine); offset += (feature_size + REGDUMP_HEADER_SIZE); } else { @@ -8143,7 +7855,7 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) &feature_size); if (!rc) { *(u32 *)((u8 *)buffer + offset) = - qed_calc_regdump_header(PROTECTION_OVERRIDE, + qed_calc_regdump_header(cdev, PROTECTION_OVERRIDE, cur_engine, feature_size, omit_engine); offset += (feature_size + REGDUMP_HEADER_SIZE); @@ -8158,25 +7870,45 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) REGDUMP_HEADER_SIZE, &feature_size); if (!rc) { *(u32 *)((u8 *)buffer + offset) = - qed_calc_regdump_header(FW_ASSERTS, cur_engine, - feature_size, omit_engine); + qed_calc_regdump_header(cdev, FW_ASSERTS, + cur_engine, feature_size, + omit_engine); offset += (feature_size + REGDUMP_HEADER_SIZE); } else { DP_ERR(cdev, "qed_dbg_fw_asserts failed. 
rc = %d\n", rc); } - for (i = 0; i < MAX_DBG_GRC_PARAMS; i++) - dev_data->grc.param_val[i] = grc_params[i]; + feature_size = qed_dbg_ilt_size(cdev); + if (!cdev->disable_ilt_dump && + feature_size < ILT_DUMP_MAX_SIZE) { + rc = qed_dbg_ilt(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, &feature_size); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(cdev, ILT_DUMP, + cur_engine, + feature_size, + omit_engine); + offset += feature_size + REGDUMP_HEADER_SIZE; + } else { + DP_ERR(cdev, "qed_dbg_ilt failed. rc = %d\n", + rc); + } + } /* GRC dump - must be last because when mcp stuck it will * clutter idle_chk, reg_fifo, ... */ + for (i = 0; i < MAX_DBG_GRC_PARAMS; i++) + dev_data->grc.param_val[i] = grc_params[i]; + rc = qed_dbg_grc(cdev, (u8 *)buffer + offset + REGDUMP_HEADER_SIZE, &feature_size); if (!rc) { *(u32 *)((u8 *)buffer + offset) = - qed_calc_regdump_header(GRC_DUMP, cur_engine, + qed_calc_regdump_header(cdev, GRC_DUMP, + cur_engine, feature_size, omit_engine); offset += (feature_size + REGDUMP_HEADER_SIZE); } else { @@ -8185,12 +7917,13 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) } qed_set_debug_engine(cdev, org_engine); + /* mcp_trace */ rc = qed_dbg_mcp_trace(cdev, (u8 *)buffer + offset + REGDUMP_HEADER_SIZE, &feature_size); if (!rc) { *(u32 *)((u8 *)buffer + offset) = - qed_calc_regdump_header(MCP_TRACE, cur_engine, + qed_calc_regdump_header(cdev, MCP_TRACE, cur_engine, feature_size, omit_engine); offset += (feature_size + REGDUMP_HEADER_SIZE); } else { @@ -8199,11 +7932,12 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) /* nvm cfg1 */ rc = qed_dbg_nvm_image(cdev, - (u8 *)buffer + offset + REGDUMP_HEADER_SIZE, - &feature_size, QED_NVM_IMAGE_NVM_CFG1); + (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, &feature_size, + QED_NVM_IMAGE_NVM_CFG1); if (!rc) { *(u32 *)((u8 *)buffer + offset) = - qed_calc_regdump_header(NVM_CFG1, cur_engine, + qed_calc_regdump_header(cdev, NVM_CFG1, cur_engine, feature_size, omit_engine); offset += (feature_size + REGDUMP_HEADER_SIZE); } else if (rc != -ENOENT) { @@ -8218,7 +7952,7 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) &feature_size, QED_NVM_IMAGE_DEFAULT_CFG); if (!rc) { *(u32 *)((u8 *)buffer + offset) = - qed_calc_regdump_header(DEFAULT_CFG, cur_engine, + qed_calc_regdump_header(cdev, DEFAULT_CFG, cur_engine, feature_size, omit_engine); offset += (feature_size + REGDUMP_HEADER_SIZE); } else if (rc != -ENOENT) { @@ -8234,8 +7968,8 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) &feature_size, QED_NVM_IMAGE_NVM_META); if (!rc) { *(u32 *)((u8 *)buffer + offset) = - qed_calc_regdump_header(NVM_META, cur_engine, - feature_size, omit_engine); + qed_calc_regdump_header(cdev, NVM_META, cur_engine, + feature_size, omit_engine); offset += (feature_size + REGDUMP_HEADER_SIZE); } else if (rc != -ENOENT) { DP_ERR(cdev, @@ -8243,6 +7977,23 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) QED_NVM_IMAGE_NVM_META, "QED_NVM_IMAGE_NVM_META", rc); } + /* nvm mdump */ + rc = qed_dbg_nvm_image(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, &feature_size, + QED_NVM_IMAGE_MDUMP); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(cdev, MDUMP, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else if (rc != -ENOENT) { + DP_ERR(cdev, + "qed_dbg_nvm_image failed for image %d (%s), rc = %d\n", + QED_NVM_IMAGE_MDUMP, "QED_NVM_IMAGE_MDUMP", rc); + } + + mutex_unlock(&qed_dbg_lock); + return 0; } @@ -8250,9 
+8001,10 @@ int qed_dbg_all_data_size(struct qed_dev *cdev) { struct qed_hwfn *p_hwfn = &cdev->hwfns[cdev->dbg_params.engine_for_debug]; - u32 regs_len = 0, image_len = 0; + u32 regs_len = 0, image_len = 0, ilt_len = 0, total_ilt_len = 0; u8 cur_engine, org_engine; + cdev->disable_ilt_dump = false; org_engine = qed_get_debug_engine(cdev); for (cur_engine = 0; cur_engine < cdev->num_hwfns; cur_engine++) { /* Engine specific */ @@ -8267,6 +8019,12 @@ int qed_dbg_all_data_size(struct qed_dev *cdev) REGDUMP_HEADER_SIZE + qed_dbg_protection_override_size(cdev) + REGDUMP_HEADER_SIZE + qed_dbg_fw_asserts_size(cdev); + + ilt_len = REGDUMP_HEADER_SIZE + qed_dbg_ilt_size(cdev); + if (ilt_len < ILT_DUMP_MAX_SIZE) { + total_ilt_len += ilt_len; + regs_len += ilt_len; + } } qed_set_debug_engine(cdev, org_engine); @@ -8282,6 +8040,17 @@ int qed_dbg_all_data_size(struct qed_dev *cdev) qed_dbg_nvm_image_length(p_hwfn, QED_NVM_IMAGE_NVM_META, &image_len); if (image_len) regs_len += REGDUMP_HEADER_SIZE + image_len; + qed_dbg_nvm_image_length(p_hwfn, QED_NVM_IMAGE_MDUMP, &image_len); + if (image_len) + regs_len += REGDUMP_HEADER_SIZE + image_len; + + if (regs_len > REGDUMP_MAX_SIZE) { + DP_VERBOSE(cdev, QED_MSG_DEBUG, + "Dump exceeds max size 0x%x, disable ILT dump\n", + REGDUMP_MAX_SIZE); + cdev->disable_ilt_dump = true; + regs_len -= total_ilt_len; + } return regs_len; } @@ -8327,9 +8096,8 @@ int qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature) { struct qed_hwfn *p_hwfn = &cdev->hwfns[cdev->dbg_params.engine_for_debug]; + struct qed_dbg_feature *qed_feature = &cdev->dbg_features[feature]; struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn); - struct qed_dbg_feature *qed_feature = - &cdev->dbg_params.features[feature]; u32 buf_size_dwords; enum dbg_status rc; @@ -8341,6 +8109,10 @@ int qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature) if (rc != DBG_STATUS_OK) buf_size_dwords = 0; + /* Feature will not be dumped if it exceeds maximum size */ + if (buf_size_dwords > MAX_DBG_FEATURE_SIZE_DWORDS) + buf_size_dwords = 0; + qed_ptt_release(p_hwfn, p_ptt); qed_feature->buf_size = buf_size_dwords * sizeof(u32); return qed_feature->buf_size; @@ -8360,14 +8132,21 @@ void qed_set_debug_engine(struct qed_dev *cdev, int engine_number) void qed_dbg_pf_init(struct qed_dev *cdev) { - const u8 *dbg_values; + const u8 *dbg_values = NULL; + int i; /* Debug values are after init values. * The offset is the first dword of the file. */ dbg_values = cdev->firmware->data + *(u32 *)cdev->firmware->data; - qed_dbg_set_bin_ptr((u8 *)dbg_values); - qed_dbg_user_set_bin_ptr((u8 *)dbg_values); + + for_each_hwfn(cdev, i) { + qed_dbg_set_bin_ptr(&cdev->hwfns[i], dbg_values); + qed_dbg_user_set_bin_ptr(&cdev->hwfns[i], dbg_values); + } + + /* Set the hwfn to be 0 as default */ + cdev->dbg_params.engine_for_debug = 0; } void qed_dbg_pf_exit(struct qed_dev *cdev) @@ -8375,11 +8154,11 @@ void qed_dbg_pf_exit(struct qed_dev *cdev) struct qed_dbg_feature *feature = NULL; enum qed_dbg_features feature_idx; - /* Debug features' buffers may be allocated if debug feature was used - * but dump wasn't called. 
+ /* debug features' buffers may be allocated if debug feature was used + * but dump wasn't called */ for (feature_idx = 0; feature_idx < DBG_FEATURE_NUM; feature_idx++) { - feature = &cdev->dbg_params.features[feature_idx]; + feature = &cdev->dbg_features[feature_idx]; if (feature->dump_buf) { vfree(feature->dump_buf); feature->dump_buf = NULL; diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.h b/drivers/net/ethernet/qlogic/qed/qed_debug.h index e47e0e8d75b0..edf99d296bd1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.h +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.h @@ -14,11 +14,13 @@ enum qed_dbg_features { DBG_FEATURE_IGU_FIFO, DBG_FEATURE_PROTECTION_OVERRIDE, DBG_FEATURE_FW_ASSERTS, + DBG_FEATURE_ILT, DBG_FEATURE_NUM }; /* Forward Declaration */ struct qed_dev; +struct qed_hwfn; int qed_dbg_grc(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes); int qed_dbg_grc_size(struct qed_dev *cdev); @@ -37,6 +39,8 @@ int qed_dbg_protection_override_size(struct qed_dev *cdev); int qed_dbg_fw_asserts(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes); int qed_dbg_fw_asserts_size(struct qed_dev *cdev); +int qed_dbg_ilt(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes); +int qed_dbg_ilt_size(struct qed_dev *cdev); int qed_dbg_mcp_trace(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes); int qed_dbg_mcp_trace_size(struct qed_dev *cdev); diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index a1ebc2b1ca0b..7912911337d4 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -907,7 +907,7 @@ qed_llh_access_filter(struct qed_hwfn *p_hwfn, /* Filter value */ addr = NIG_REG_LLH_FUNC_FILTER_VALUE + 2 * filter_idx * 0x4; - params.flags = QED_DMAE_FLAG_PF_DST; + SET_FIELD(params.flags, QED_DMAE_PARAMS_DST_PF_VALID, 0x1); params.dst_pfid = pfid; rc = qed_dmae_host2grc(p_hwfn, p_ptt, @@ -1412,6 +1412,7 @@ void qed_resc_free(struct qed_dev *cdev) qed_dmae_info_free(p_hwfn); qed_dcbx_info_free(p_hwfn); qed_dbg_user_data_free(p_hwfn); + qed_fw_overlay_mem_free(p_hwfn, p_hwfn->fw_overlay_mem); /* Destroy doorbell recovery mechanism */ qed_db_recovery_teardown(p_hwfn); @@ -1571,7 +1572,7 @@ static void qed_init_qm_vport_params(struct qed_hwfn *p_hwfn) /* all vports participate in weighted fair queueing */ for (i = 0; i < qed_init_qm_get_num_vports(p_hwfn); i++) - qm_info->qm_vport_params[i].vport_wfq = 1; + qm_info->qm_vport_params[i].wfq = 1; } /* initialize qm port params */ @@ -1579,6 +1580,7 @@ static void qed_init_qm_port_params(struct qed_hwfn *p_hwfn) { /* Initialize qm port parameters */ u8 i, active_phys_tcs, num_ports = p_hwfn->cdev->num_ports_in_engine; + struct qed_dev *cdev = p_hwfn->cdev; /* indicate how ooo and high pri traffic is dealt with */ active_phys_tcs = num_ports == MAX_NUM_PORTS_K2 ? 
@@ -1588,11 +1590,13 @@ static void qed_init_qm_port_params(struct qed_hwfn *p_hwfn) for (i = 0; i < num_ports; i++) { struct init_qm_port_params *p_qm_port = &p_hwfn->qm_info.qm_port_params[i]; + u16 pbf_max_cmd_lines; p_qm_port->active = 1; p_qm_port->active_phys_tcs = active_phys_tcs; - p_qm_port->num_pbf_cmd_lines = PBF_MAX_CMD_LINES / num_ports; - p_qm_port->num_btb_blocks = BTB_MAX_BLOCKS / num_ports; + pbf_max_cmd_lines = (u16)NUM_OF_PBF_CMD_LINES(cdev); + p_qm_port->num_pbf_cmd_lines = pbf_max_cmd_lines / num_ports; + p_qm_port->num_btb_blocks = NUM_OF_BTB_BLOCKS(cdev) / num_ports; } } @@ -2034,9 +2038,8 @@ static void qed_dp_init_qm_params(struct qed_hwfn *p_hwfn) vport = &(qm_info->qm_vport_params[i]); DP_VERBOSE(p_hwfn, NETIF_MSG_HW, - "vport idx %d, vport_rl %d, wfq %d, first_tx_pq_id [ ", - qm_info->start_vport + i, - vport->vport_rl, vport->vport_wfq); + "vport idx %d, wfq %d, first_tx_pq_id [ ", + qm_info->start_vport + i, vport->wfq); for (tc = 0; tc < NUM_OF_TCS; tc++) DP_VERBOSE(p_hwfn, NETIF_MSG_HW, @@ -2049,11 +2052,11 @@ static void qed_dp_init_qm_params(struct qed_hwfn *p_hwfn) pq = &(qm_info->qm_pq_params[i]); DP_VERBOSE(p_hwfn, NETIF_MSG_HW, - "pq idx %d, port %d, vport_id %d, tc %d, wrr_grp %d, rl_valid %d\n", + "pq idx %d, port %d, vport_id %d, tc %d, wrr_grp %d, rl_valid %d rl_id %d\n", qm_info->start_pq + i, pq->port_id, pq->vport_id, - pq->tc_id, pq->wrr_group, pq->rl_valid); + pq->tc_id, pq->wrr_group, pq->rl_valid, pq->rl_id); } } @@ -2103,9 +2106,6 @@ int qed_qm_reconf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) if (!b_rc) return -EINVAL; - /* clear the QM_PF runtime phase leftovers from previous init */ - qed_init_clear_rt_data(p_hwfn); - /* prepare QM portion of runtime array */ qed_qm_init_pf(p_hwfn, p_ptt, false); @@ -2346,7 +2346,7 @@ int qed_resc_alloc(struct qed_dev *cdev) if (rc) goto alloc_err; - rc = qed_dbg_alloc_user_data(p_hwfn); + rc = qed_dbg_alloc_user_data(p_hwfn, &p_hwfn->dbg_user_info); if (rc) goto alloc_err; } @@ -2623,7 +2623,7 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn, params.max_phys_tcs_per_port = qm_info->max_phys_tcs_per_port; params.pf_rl_en = qm_info->pf_rl_en; params.pf_wfq_en = qm_info->pf_wfq_en; - params.vport_rl_en = qm_info->vport_rl_en; + params.global_rl_en = qm_info->vport_rl_en; params.vport_wfq_en = qm_info->vport_wfq_en; params.port_params = qm_info->qm_port_params; @@ -2891,6 +2891,8 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn, if (rc) return rc; + qed_fw_overlay_init_ram(p_hwfn, p_ptt, p_hwfn->fw_overlay_mem); + /* Pure runtime initializations - directly to the HW */ qed_int_igu_init_pure_rt(p_hwfn, p_ptt, true, true); @@ -3000,8 +3002,10 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params) u32 load_code, resp, param, drv_mb_param; bool b_default_mtu = true; struct qed_hwfn *p_hwfn; - int rc = 0, i; + const u32 *fw_overlays; + u32 fw_overlays_len; u16 ether_type; + int rc = 0, i; if ((p_params->int_mode == QED_INT_MODE_MSI) && (cdev->num_hwfns > 1)) { DP_NOTICE(cdev, "MSI mode is not supported for CMT devices\n"); @@ -3102,6 +3106,17 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params) */ qed_pglueb_clear_err(p_hwfn, p_hwfn->p_main_ptt); + fw_overlays = cdev->fw_data->fw_overlays; + fw_overlays_len = cdev->fw_data->fw_overlays_len; + p_hwfn->fw_overlay_mem = + qed_fw_overlay_mem_alloc(p_hwfn, fw_overlays, + fw_overlays_len); + if (!p_hwfn->fw_overlay_mem) { + DP_NOTICE(p_hwfn, + "Failed to allocate fw overlay memory\n"); + goto load_err; + 
} + switch (load_code) { case FW_MSG_CODE_DRV_LOAD_ENGINE: rc = qed_hw_init_common(p_hwfn, p_hwfn->p_main_ptt, @@ -3566,8 +3581,10 @@ const char *qed_hw_get_resc_name(enum qed_resources res_id) return "RDMA_CNQ_RAM"; case QED_ILT: return "ILT"; - case QED_LL2_QUEUE: - return "LL2_QUEUE"; + case QED_LL2_RAM_QUEUE: + return "LL2_RAM_QUEUE"; + case QED_LL2_CTX_QUEUE: + return "LL2_CTX_QUEUE"; case QED_CMDQS_CQS: return "CMDQS_CQS"; case QED_RDMA_STATS_QUEUE: @@ -3606,18 +3623,46 @@ __qed_hw_set_soft_resc_size(struct qed_hwfn *p_hwfn, return 0; } +static u32 qed_hsi_def_val[][MAX_CHIP_IDS] = { + {MAX_NUM_VFS_BB, MAX_NUM_VFS_K2}, + {MAX_NUM_L2_QUEUES_BB, MAX_NUM_L2_QUEUES_K2}, + {MAX_NUM_PORTS_BB, MAX_NUM_PORTS_K2}, + {MAX_SB_PER_PATH_BB, MAX_SB_PER_PATH_K2,}, + {MAX_NUM_PFS_BB, MAX_NUM_PFS_K2}, + {MAX_NUM_VPORTS_BB, MAX_NUM_VPORTS_K2}, + {ETH_RSS_ENGINE_NUM_BB, ETH_RSS_ENGINE_NUM_K2}, + {MAX_QM_TX_QUEUES_BB, MAX_QM_TX_QUEUES_K2}, + {PXP_NUM_ILT_RECORDS_BB, PXP_NUM_ILT_RECORDS_K2}, + {RDMA_NUM_STATISTIC_COUNTERS_BB, RDMA_NUM_STATISTIC_COUNTERS_K2}, + {MAX_QM_GLOBAL_RLS, MAX_QM_GLOBAL_RLS}, + {PBF_MAX_CMD_LINES, PBF_MAX_CMD_LINES}, + {BTB_MAX_BLOCKS_BB, BTB_MAX_BLOCKS_K2}, +}; + +u32 qed_get_hsi_def_val(struct qed_dev *cdev, enum qed_hsi_def_type type) +{ + enum chip_ids chip_id = QED_IS_BB(cdev) ? CHIP_BB : CHIP_K2; + + if (type >= QED_NUM_HSI_DEFS) { + DP_ERR(cdev, "Unexpected HSI definition type [%d]\n", type); + return 0; + } + + return qed_hsi_def_val[type][chip_id]; +} static int qed_hw_set_soft_resc_size(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { - bool b_ah = QED_IS_AH(p_hwfn->cdev); u32 resc_max_val, mcp_resp; u8 res_id; int rc; - for (res_id = 0; res_id < QED_MAX_RESC; res_id++) { switch (res_id) { - case QED_LL2_QUEUE: - resc_max_val = MAX_NUM_LL2_RX_QUEUES; + case QED_LL2_RAM_QUEUE: + resc_max_val = MAX_NUM_LL2_RX_RAM_QUEUES; + break; + case QED_LL2_CTX_QUEUE: + resc_max_val = MAX_NUM_LL2_RX_CTX_QUEUES; break; case QED_RDMA_CNQ_RAM: /* No need for a case for QED_CMDQS_CQS since @@ -3626,8 +3671,8 @@ qed_hw_set_soft_resc_size(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) resc_max_val = NUM_OF_GLOBAL_QUEUES; break; case QED_RDMA_STATS_QUEUE: - resc_max_val = b_ah ? RDMA_NUM_STATISTIC_COUNTERS_K2 - : RDMA_NUM_STATISTIC_COUNTERS_BB; + resc_max_val = + NUM_OF_RDMA_STATISTIC_COUNTERS(p_hwfn->cdev); break; case QED_BDQ: resc_max_val = BDQ_NUM_RESOURCES; @@ -3660,28 +3705,24 @@ int qed_hw_get_dflt_resc(struct qed_hwfn *p_hwfn, u32 *p_resc_num, u32 *p_resc_start) { u8 num_funcs = p_hwfn->num_funcs_on_engine; - bool b_ah = QED_IS_AH(p_hwfn->cdev); + struct qed_dev *cdev = p_hwfn->cdev; switch (res_id) { case QED_L2_QUEUE: - *p_resc_num = (b_ah ? MAX_NUM_L2_QUEUES_K2 : - MAX_NUM_L2_QUEUES_BB) / num_funcs; + *p_resc_num = NUM_OF_L2_QUEUES(cdev) / num_funcs; break; case QED_VPORT: - *p_resc_num = (b_ah ? MAX_NUM_VPORTS_K2 : - MAX_NUM_VPORTS_BB) / num_funcs; + *p_resc_num = NUM_OF_VPORTS(cdev) / num_funcs; break; case QED_RSS_ENG: - *p_resc_num = (b_ah ? ETH_RSS_ENGINE_NUM_K2 : - ETH_RSS_ENGINE_NUM_BB) / num_funcs; + *p_resc_num = NUM_OF_RSS_ENGINES(cdev) / num_funcs; break; case QED_PQ: - *p_resc_num = (b_ah ? 
MAX_QM_TX_QUEUES_K2 : - MAX_QM_TX_QUEUES_BB) / num_funcs; + *p_resc_num = NUM_OF_QM_TX_QUEUES(cdev) / num_funcs; *p_resc_num &= ~0x7; /* The granularity of the PQs is 8 */ break; case QED_RL: - *p_resc_num = MAX_QM_GLOBAL_RLS / num_funcs; + *p_resc_num = NUM_OF_QM_GLOBAL_RLS(cdev) / num_funcs; break; case QED_MAC: case QED_VLAN: @@ -3689,11 +3730,13 @@ int qed_hw_get_dflt_resc(struct qed_hwfn *p_hwfn, *p_resc_num = ETH_NUM_MAC_FILTERS / num_funcs; break; case QED_ILT: - *p_resc_num = (b_ah ? PXP_NUM_ILT_RECORDS_K2 : - PXP_NUM_ILT_RECORDS_BB) / num_funcs; + *p_resc_num = NUM_OF_PXP_ILT_RECORDS(cdev) / num_funcs; + break; + case QED_LL2_RAM_QUEUE: + *p_resc_num = MAX_NUM_LL2_RX_RAM_QUEUES / num_funcs; break; - case QED_LL2_QUEUE: - *p_resc_num = MAX_NUM_LL2_RX_QUEUES / num_funcs; + case QED_LL2_CTX_QUEUE: + *p_resc_num = MAX_NUM_LL2_RX_CTX_QUEUES / num_funcs; break; case QED_RDMA_CNQ_RAM: case QED_CMDQS_CQS: @@ -3701,8 +3744,7 @@ int qed_hw_get_dflt_resc(struct qed_hwfn *p_hwfn, *p_resc_num = NUM_OF_GLOBAL_QUEUES / num_funcs; break; case QED_RDMA_STATS_QUEUE: - *p_resc_num = (b_ah ? RDMA_NUM_STATISTIC_COUNTERS_K2 : - RDMA_NUM_STATISTIC_COUNTERS_BB) / num_funcs; + *p_resc_num = NUM_OF_RDMA_STATISTIC_COUNTERS(cdev) / num_funcs; break; case QED_BDQ: if (p_hwfn->hw_info.personality != QED_PCI_ISCSI && @@ -5087,11 +5129,11 @@ static void qed_configure_wfq_for_all_vports(struct qed_hwfn *p_hwfn, for (i = 0; i < p_hwfn->qm_info.num_vports; i++) { u32 wfq_speed = p_hwfn->qm_info.wfq_data[i].min_speed; - vport_params[i].vport_wfq = (wfq_speed * QED_WFQ_UNIT) / + vport_params[i].wfq = (wfq_speed * QED_WFQ_UNIT) / min_pf_rate; qed_init_vport_wfq(p_hwfn, p_ptt, vport_params[i].first_tx_pq_id, - vport_params[i].vport_wfq); + vport_params[i].wfq); } } @@ -5102,7 +5144,7 @@ static void qed_init_wfq_default_param(struct qed_hwfn *p_hwfn, int i; for (i = 0; i < p_hwfn->qm_info.num_vports; i++) - p_hwfn->qm_info.qm_vport_params[i].vport_wfq = 1; + p_hwfn->qm_info.qm_vport_params[i].wfq = 1; } static void qed_disable_wfq_for_all_vports(struct qed_hwfn *p_hwfn, @@ -5118,7 +5160,7 @@ static void qed_disable_wfq_for_all_vports(struct qed_hwfn *p_hwfn, qed_init_wfq_default_param(p_hwfn, min_pf_rate); qed_init_vport_wfq(p_hwfn, p_ptt, vport_params[i].first_tx_pq_id, - vport_params[i].vport_wfq); + vport_params[i].wfq); } } diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h index 47376d4d071f..eb4808b3bf67 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h +++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h @@ -230,30 +230,6 @@ enum qed_dmae_address_type_t { QED_DMAE_ADDRESS_GRC }; -/* value of flags If QED_DMAE_FLAG_RW_REPL_SRC flag is set and the - * source is a block of length DMAE_MAX_RW_SIZE and the - * destination is larger, the source block will be duplicated as - * many times as required to fill the destination block. 
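
The qed_get_hsi_def_val() helper introduced in this hunk replaces the per-call "b_ah ? *_K2 : *_BB" ternaries with a single table lookup keyed by definition type and chip. The stand-alone sketch below reproduces that pattern and the per-function split plus 8-PQ granularity rounding applied in qed_hw_get_dflt_resc(); the demo_* names and the numeric values are illustrative stand-ins, not the real HSI constants.

/*
 * Minimal, self-contained sketch of the qed_get_hsi_def_val() lookup
 * pattern: one table row per HSI definition, one column per chip,
 * replacing per-call "is_ah ? K2 : BB" ternaries.  Names and values
 * are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

enum demo_chip_ids { DEMO_CHIP_BB, DEMO_CHIP_K2, DEMO_MAX_CHIP_IDS };

enum demo_hsi_def_type {
	DEMO_HSI_DEF_MAX_QM_TX_QUEUES,
	DEMO_HSI_DEF_MAX_PBF_CMD_LINES,
	DEMO_NUM_HSI_DEFS
};

static const uint32_t demo_hsi_def_val[DEMO_NUM_HSI_DEFS][DEMO_MAX_CHIP_IDS] = {
	/*                                    BB     K2  */
	[DEMO_HSI_DEF_MAX_QM_TX_QUEUES]  = { 448,   512 },	/* hypothetical */
	[DEMO_HSI_DEF_MAX_PBF_CMD_LINES] = { 3328, 3328 },	/* hypothetical */
};

static uint32_t demo_get_hsi_def_val(enum demo_chip_ids chip,
				     enum demo_hsi_def_type type)
{
	if (type >= DEMO_NUM_HSI_DEFS)
		return 0;	/* mirrors the DP_ERR + "return 0" path above */
	return demo_hsi_def_val[type][chip];
}

int main(void)
{
	enum demo_chip_ids chip = DEMO_CHIP_K2;
	unsigned int num_funcs = 4;
	uint32_t num_pqs;

	/* Per-function share, as in qed_hw_get_dflt_resc() ... */
	num_pqs = demo_get_hsi_def_val(chip, DEMO_HSI_DEF_MAX_QM_TX_QUEUES) /
		  num_funcs;
	/* ... rounded down to the 8-PQ granularity used for QED_PQ. */
	num_pqs &= ~0x7u;

	printf("default PQs per function: %u\n", (unsigned int)num_pqs);
	return 0;
}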
This is - * used mostly to write a zeroed buffer to destination address - * using DMA - */ -#define QED_DMAE_FLAG_RW_REPL_SRC 0x00000001 -#define QED_DMAE_FLAG_VF_SRC 0x00000002 -#define QED_DMAE_FLAG_VF_DST 0x00000004 -#define QED_DMAE_FLAG_COMPLETION_DST 0x00000008 -#define QED_DMAE_FLAG_PORT 0x00000010 -#define QED_DMAE_FLAG_PF_SRC 0x00000020 -#define QED_DMAE_FLAG_PF_DST 0x00000040 - -struct qed_dmae_params { - u32 flags; /* consists of QED_DMAE_FLAG_* values */ - u8 src_vfid; - u8 dst_vfid; - u8 port_id; - u8 src_pfid; - u8 dst_pfid; -}; - /** * @brief qed_dmae_host2grc - copy data from source addr to * dmae registers using the given ptt diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c index de31a382f58e..4c7fa391fd33 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c +++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c @@ -167,6 +167,8 @@ qed_sp_fcoe_func_start(struct qed_hwfn *p_hwfn, goto err; } p_cxt = cxt_info.p_cxt; + memset(p_cxt, 0, sizeof(*p_cxt)); + SET_FIELD(p_cxt->tstorm_ag_context.flags3, E4_TSTORM_FCOE_CONN_AG_CTX_DUMMY_TIMER_CF_EN, 1); diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index cf3ceb62e397..4597015b8bff 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -98,6 +98,7 @@ enum core_event_opcode { CORE_EVENT_RX_QUEUE_STOP, CORE_EVENT_RX_QUEUE_FLUSH, CORE_EVENT_TX_QUEUE_UPDATE, + CORE_EVENT_QUEUE_STATS_QUERY, MAX_CORE_EVENT_OPCODE }; @@ -116,7 +117,7 @@ struct core_ll2_port_stats { struct regpair gsi_crcchksm_error; }; -/* Ethernet TX Per Queue Stats */ +/* LL2 TX Per Queue Stats */ struct core_ll2_pstorm_per_queue_stat { struct regpair sent_ucast_bytes; struct regpair sent_mcast_bytes; @@ -124,13 +125,13 @@ struct core_ll2_pstorm_per_queue_stat { struct regpair sent_ucast_pkts; struct regpair sent_mcast_pkts; struct regpair sent_bcast_pkts; + struct regpair error_drop_pkts; }; /* Light-L2 RX Producers in Tstorm RAM */ struct core_ll2_rx_prod { __le16 bd_prod; __le16 cqe_prod; - __le32 reserved; }; struct core_ll2_tstorm_per_queue_stat { @@ -147,6 +148,18 @@ struct core_ll2_ustorm_per_queue_stat { struct regpair rcv_bcast_pkts; }; +/* Structure for doorbell data, in PWM mode, for RX producers update. 
*/ +struct core_pwm_prod_update_data { + __le16 icid; /* internal CID */ + u8 reserved0; + u8 params; +#define CORE_PWM_PROD_UPDATE_DATA_AGG_CMD_MASK 0x3 +#define CORE_PWM_PROD_UPDATE_DATA_AGG_CMD_SHIFT 0 +#define CORE_PWM_PROD_UPDATE_DATA_RESERVED1_MASK 0x3F /* Set 0 */ +#define CORE_PWM_PROD_UPDATE_DATA_RESERVED1_SHIFT 2 + struct core_ll2_rx_prod prod; /* Producers */ +}; + /* Core Ramrod Command IDs (light L2) */ enum core_ramrod_cmd_id { CORE_RAMROD_UNUSED, @@ -156,6 +169,7 @@ enum core_ramrod_cmd_id { CORE_RAMROD_TX_QUEUE_STOP, CORE_RAMROD_RX_QUEUE_FLUSH, CORE_RAMROD_TX_QUEUE_UPDATE, + CORE_RAMROD_QUEUE_STATS_QUERY, MAX_CORE_RAMROD_CMD_ID }; @@ -236,7 +250,8 @@ struct core_rx_gsi_offload_cqe { __le16 src_mac_addrlo; __le16 qp_id; __le32 src_qp; - __le32 reserved[3]; + struct core_rx_cqe_opaque_data opaque_data; + __le32 reserved; }; /* Core RX CQE for Light L2 */ @@ -274,8 +289,11 @@ struct core_rx_start_ramrod_data { u8 mf_si_mcast_accept_all; struct core_rx_action_on_error action_on_error; u8 gsi_offload_flag; + u8 vport_id_valid; + u8 vport_id; + u8 zero_prod_flg; u8 wipe_inner_vlan_pri_en; - u8 reserved[5]; + u8 reserved[2]; }; /* Ramrod data for rx queue stop ramrod */ @@ -352,8 +370,11 @@ struct core_tx_start_ramrod_data { __le16 pbl_size; __le16 qm_pq_id; u8 gsi_offload_flag; + u8 ctx_stats_en; + u8 vport_id_valid; u8 vport_id; - u8 resrved[2]; + u8 enforce_security_flag; + u8 reserved[7]; }; /* Ramrod data for tx queue stop ramrod */ @@ -385,7 +406,7 @@ struct ystorm_core_conn_st_ctx { /* The core storm context for the Pstorm */ struct pstorm_core_conn_st_ctx { - __le32 reserved[4]; + __le32 reserved[20]; }; /* Core Slowpath Connection storm context of Xstorm */ @@ -761,7 +782,7 @@ struct e4_tstorm_core_conn_ag_ctx { __le16 word1; __le16 word2; __le16 word3; - __le32 reg9; + __le32 ll2_rx_prod; __le32 reg10; }; @@ -836,11 +857,16 @@ struct e4_ustorm_core_conn_ag_ctx { /* The core storm context for the Mstorm */ struct mstorm_core_conn_st_ctx { - __le32 reserved[24]; + __le32 reserved[40]; }; /* The core storm context for the Ustorm */ struct ustorm_core_conn_st_ctx { + __le32 reserved[20]; +}; + +/* The core storm context for the Tstorm */ +struct tstorm_core_conn_st_ctx { __le32 reserved[4]; }; @@ -857,6 +883,8 @@ struct e4_core_conn_context { struct mstorm_core_conn_st_ctx mstorm_st_context; struct ustorm_core_conn_st_ctx ustorm_st_context; struct regpair ustorm_st_padding[2]; + struct tstorm_core_conn_st_ctx tstorm_st_context; + struct regpair tstorm_st_padding[2]; }; struct eth_mstorm_per_pf_stat { @@ -888,12 +916,21 @@ struct eth_pstorm_per_pf_stat { struct regpair sent_gre_bytes; struct regpair sent_vxlan_bytes; struct regpair sent_geneve_bytes; + struct regpair sent_mpls_bytes; + struct regpair sent_gre_mpls_bytes; + struct regpair sent_udp_mpls_bytes; struct regpair sent_gre_pkts; struct regpair sent_vxlan_pkts; struct regpair sent_geneve_pkts; + struct regpair sent_mpls_pkts; + struct regpair sent_gre_mpls_pkts; + struct regpair sent_udp_mpls_pkts; struct regpair gre_drop_pkts; struct regpair vxlan_drop_pkts; struct regpair geneve_drop_pkts; + struct regpair mpls_drop_pkts; + struct regpair gre_mpls_drop_pkts; + struct regpair udp_mpls_drop_pkts; }; /* Ethernet TX Per Queue Stats */ @@ -983,7 +1020,8 @@ union event_ring_data { struct event_ring_entry { u8 protocol_id; u8 opcode; - __le16 reserved0; + u8 reserved0; + u8 vf_id; __le16 echo; u8 fw_return_code; u8 flags; @@ -1061,7 +1099,20 @@ enum malicious_vf_error_id { ETH_CONTROL_PACKET_VIOLATION, 
ETH_ANTI_SPOOFING_ERR, ETH_PACKET_SIZE_TOO_LARGE, - MAX_MALICIOUS_VF_ERROR_ID + CORE_ILLEGAL_VLAN_MODE, + CORE_ILLEGAL_NBDS, + CORE_FIRST_BD_WO_SOP, + CORE_INSUFFICIENT_BDS, + CORE_PACKET_TOO_SMALL, + CORE_ILLEGAL_INBAND_TAGS, + CORE_VLAN_INSERT_AND_INBAND_VLAN, + CORE_MTU_VIOLATION, + CORE_CONTROL_PACKET_VIOLATION, + CORE_ANTI_SPOOFING_ERR, + CORE_PACKET_SIZE_TOO_LARGE, + CORE_ILLEGAL_BD_FLAGS, + CORE_GSI_PACKET_VIOLATION, + MAX_MALICIOUS_VF_ERROR_ID, }; /* Mstorm non-triggering VF zone */ @@ -1367,6 +1418,16 @@ enum vf_zone_size_mode { MAX_VF_ZONE_SIZE_MODE }; +/* Xstorm non-triggering VF zone */ +struct xstorm_non_trigger_vf_zone { + struct regpair non_edpm_ack_pkts; +}; + +/* Tstorm VF zone */ +struct xstorm_vf_zone { + struct xstorm_non_trigger_vf_zone non_trigger; +}; + /* Attentions status block */ struct atten_status_block { __le32 atten_bits; @@ -1435,7 +1496,11 @@ struct dmae_cmd { __le16 crc16; __le16 crc16_c; __le16 crc10; - __le16 reserved; + __le16 error_bit_reserved; +#define DMAE_CMD_ERROR_BIT_MASK 0x1 +#define DMAE_CMD_ERROR_BIT_SHIFT 0 +#define DMAE_CMD_RESERVED_MASK 0x7FFF +#define DMAE_CMD_RESERVED_SHIFT 1 __le16 xsum16; __le16 xsum8; }; @@ -1566,6 +1631,41 @@ struct e4_ystorm_core_conn_ag_ctx { __le32 reg3; }; +/* DMAE parameters */ +struct qed_dmae_params { + u32 flags; +/* If QED_DMAE_PARAMS_RW_REPL_SRC flag is set and the + * source is a block of length DMAE_MAX_RW_SIZE and the + * destination is larger, the source block will be duplicated as + * many times as required to fill the destination block. This is + * used mostly to write a zeroed buffer to destination address + * using DMA + */ +#define QED_DMAE_PARAMS_RW_REPL_SRC_MASK 0x1 +#define QED_DMAE_PARAMS_RW_REPL_SRC_SHIFT 0 +#define QED_DMAE_PARAMS_SRC_VF_VALID_MASK 0x1 +#define QED_DMAE_PARAMS_SRC_VF_VALID_SHIFT 1 +#define QED_DMAE_PARAMS_DST_VF_VALID_MASK 0x1 +#define QED_DMAE_PARAMS_DST_VF_VALID_SHIFT 2 +#define QED_DMAE_PARAMS_COMPLETION_DST_MASK 0x1 +#define QED_DMAE_PARAMS_COMPLETION_DST_SHIFT 3 +#define QED_DMAE_PARAMS_PORT_VALID_MASK 0x1 +#define QED_DMAE_PARAMS_PORT_VALID_SHIFT 4 +#define QED_DMAE_PARAMS_SRC_PF_VALID_MASK 0x1 +#define QED_DMAE_PARAMS_SRC_PF_VALID_SHIFT 5 +#define QED_DMAE_PARAMS_DST_PF_VALID_MASK 0x1 +#define QED_DMAE_PARAMS_DST_PF_VALID_SHIFT 6 +#define QED_DMAE_PARAMS_RESERVED_MASK 0x1FFFFFF +#define QED_DMAE_PARAMS_RESERVED_SHIFT 7 + u8 src_vfid; + u8 dst_vfid; + u8 port_id; + u8 src_pfid; + u8 dst_pfid; + u8 reserved1; + __le16 reserved2; +}; + /* IGU cleanup command */ struct igu_cleanup { __le32 sb_id_and_flags; @@ -1743,102 +1843,23 @@ struct sdm_op_gen { #define SDM_OP_GEN_RESERVED_SHIFT 20 }; +/* Physical memory descriptor */ +struct phys_mem_desc { + dma_addr_t phys_addr; + void *virt_addr; + u32 size; /* In bytes */ +}; + +/* Virtual memory descriptor */ +struct virt_mem_desc { + void *ptr; + u32 size; /* In bytes */ +}; + /****************************************/ /* Debug Tools HSI constants and macros */ /****************************************/ -enum block_addr { - GRCBASE_GRC = 0x50000, - GRCBASE_MISCS = 0x9000, - GRCBASE_MISC = 0x8000, - GRCBASE_DBU = 0xa000, - GRCBASE_PGLUE_B = 0x2a8000, - GRCBASE_CNIG = 0x218000, - GRCBASE_CPMU = 0x30000, - GRCBASE_NCSI = 0x40000, - GRCBASE_OPTE = 0x53000, - GRCBASE_BMB = 0x540000, - GRCBASE_PCIE = 0x54000, - GRCBASE_MCP = 0xe00000, - GRCBASE_MCP2 = 0x52000, - GRCBASE_PSWHST = 0x2a0000, - GRCBASE_PSWHST2 = 0x29e000, - GRCBASE_PSWRD = 0x29c000, - GRCBASE_PSWRD2 = 0x29d000, - GRCBASE_PSWWR = 0x29a000, - GRCBASE_PSWWR2 = 0x29b000, - 
GRCBASE_PSWRQ = 0x280000, - GRCBASE_PSWRQ2 = 0x240000, - GRCBASE_PGLCS = 0x0, - GRCBASE_DMAE = 0xc000, - GRCBASE_PTU = 0x560000, - GRCBASE_TCM = 0x1180000, - GRCBASE_MCM = 0x1200000, - GRCBASE_UCM = 0x1280000, - GRCBASE_XCM = 0x1000000, - GRCBASE_YCM = 0x1080000, - GRCBASE_PCM = 0x1100000, - GRCBASE_QM = 0x2f0000, - GRCBASE_TM = 0x2c0000, - GRCBASE_DORQ = 0x100000, - GRCBASE_BRB = 0x340000, - GRCBASE_SRC = 0x238000, - GRCBASE_PRS = 0x1f0000, - GRCBASE_TSDM = 0xfb0000, - GRCBASE_MSDM = 0xfc0000, - GRCBASE_USDM = 0xfd0000, - GRCBASE_XSDM = 0xf80000, - GRCBASE_YSDM = 0xf90000, - GRCBASE_PSDM = 0xfa0000, - GRCBASE_TSEM = 0x1700000, - GRCBASE_MSEM = 0x1800000, - GRCBASE_USEM = 0x1900000, - GRCBASE_XSEM = 0x1400000, - GRCBASE_YSEM = 0x1500000, - GRCBASE_PSEM = 0x1600000, - GRCBASE_RSS = 0x238800, - GRCBASE_TMLD = 0x4d0000, - GRCBASE_MULD = 0x4e0000, - GRCBASE_YULD = 0x4c8000, - GRCBASE_XYLD = 0x4c0000, - GRCBASE_PTLD = 0x5a0000, - GRCBASE_YPLD = 0x5c0000, - GRCBASE_PRM = 0x230000, - GRCBASE_PBF_PB1 = 0xda0000, - GRCBASE_PBF_PB2 = 0xda4000, - GRCBASE_RPB = 0x23c000, - GRCBASE_BTB = 0xdb0000, - GRCBASE_PBF = 0xd80000, - GRCBASE_RDIF = 0x300000, - GRCBASE_TDIF = 0x310000, - GRCBASE_CDU = 0x580000, - GRCBASE_CCFC = 0x2e0000, - GRCBASE_TCFC = 0x2d0000, - GRCBASE_IGU = 0x180000, - GRCBASE_CAU = 0x1c0000, - GRCBASE_RGFS = 0xf00000, - GRCBASE_RGSRC = 0x320000, - GRCBASE_TGFS = 0xd00000, - GRCBASE_TGSRC = 0x322000, - GRCBASE_UMAC = 0x51000, - GRCBASE_XMAC = 0x210000, - GRCBASE_DBG = 0x10000, - GRCBASE_NIG = 0x500000, - GRCBASE_WOL = 0x600000, - GRCBASE_BMBN = 0x610000, - GRCBASE_IPC = 0x20000, - GRCBASE_NWM = 0x800000, - GRCBASE_NWS = 0x700000, - GRCBASE_MS = 0x6a0000, - GRCBASE_PHY_PCIE = 0x620000, - GRCBASE_LED = 0x6b8000, - GRCBASE_AVS_WRAP = 0x6b0000, - GRCBASE_PXPREQBUS = 0x56000, - GRCBASE_MISC_AEU = 0x8000, - GRCBASE_BAR0_MAP = 0x1c00000, - MAX_BLOCK_ADDR -}; - enum block_id { BLOCK_GRC, BLOCK_MISCS, @@ -1893,8 +1914,6 @@ enum block_id { BLOCK_MULD, BLOCK_YULD, BLOCK_XYLD, - BLOCK_PTLD, - BLOCK_YPLD, BLOCK_PRM, BLOCK_PBF_PB1, BLOCK_PBF_PB2, @@ -1908,12 +1927,9 @@ enum block_id { BLOCK_TCFC, BLOCK_IGU, BLOCK_CAU, - BLOCK_RGFS, - BLOCK_RGSRC, - BLOCK_TGFS, - BLOCK_TGSRC, BLOCK_UMAC, BLOCK_XMAC, + BLOCK_MSTAT, BLOCK_DBG, BLOCK_NIG, BLOCK_WOL, @@ -1926,8 +1942,17 @@ enum block_id { BLOCK_LED, BLOCK_AVS_WRAP, BLOCK_PXPREQBUS, - BLOCK_MISC_AEU, BLOCK_BAR0_MAP, + BLOCK_MCP_FIO, + BLOCK_LAST_INIT, + BLOCK_PRS_FC, + BLOCK_PBF_FC, + BLOCK_NIG_LB_FC, + BLOCK_NIG_LB_FC_PLLH, + BLOCK_NIG_TX_FC_PLLH, + BLOCK_NIG_TX_FC, + BLOCK_NIG_RX_FC_PLLH, + BLOCK_NIG_RX_FC, MAX_BLOCK_ID }; @@ -1944,10 +1969,13 @@ enum bin_dbg_buffer_type { BIN_BUF_DBG_ATTN_REGS, BIN_BUF_DBG_ATTN_INDEXES, BIN_BUF_DBG_ATTN_NAME_OFFSETS, - BIN_BUF_DBG_BUS_BLOCKS, + BIN_BUF_DBG_BLOCKS, + BIN_BUF_DBG_BLOCKS_CHIP_DATA, BIN_BUF_DBG_BUS_LINES, - BIN_BUF_DBG_BUS_BLOCKS_USER_DATA, + BIN_BUF_DBG_BLOCKS_USER_DATA, + BIN_BUF_DBG_BLOCKS_CHIP_USER_DATA, BIN_BUF_DBG_BUS_LINE_NAME_OFFSETS, + BIN_BUF_DBG_RESET_REGS, BIN_BUF_DBG_PARSING_STRINGS, MAX_BIN_DBG_BUFFER_TYPE }; @@ -2031,20 +2059,54 @@ enum dbg_attn_type { MAX_DBG_ATTN_TYPE }; -/* Debug Bus block data */ -struct dbg_bus_block { - u8 num_of_lines; - u8 has_latency_events; - u16 lines_offset; +/* Block debug data */ +struct dbg_block { + u8 name[15]; + u8 associated_storm_letter; }; -/* Debug Bus block user data */ -struct dbg_bus_block_user_data { - u8 num_of_lines; +/* Chip-specific block debug data */ +struct dbg_block_chip { + u8 flags; +#define DBG_BLOCK_CHIP_IS_REMOVED_MASK 0x1 +#define 
DBG_BLOCK_CHIP_IS_REMOVED_SHIFT 0 +#define DBG_BLOCK_CHIP_HAS_RESET_REG_MASK 0x1 +#define DBG_BLOCK_CHIP_HAS_RESET_REG_SHIFT 1 +#define DBG_BLOCK_CHIP_UNRESET_BEFORE_DUMP_MASK 0x1 +#define DBG_BLOCK_CHIP_UNRESET_BEFORE_DUMP_SHIFT 2 +#define DBG_BLOCK_CHIP_HAS_DBG_BUS_MASK 0x1 +#define DBG_BLOCK_CHIP_HAS_DBG_BUS_SHIFT 3 +#define DBG_BLOCK_CHIP_HAS_LATENCY_EVENTS_MASK 0x1 +#define DBG_BLOCK_CHIP_HAS_LATENCY_EVENTS_SHIFT 4 +#define DBG_BLOCK_CHIP_RESERVED0_MASK 0x7 +#define DBG_BLOCK_CHIP_RESERVED0_SHIFT 5 + u8 dbg_client_id; + u8 reset_reg_id; + u8 reset_reg_bit_offset; + struct dbg_mode_hdr dbg_bus_mode; + u16 reserved1; + u8 reserved2; + u8 num_of_dbg_bus_lines; + u16 dbg_bus_lines_offset; + u32 dbg_select_reg_addr; + u32 dbg_dword_enable_reg_addr; + u32 dbg_shift_reg_addr; + u32 dbg_force_valid_reg_addr; + u32 dbg_force_frame_reg_addr; +}; + +/* Chip-specific block user debug data */ +struct dbg_block_chip_user { + u8 num_of_dbg_bus_lines; u8 has_latency_events; u16 names_offset; }; +/* Block user debug data */ +struct dbg_block_user { + u8 name[16]; +}; + /* Block Debug line data */ struct dbg_bus_line { u8 data; @@ -2197,22 +2259,33 @@ enum dbg_idle_chk_severity_types { MAX_DBG_IDLE_CHK_SEVERITY_TYPES }; +/* Reset register */ +struct dbg_reset_reg { + u32 data; +#define DBG_RESET_REG_ADDR_MASK 0xFFFFFF +#define DBG_RESET_REG_ADDR_SHIFT 0 +#define DBG_RESET_REG_IS_REMOVED_MASK 0x1 +#define DBG_RESET_REG_IS_REMOVED_SHIFT 24 +#define DBG_RESET_REG_RESERVED_MASK 0x7F +#define DBG_RESET_REG_RESERVED_SHIFT 25 +}; + /* Debug Bus block data */ struct dbg_bus_block_data { - u16 data; -#define DBG_BUS_BLOCK_DATA_ENABLE_MASK_MASK 0xF -#define DBG_BUS_BLOCK_DATA_ENABLE_MASK_SHIFT 0 -#define DBG_BUS_BLOCK_DATA_RIGHT_SHIFT_MASK 0xF -#define DBG_BUS_BLOCK_DATA_RIGHT_SHIFT_SHIFT 4 -#define DBG_BUS_BLOCK_DATA_FORCE_VALID_MASK_MASK 0xF -#define DBG_BUS_BLOCK_DATA_FORCE_VALID_MASK_SHIFT 8 -#define DBG_BUS_BLOCK_DATA_FORCE_FRAME_MASK_MASK 0xF -#define DBG_BUS_BLOCK_DATA_FORCE_FRAME_MASK_SHIFT 12 + u8 enable_mask; + u8 right_shift; + u8 force_valid_mask; + u8 force_frame_mask; + u8 dword_mask; u8 line_num; u8 hw_id; + u8 flags; +#define DBG_BUS_BLOCK_DATA_IS_256B_LINE_MASK 0x1 +#define DBG_BUS_BLOCK_DATA_IS_256B_LINE_SHIFT 0 +#define DBG_BUS_BLOCK_DATA_RESERVED_MASK 0x7F +#define DBG_BUS_BLOCK_DATA_RESERVED_SHIFT 1 }; -/* Debug Bus Clients */ enum dbg_bus_clients { DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCP, @@ -2253,11 +2326,10 @@ enum dbg_bus_constraint_ops { /* Debug Bus trigger state data */ struct dbg_bus_trigger_state_data { - u8 data; -#define DBG_BUS_TRIGGER_STATE_DATA_BLOCK_SHIFTED_ENABLE_MASK_MASK 0xF -#define DBG_BUS_TRIGGER_STATE_DATA_BLOCK_SHIFTED_ENABLE_MASK_SHIFT 0 -#define DBG_BUS_TRIGGER_STATE_DATA_CONSTRAINT_DWORD_MASK_MASK 0xF -#define DBG_BUS_TRIGGER_STATE_DATA_CONSTRAINT_DWORD_MASK_SHIFT 4 + u8 msg_len; + u8 constraint_dword_mask; + u8 storm_id; + u8 reserved; }; /* Debug Bus memory address */ @@ -2307,8 +2379,7 @@ struct dbg_bus_storm_data { struct dbg_bus_data { u32 app_version; u8 state; - u8 hw_dwords; - u16 hw_id_mask; + u8 mode_256b_en; u8 num_enabled_blocks; u8 num_enabled_storms; u8 target; @@ -2319,67 +2390,21 @@ struct dbg_bus_data { u8 adding_filter; u8 filter_pre_trigger; u8 filter_post_trigger; - u16 reserved; u8 trigger_en; - struct dbg_bus_trigger_state_data trigger_states[3]; + u8 filter_constraint_dword_mask; u8 next_trigger_state; u8 next_constraint_id; - u8 unify_inputs; + struct dbg_bus_trigger_state_data trigger_states[3]; + u8 filter_msg_len; u8 
rcv_from_other_engine; + u8 blocks_dword_mask; + u8 blocks_dword_overlap; + u32 hw_id_mask; struct dbg_bus_pci_buf_data pci_buf; - struct dbg_bus_block_data blocks[88]; + struct dbg_bus_block_data blocks[132]; struct dbg_bus_storm_data storms[6]; }; -/* Debug bus filter types */ -enum dbg_bus_filter_types { - DBG_BUS_FILTER_TYPE_OFF, - DBG_BUS_FILTER_TYPE_PRE, - DBG_BUS_FILTER_TYPE_POST, - DBG_BUS_FILTER_TYPE_ON, - MAX_DBG_BUS_FILTER_TYPES -}; - -/* Debug bus frame modes */ -enum dbg_bus_frame_modes { - DBG_BUS_FRAME_MODE_0HW_4ST = 0, /* 0 HW dwords, 4 Storm dwords */ - DBG_BUS_FRAME_MODE_4HW_0ST = 3, /* 4 HW dwords, 0 Storm dwords */ - DBG_BUS_FRAME_MODE_8HW_0ST = 4, /* 8 HW dwords, 0 Storm dwords */ - MAX_DBG_BUS_FRAME_MODES -}; - -/* Debug bus other engine mode */ -enum dbg_bus_other_engine_modes { - DBG_BUS_OTHER_ENGINE_MODE_NONE, - DBG_BUS_OTHER_ENGINE_MODE_DOUBLE_BW_TX, - DBG_BUS_OTHER_ENGINE_MODE_DOUBLE_BW_RX, - DBG_BUS_OTHER_ENGINE_MODE_CROSS_ENGINE_TX, - DBG_BUS_OTHER_ENGINE_MODE_CROSS_ENGINE_RX, - MAX_DBG_BUS_OTHER_ENGINE_MODES -}; - -/* Debug bus post-trigger recording types */ -enum dbg_bus_post_trigger_types { - DBG_BUS_POST_TRIGGER_RECORD, - DBG_BUS_POST_TRIGGER_DROP, - MAX_DBG_BUS_POST_TRIGGER_TYPES -}; - -/* Debug bus pre-trigger recording types */ -enum dbg_bus_pre_trigger_types { - DBG_BUS_PRE_TRIGGER_START_FROM_ZERO, - DBG_BUS_PRE_TRIGGER_NUM_CHUNKS, - DBG_BUS_PRE_TRIGGER_DROP, - MAX_DBG_BUS_PRE_TRIGGER_TYPES -}; - -/* Debug bus SEMI frame modes */ -enum dbg_bus_semi_frame_modes { - DBG_BUS_SEMI_FRAME_MODE_0SLOW_4FAST = 0, - DBG_BUS_SEMI_FRAME_MODE_4SLOW_0FAST = 3, - MAX_DBG_BUS_SEMI_FRAME_MODES -}; - /* Debug bus states */ enum dbg_bus_states { DBG_BUS_STATE_IDLE, @@ -2397,7 +2422,9 @@ enum dbg_bus_storm_modes { DBG_BUS_STORM_MODE_DRA_W, DBG_BUS_STORM_MODE_LD_ST_ADDR, DBG_BUS_STORM_MODE_DRA_FSM, + DBG_BUS_STORM_MODE_FAST_DBGMUX, DBG_BUS_STORM_MODE_RH, + DBG_BUS_STORM_MODE_RH_WITH_STORE, DBG_BUS_STORM_MODE_FOC, DBG_BUS_STORM_MODE_EXT_STORE, MAX_DBG_BUS_STORM_MODES @@ -2438,13 +2465,13 @@ enum dbg_grc_params { DBG_GRC_PARAM_DUMP_CAU, DBG_GRC_PARAM_DUMP_QM, DBG_GRC_PARAM_DUMP_MCP, - DBG_GRC_PARAM_MCP_TRACE_META_SIZE, + DBG_GRC_PARAM_DUMP_DORQ, DBG_GRC_PARAM_DUMP_CFC, DBG_GRC_PARAM_DUMP_IGU, DBG_GRC_PARAM_DUMP_BRB, DBG_GRC_PARAM_DUMP_BTB, DBG_GRC_PARAM_DUMP_BMB, - DBG_GRC_PARAM_DUMP_NIG, + DBG_GRC_PARAM_RESERVD1, DBG_GRC_PARAM_DUMP_MULD, DBG_GRC_PARAM_DUMP_PRS, DBG_GRC_PARAM_DUMP_DMAE, @@ -2453,8 +2480,8 @@ enum dbg_grc_params { DBG_GRC_PARAM_DUMP_DIF, DBG_GRC_PARAM_DUMP_STATIC, DBG_GRC_PARAM_UNSTALL, - DBG_GRC_PARAM_NUM_LCIDS, - DBG_GRC_PARAM_NUM_LTIDS, + DBG_GRC_PARAM_RESERVED2, + DBG_GRC_PARAM_MCP_TRACE_META_SIZE, DBG_GRC_PARAM_EXCLUDE_ALL, DBG_GRC_PARAM_CRASH, DBG_GRC_PARAM_PARITY_SAFE, @@ -2462,22 +2489,14 @@ enum dbg_grc_params { DBG_GRC_PARAM_DUMP_PHY, DBG_GRC_PARAM_NO_MCP, DBG_GRC_PARAM_NO_FW_VER, + DBG_GRC_PARAM_RESERVED3, + DBG_GRC_PARAM_DUMP_MCP_HW_DUMP, + DBG_GRC_PARAM_DUMP_ILT_CDUC, + DBG_GRC_PARAM_DUMP_ILT_CDUT, + DBG_GRC_PARAM_DUMP_CAU_EXT, MAX_DBG_GRC_PARAMS }; -/* Debug reset registers */ -enum dbg_reset_regs { - DBG_RESET_REG_MISCS_PL_UA, - DBG_RESET_REG_MISCS_PL_HV, - DBG_RESET_REG_MISCS_PL_HV_2, - DBG_RESET_REG_MISC_PL_UA, - DBG_RESET_REG_MISC_PL_HV, - DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, - DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, - DBG_RESET_REG_MISC_PL_PDA_VAUX, - MAX_DBG_RESET_REGS -}; - /* Debug status codes */ enum dbg_status { DBG_STATUS_OK, @@ -2489,15 +2508,15 @@ enum dbg_status { DBG_STATUS_INVALID_PCI_BUF_SIZE, DBG_STATUS_PCI_BUF_ALLOC_FAILED, 
DBG_STATUS_PCI_BUF_NOT_ALLOCATED, - DBG_STATUS_TOO_MANY_INPUTS, - DBG_STATUS_INPUT_OVERLAP, - DBG_STATUS_HW_ONLY_RECORDING, + DBG_STATUS_INVALID_FILTER_TRIGGER_DWORDS, + DBG_STATUS_NO_MATCHING_FRAMING_MODE, + DBG_STATUS_VFC_READ_ERROR, DBG_STATUS_STORM_ALREADY_ENABLED, DBG_STATUS_STORM_NOT_ENABLED, DBG_STATUS_BLOCK_ALREADY_ENABLED, DBG_STATUS_BLOCK_NOT_ENABLED, DBG_STATUS_NO_INPUT_ENABLED, - DBG_STATUS_NO_FILTER_TRIGGER_64B, + DBG_STATUS_NO_FILTER_TRIGGER_256B, DBG_STATUS_FILTER_ALREADY_ENABLED, DBG_STATUS_TRIGGER_ALREADY_ENABLED, DBG_STATUS_TRIGGER_NOT_ENABLED, @@ -2522,7 +2541,7 @@ enum dbg_status { DBG_STATUS_MCP_TRACE_NO_META, DBG_STATUS_MCP_COULD_NOT_HALT, DBG_STATUS_MCP_COULD_NOT_RESUME, - DBG_STATUS_RESERVED2, + DBG_STATUS_RESERVED0, DBG_STATUS_SEMI_FIFO_NOT_EMPTY, DBG_STATUS_IGU_FIFO_BAD_DATA, DBG_STATUS_MCP_COULD_NOT_MASK_PRTY, @@ -2530,10 +2549,15 @@ enum dbg_status { DBG_STATUS_REG_FIFO_BAD_DATA, DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA, DBG_STATUS_DBG_ARRAY_NOT_SET, - DBG_STATUS_FILTER_BUG, + DBG_STATUS_RESERVED1, DBG_STATUS_NON_MATCHING_LINES, - DBG_STATUS_INVALID_TRIGGER_DWORD_OFFSET, + DBG_STATUS_INSUFFICIENT_HW_IDS, DBG_STATUS_DBG_BUS_IN_USE, + DBG_STATUS_INVALID_STORM_DBG_MODE, + DBG_STATUS_OTHER_ENGINE_BB_ONLY, + DBG_STATUS_FILTER_SINGLE_HW_ID, + DBG_STATUS_TRIGGER_SINGLE_HW_ID, + DBG_STATUS_MISSING_TRIGGER_STATE_STORM, MAX_DBG_STATUS }; @@ -2569,9 +2593,9 @@ struct dbg_tools_data { struct dbg_bus_data bus; struct idle_chk_data idle_chk; u8 mode_enable[40]; - u8 block_in_reset[88]; + u8 block_in_reset[132]; u8 chip_id; - u8 platform_id; + u8 hw_type; u8 num_ports; u8 num_pfs_per_port; u8 num_vfs; @@ -2582,6 +2606,19 @@ struct dbg_tools_data { u32 num_regs_read; }; +/* ILT Clients */ +enum ilt_clients { + ILT_CLI_CDUC, + ILT_CLI_CDUT, + ILT_CLI_QM, + ILT_CLI_TM, + ILT_CLI_SRC, + ILT_CLI_TSDM, + ILT_CLI_RGFS, + ILT_CLI_TGFS, + MAX_ILT_CLIENTS +}; + /********************************/ /* HSI Init Functions constants */ /********************************/ @@ -2630,13 +2667,18 @@ struct init_nig_pri_tc_map_req { struct init_nig_pri_tc_map_entry pri[NUM_OF_VLAN_PRIORITIES]; }; +/* QM per global RL init parameters */ +struct init_qm_global_rl_params { + u32 rate_limit; +}; + /* QM per-port init parameters */ struct init_qm_port_params { - u8 active; - u8 active_phys_tcs; + u16 active_phys_tcs; u16 num_pbf_cmd_lines; u16 num_btb_blocks; - u16 reserved; + u8 active; + u8 reserved; }; /* QM per-PQ init parameters */ @@ -2645,15 +2687,14 @@ struct init_qm_pq_params { u8 tc_id; u8 wrr_group; u8 rl_valid; + u16 rl_id; u8 port_id; - u8 reserved0; - u16 reserved1; + u8 reserved; }; /* QM per-vport init parameters */ struct init_qm_vport_params { - u32 vport_rl; - u16 vport_wfq; + u16 wfq; u16 first_tx_pq_id[NUM_OF_TCS]; }; @@ -2673,13 +2714,12 @@ struct init_qm_vport_params { enum chip_ids { CHIP_BB, CHIP_K2, - CHIP_RESERVED, MAX_CHIP_IDS }; struct fw_asserts_ram_section { - u16 section_ram_line_offset; - u16 section_ram_line_size; + __le16 section_ram_line_offset; + __le16 section_ram_line_size; u8 list_dword_offset; u8 list_element_dword_size; u8 list_num_elements; @@ -2729,6 +2769,7 @@ enum init_modes { MODE_PORTS_PER_ENG_4, MODE_100G, MODE_RESERVED6, + MODE_RESERVED7, MAX_INIT_MODES }; @@ -2763,9 +2804,19 @@ enum bin_init_buffer_type { BIN_BUF_INIT_VAL, BIN_BUF_INIT_MODE_TREE, BIN_BUF_INIT_IRO, + BIN_BUF_INIT_OVERLAYS, MAX_BIN_INIT_BUFFER_TYPE }; +/* FW overlay buffer header */ +struct fw_overlay_buf_hdr { + u32 data; +#define FW_OVERLAY_BUF_HDR_STORM_ID_MASK 0xFF +#define 
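
With struct qed_dmae_params now carrying its options as MASK/SHIFT-described bits inside a single flags word (replacing the standalone QED_DMAE_FLAG_* values removed from qed_dev_api.h earlier in this diff), a caller sets a *_VALID bit and fills the matching id field. The sketch below is a self-contained illustration of that convention; DEMO_SET_FIELD()/DEMO_GET_FIELD() and struct demo_dmae_params are stand-ins written for this example, not the driver's own SET_FIELD()/GET_FIELD() helpers or the full structure.

/*
 * Self-contained sketch of the MASK/SHIFT convention used by the new
 * qed_dmae_params::flags word (and throughout this header).  The
 * helpers below are written for this example only.
 */
#include <stdint.h>
#include <stdio.h>

/* Field descriptions copied from the hunk above. */
#define QED_DMAE_PARAMS_PORT_VALID_MASK		0x1
#define QED_DMAE_PARAMS_PORT_VALID_SHIFT	4
#define QED_DMAE_PARAMS_SRC_PF_VALID_MASK	0x1
#define QED_DMAE_PARAMS_SRC_PF_VALID_SHIFT	5

#define DEMO_SET_FIELD(value, name, val)				       \
	do {								       \
		(value) &= ~((uint32_t)name##_MASK << name##_SHIFT);	       \
		(value) |= ((uint32_t)(val) & name##_MASK) << name##_SHIFT;    \
	} while (0)

#define DEMO_GET_FIELD(value, name)					       \
	(((value) >> name##_SHIFT) & name##_MASK)

struct demo_dmae_params {	/* reduced stand-in for struct qed_dmae_params */
	uint32_t flags;
	uint8_t port_id;
	uint8_t src_pfid;
};

int main(void)
{
	struct demo_dmae_params params = { 0 };

	/*
	 * Old style was "params.flags = QED_DMAE_FLAG_PORT"; the new style
	 * marks the companion field as valid and fills it in explicitly.
	 */
	DEMO_SET_FIELD(params.flags, QED_DMAE_PARAMS_PORT_VALID, 1);
	params.port_id = 1;
	DEMO_SET_FIELD(params.flags, QED_DMAE_PARAMS_SRC_PF_VALID, 1);
	params.src_pfid = 0;

	printf("flags=0x%x port_valid=%u\n",
	       (unsigned int)params.flags,
	       (unsigned int)DEMO_GET_FIELD(params.flags,
					    QED_DMAE_PARAMS_PORT_VALID));
	return 0;
}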
FW_OVERLAY_BUF_HDR_STORM_ID_SHIFT 0 +#define FW_OVERLAY_BUF_HDR_BUF_SIZE_MASK 0xFFFFFF +#define FW_OVERLAY_BUF_HDR_BUF_SIZE_SHIFT 8 +}; + /* init array header: raw */ struct init_array_raw_hdr { u32 data; @@ -2859,10 +2910,8 @@ struct init_if_phase_op { u32 op_data; #define INIT_IF_PHASE_OP_OP_MASK 0xF #define INIT_IF_PHASE_OP_OP_SHIFT 0 -#define INIT_IF_PHASE_OP_DMAE_ENABLE_MASK 0x1 -#define INIT_IF_PHASE_OP_DMAE_ENABLE_SHIFT 4 -#define INIT_IF_PHASE_OP_RESERVED1_MASK 0x7FF -#define INIT_IF_PHASE_OP_RESERVED1_SHIFT 5 +#define INIT_IF_PHASE_OP_RESERVED1_MASK 0xFFF +#define INIT_IF_PHASE_OP_RESERVED1_SHIFT 4 #define INIT_IF_PHASE_OP_CMD_OFFSET_MASK 0xFFFF #define INIT_IF_PHASE_OP_CMD_OFFSET_SHIFT 16 u32 phase_data; @@ -2991,9 +3040,11 @@ struct iro { * @brief qed_dbg_set_bin_ptr - Sets a pointer to the binary data with debug * arrays. * + * @param p_hwfn - HW device data * @param bin_ptr - a pointer to the binary data with debug arrays. */ -enum dbg_status qed_dbg_set_bin_ptr(const u8 * const bin_ptr); +enum dbg_status qed_dbg_set_bin_ptr(struct qed_hwfn *p_hwfn, + const u8 * const bin_ptr); /** * @brief qed_read_regs - Reads registers into a buffer (using GRC). @@ -3037,7 +3088,6 @@ bool qed_read_fw_info(struct qed_hwfn *p_hwfn, * - val is outside the allowed boundaries */ enum dbg_status qed_dbg_grc_config(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, enum dbg_grc_params grc_param, u32 val); /** @@ -3358,20 +3408,36 @@ enum dbg_status qed_dbg_print_attn(struct qed_hwfn *p_hwfn, struct mcp_trace_format { u32 data; #define MCP_TRACE_FORMAT_MODULE_MASK 0x0000ffff -#define MCP_TRACE_FORMAT_MODULE_SHIFT 0 +#define MCP_TRACE_FORMAT_MODULE_OFFSET 0 #define MCP_TRACE_FORMAT_LEVEL_MASK 0x00030000 -#define MCP_TRACE_FORMAT_LEVEL_SHIFT 16 +#define MCP_TRACE_FORMAT_LEVEL_OFFSET 16 #define MCP_TRACE_FORMAT_P1_SIZE_MASK 0x000c0000 -#define MCP_TRACE_FORMAT_P1_SIZE_SHIFT 18 +#define MCP_TRACE_FORMAT_P1_SIZE_OFFSET 18 #define MCP_TRACE_FORMAT_P2_SIZE_MASK 0x00300000 -#define MCP_TRACE_FORMAT_P2_SIZE_SHIFT 20 +#define MCP_TRACE_FORMAT_P2_SIZE_OFFSET 20 #define MCP_TRACE_FORMAT_P3_SIZE_MASK 0x00c00000 -#define MCP_TRACE_FORMAT_P3_SIZE_SHIFT 22 +#define MCP_TRACE_FORMAT_P3_SIZE_OFFSET 22 #define MCP_TRACE_FORMAT_LEN_MASK 0xff000000 -#define MCP_TRACE_FORMAT_LEN_SHIFT 24 +#define MCP_TRACE_FORMAT_LEN_OFFSET 24 + char *format_str; }; +/* MCP Trace Meta data structure */ +struct mcp_trace_meta { + u32 modules_num; + char **modules; + u32 formats_num; + struct mcp_trace_format *formats; + bool is_allocated; +}; + +/* Debug Tools user data */ +struct dbg_tools_user_data { + struct mcp_trace_meta mcp_trace_meta; + const u32 *mcp_trace_user_meta_buf; +}; + /******************************** Constants **********************************/ #define MAX_NAME_LEN 16 @@ -3382,16 +3448,20 @@ struct mcp_trace_format { * @brief qed_dbg_user_set_bin_ptr - Sets a pointer to the binary data with * debug arrays. * + * @param p_hwfn - HW device data * @param bin_ptr - a pointer to the binary data with debug arrays. */ -enum dbg_status qed_dbg_user_set_bin_ptr(const u8 * const bin_ptr); +enum dbg_status qed_dbg_user_set_bin_ptr(struct qed_hwfn *p_hwfn, + const u8 * const bin_ptr); /** * @brief qed_dbg_alloc_user_data - Allocates user debug data. * * @param p_hwfn - HW device data + * @param user_data_ptr - OUT: a pointer to the allocated memory. 
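
The new struct fw_overlay_buf_hdr packs a storm ID (bits 0-7) and a buffer size (bits 8-31) into one dword, and the qed_fw_overlay_mem_alloc() / qed_fw_overlay_init_ram() / qed_fw_overlay_mem_free() API added later in this header consumes an image built from such header-prefixed chunks (allocated at load, programmed into storm RAM during PF init, freed at teardown, per the qed_dev.c hunks above). The walk below only sketches the layout: treating the size field as a payload dword count that excludes the header dword is an assumption made for this example, not something stated in the diff.

/*
 * Illustrative walk over a concatenated FW overlay image made of
 * fw_overlay_buf_hdr-prefixed chunks.  Field layout is taken from the
 * hunk above; the "size = payload dwords, header excluded" reading is
 * an assumption for this sketch.
 */
#include <stdint.h>
#include <stdio.h>

#define HDR_STORM_ID_MASK	0xFF
#define HDR_STORM_ID_SHIFT	0
#define HDR_BUF_SIZE_MASK	0xFFFFFF
#define HDR_BUF_SIZE_SHIFT	8

static void walk_overlays(const uint32_t *buf, uint32_t len_dwords)
{
	uint32_t pos = 0;

	while (pos < len_dwords) {
		uint32_t hdr = buf[pos++];
		unsigned int storm_id =
			(hdr >> HDR_STORM_ID_SHIFT) & HDR_STORM_ID_MASK;
		unsigned int size =
			(hdr >> HDR_BUF_SIZE_SHIFT) & HDR_BUF_SIZE_MASK;

		printf("storm %u: %u payload dwords\n", storm_id, size);
		pos += size;	/* skip this storm's payload */
	}
}

int main(void)
{
	/* Two hypothetical chunks: storm 0 with 2 dwords, storm 3 with 1. */
	static const uint32_t image[] = {
		(2u << HDR_BUF_SIZE_SHIFT) | 0u, 0xaaaaaaaa, 0xbbbbbbbb,
		(1u << HDR_BUF_SIZE_SHIFT) | 3u, 0xcccccccc,
	};

	walk_overlays(image, sizeof(image) / sizeof(image[0]));
	return 0;
}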
*/ -enum dbg_status qed_dbg_alloc_user_data(struct qed_hwfn *p_hwfn); +enum dbg_status qed_dbg_alloc_user_data(struct qed_hwfn *p_hwfn, + void **user_data_ptr); /** * @brief qed_dbg_get_status_str - Returns a string for the specified status. @@ -3664,271 +3734,6 @@ enum dbg_status qed_print_fw_asserts_results(struct qed_hwfn *p_hwfn, enum dbg_status qed_dbg_parse_attn(struct qed_hwfn *p_hwfn, struct dbg_attn_block_result *results); -/* Debug Bus blocks */ -static const u32 dbg_bus_blocks[] = { - 0x0000000f, /* grc, bb, 15 lines */ - 0x0000000f, /* grc, k2, 15 lines */ - 0x00000000, - 0x00000000, /* miscs, bb, 0 lines */ - 0x00000000, /* miscs, k2, 0 lines */ - 0x00000000, - 0x00000000, /* misc, bb, 0 lines */ - 0x00000000, /* misc, k2, 0 lines */ - 0x00000000, - 0x00000000, /* dbu, bb, 0 lines */ - 0x00000000, /* dbu, k2, 0 lines */ - 0x00000000, - 0x000f0127, /* pglue_b, bb, 39 lines */ - 0x0036012a, /* pglue_b, k2, 42 lines */ - 0x00000000, - 0x00000000, /* cnig, bb, 0 lines */ - 0x00120102, /* cnig, k2, 2 lines */ - 0x00000000, - 0x00000000, /* cpmu, bb, 0 lines */ - 0x00000000, /* cpmu, k2, 0 lines */ - 0x00000000, - 0x00000001, /* ncsi, bb, 1 lines */ - 0x00000001, /* ncsi, k2, 1 lines */ - 0x00000000, - 0x00000000, /* opte, bb, 0 lines */ - 0x00000000, /* opte, k2, 0 lines */ - 0x00000000, - 0x00600085, /* bmb, bb, 133 lines */ - 0x00600085, /* bmb, k2, 133 lines */ - 0x00000000, - 0x00000000, /* pcie, bb, 0 lines */ - 0x00e50033, /* pcie, k2, 51 lines */ - 0x00000000, - 0x00000000, /* mcp, bb, 0 lines */ - 0x00000000, /* mcp, k2, 0 lines */ - 0x00000000, - 0x01180009, /* mcp2, bb, 9 lines */ - 0x01180009, /* mcp2, k2, 9 lines */ - 0x00000000, - 0x01210104, /* pswhst, bb, 4 lines */ - 0x01210104, /* pswhst, k2, 4 lines */ - 0x00000000, - 0x01250103, /* pswhst2, bb, 3 lines */ - 0x01250103, /* pswhst2, k2, 3 lines */ - 0x00000000, - 0x00340101, /* pswrd, bb, 1 lines */ - 0x00340101, /* pswrd, k2, 1 lines */ - 0x00000000, - 0x01280119, /* pswrd2, bb, 25 lines */ - 0x01280119, /* pswrd2, k2, 25 lines */ - 0x00000000, - 0x01410109, /* pswwr, bb, 9 lines */ - 0x01410109, /* pswwr, k2, 9 lines */ - 0x00000000, - 0x00000000, /* pswwr2, bb, 0 lines */ - 0x00000000, /* pswwr2, k2, 0 lines */ - 0x00000000, - 0x001c0001, /* pswrq, bb, 1 lines */ - 0x001c0001, /* pswrq, k2, 1 lines */ - 0x00000000, - 0x014a0015, /* pswrq2, bb, 21 lines */ - 0x014a0015, /* pswrq2, k2, 21 lines */ - 0x00000000, - 0x00000000, /* pglcs, bb, 0 lines */ - 0x00120006, /* pglcs, k2, 6 lines */ - 0x00000000, - 0x00100001, /* dmae, bb, 1 lines */ - 0x00100001, /* dmae, k2, 1 lines */ - 0x00000000, - 0x015f0105, /* ptu, bb, 5 lines */ - 0x015f0105, /* ptu, k2, 5 lines */ - 0x00000000, - 0x01640120, /* tcm, bb, 32 lines */ - 0x01640120, /* tcm, k2, 32 lines */ - 0x00000000, - 0x01640120, /* mcm, bb, 32 lines */ - 0x01640120, /* mcm, k2, 32 lines */ - 0x00000000, - 0x01640120, /* ucm, bb, 32 lines */ - 0x01640120, /* ucm, k2, 32 lines */ - 0x00000000, - 0x01640120, /* xcm, bb, 32 lines */ - 0x01640120, /* xcm, k2, 32 lines */ - 0x00000000, - 0x01640120, /* ycm, bb, 32 lines */ - 0x01640120, /* ycm, k2, 32 lines */ - 0x00000000, - 0x01640120, /* pcm, bb, 32 lines */ - 0x01640120, /* pcm, k2, 32 lines */ - 0x00000000, - 0x01840062, /* qm, bb, 98 lines */ - 0x01840062, /* qm, k2, 98 lines */ - 0x00000000, - 0x01e60021, /* tm, bb, 33 lines */ - 0x01e60021, /* tm, k2, 33 lines */ - 0x00000000, - 0x02070107, /* dorq, bb, 7 lines */ - 0x02070107, /* dorq, k2, 7 lines */ - 0x00000000, - 0x00600185, /* brb, bb, 133 lines */ - 
0x00600185, /* brb, k2, 133 lines */ - 0x00000000, - 0x020e0019, /* src, bb, 25 lines */ - 0x020c001a, /* src, k2, 26 lines */ - 0x00000000, - 0x02270104, /* prs, bb, 4 lines */ - 0x02270104, /* prs, k2, 4 lines */ - 0x00000000, - 0x022b0133, /* tsdm, bb, 51 lines */ - 0x022b0133, /* tsdm, k2, 51 lines */ - 0x00000000, - 0x022b0133, /* msdm, bb, 51 lines */ - 0x022b0133, /* msdm, k2, 51 lines */ - 0x00000000, - 0x022b0133, /* usdm, bb, 51 lines */ - 0x022b0133, /* usdm, k2, 51 lines */ - 0x00000000, - 0x022b0133, /* xsdm, bb, 51 lines */ - 0x022b0133, /* xsdm, k2, 51 lines */ - 0x00000000, - 0x022b0133, /* ysdm, bb, 51 lines */ - 0x022b0133, /* ysdm, k2, 51 lines */ - 0x00000000, - 0x022b0133, /* psdm, bb, 51 lines */ - 0x022b0133, /* psdm, k2, 51 lines */ - 0x00000000, - 0x025e010c, /* tsem, bb, 12 lines */ - 0x025e010c, /* tsem, k2, 12 lines */ - 0x00000000, - 0x025e010c, /* msem, bb, 12 lines */ - 0x025e010c, /* msem, k2, 12 lines */ - 0x00000000, - 0x025e010c, /* usem, bb, 12 lines */ - 0x025e010c, /* usem, k2, 12 lines */ - 0x00000000, - 0x025e010c, /* xsem, bb, 12 lines */ - 0x025e010c, /* xsem, k2, 12 lines */ - 0x00000000, - 0x025e010c, /* ysem, bb, 12 lines */ - 0x025e010c, /* ysem, k2, 12 lines */ - 0x00000000, - 0x025e010c, /* psem, bb, 12 lines */ - 0x025e010c, /* psem, k2, 12 lines */ - 0x00000000, - 0x026a000d, /* rss, bb, 13 lines */ - 0x026a000d, /* rss, k2, 13 lines */ - 0x00000000, - 0x02770106, /* tmld, bb, 6 lines */ - 0x02770106, /* tmld, k2, 6 lines */ - 0x00000000, - 0x027d0106, /* muld, bb, 6 lines */ - 0x027d0106, /* muld, k2, 6 lines */ - 0x00000000, - 0x02770005, /* yuld, bb, 5 lines */ - 0x02770005, /* yuld, k2, 5 lines */ - 0x00000000, - 0x02830107, /* xyld, bb, 7 lines */ - 0x027d0107, /* xyld, k2, 7 lines */ - 0x00000000, - 0x00000000, /* ptld, bb, 0 lines */ - 0x00000000, /* ptld, k2, 0 lines */ - 0x00000000, - 0x00000000, /* ypld, bb, 0 lines */ - 0x00000000, /* ypld, k2, 0 lines */ - 0x00000000, - 0x028a010e, /* prm, bb, 14 lines */ - 0x02980110, /* prm, k2, 16 lines */ - 0x00000000, - 0x02a8000d, /* pbf_pb1, bb, 13 lines */ - 0x02a8000d, /* pbf_pb1, k2, 13 lines */ - 0x00000000, - 0x02a8000d, /* pbf_pb2, bb, 13 lines */ - 0x02a8000d, /* pbf_pb2, k2, 13 lines */ - 0x00000000, - 0x02a8000d, /* rpb, bb, 13 lines */ - 0x02a8000d, /* rpb, k2, 13 lines */ - 0x00000000, - 0x00600185, /* btb, bb, 133 lines */ - 0x00600185, /* btb, k2, 133 lines */ - 0x00000000, - 0x02b50117, /* pbf, bb, 23 lines */ - 0x02b50117, /* pbf, k2, 23 lines */ - 0x00000000, - 0x02cc0006, /* rdif, bb, 6 lines */ - 0x02cc0006, /* rdif, k2, 6 lines */ - 0x00000000, - 0x02d20006, /* tdif, bb, 6 lines */ - 0x02d20006, /* tdif, k2, 6 lines */ - 0x00000000, - 0x02d80003, /* cdu, bb, 3 lines */ - 0x02db000e, /* cdu, k2, 14 lines */ - 0x00000000, - 0x02e9010d, /* ccfc, bb, 13 lines */ - 0x02f60117, /* ccfc, k2, 23 lines */ - 0x00000000, - 0x02e9010d, /* tcfc, bb, 13 lines */ - 0x02f60117, /* tcfc, k2, 23 lines */ - 0x00000000, - 0x030d0133, /* igu, bb, 51 lines */ - 0x030d0133, /* igu, k2, 51 lines */ - 0x00000000, - 0x03400106, /* cau, bb, 6 lines */ - 0x03400106, /* cau, k2, 6 lines */ - 0x00000000, - 0x00000000, /* rgfs, bb, 0 lines */ - 0x00000000, /* rgfs, k2, 0 lines */ - 0x00000000, - 0x00000000, /* rgsrc, bb, 0 lines */ - 0x00000000, /* rgsrc, k2, 0 lines */ - 0x00000000, - 0x00000000, /* tgfs, bb, 0 lines */ - 0x00000000, /* tgfs, k2, 0 lines */ - 0x00000000, - 0x00000000, /* tgsrc, bb, 0 lines */ - 0x00000000, /* tgsrc, k2, 0 lines */ - 0x00000000, - 0x00000000, /* umac, bb, 0 lines */ 
- 0x00120006, /* umac, k2, 6 lines */ - 0x00000000, - 0x00000000, /* xmac, bb, 0 lines */ - 0x00000000, /* xmac, k2, 0 lines */ - 0x00000000, - 0x00000000, /* dbg, bb, 0 lines */ - 0x00000000, /* dbg, k2, 0 lines */ - 0x00000000, - 0x0346012b, /* nig, bb, 43 lines */ - 0x0346011d, /* nig, k2, 29 lines */ - 0x00000000, - 0x00000000, /* wol, bb, 0 lines */ - 0x001c0002, /* wol, k2, 2 lines */ - 0x00000000, - 0x00000000, /* bmbn, bb, 0 lines */ - 0x00210008, /* bmbn, k2, 8 lines */ - 0x00000000, - 0x00000000, /* ipc, bb, 0 lines */ - 0x00000000, /* ipc, k2, 0 lines */ - 0x00000000, - 0x00000000, /* nwm, bb, 0 lines */ - 0x0371000b, /* nwm, k2, 11 lines */ - 0x00000000, - 0x00000000, /* nws, bb, 0 lines */ - 0x037c0009, /* nws, k2, 9 lines */ - 0x00000000, - 0x00000000, /* ms, bb, 0 lines */ - 0x00120004, /* ms, k2, 4 lines */ - 0x00000000, - 0x00000000, /* phy_pcie, bb, 0 lines */ - 0x00e5001a, /* phy_pcie, k2, 26 lines */ - 0x00000000, - 0x00000000, /* led, bb, 0 lines */ - 0x00000000, /* led, k2, 0 lines */ - 0x00000000, - 0x00000000, /* avs_wrap, bb, 0 lines */ - 0x00000000, /* avs_wrap, k2, 0 lines */ - 0x00000000, - 0x00000000, /* bar0_map, bb, 0 lines */ - 0x00000000, /* bar0_map, k2, 0 lines */ - 0x00000000, - 0x00000000, /* bar0_map, bb, 0 lines */ - 0x00000000, /* bar0_map, k2, 0 lines */ - 0x00000000, -}; - /* Win 2 */ #define GTT_BAR0_MAP_REG_IGU_CMD 0x00f000UL @@ -3942,22 +3747,28 @@ static const u32 dbg_bus_blocks[] = { #define GTT_BAR0_MAP_REG_MSDM_RAM_1024 0x012000UL /* Win 6 */ -#define GTT_BAR0_MAP_REG_USDM_RAM 0x013000UL +#define GTT_BAR0_MAP_REG_MSDM_RAM_2048 0x013000UL /* Win 7 */ -#define GTT_BAR0_MAP_REG_USDM_RAM_1024 0x014000UL +#define GTT_BAR0_MAP_REG_USDM_RAM 0x014000UL /* Win 8 */ -#define GTT_BAR0_MAP_REG_USDM_RAM_2048 0x015000UL +#define GTT_BAR0_MAP_REG_USDM_RAM_1024 0x015000UL /* Win 9 */ -#define GTT_BAR0_MAP_REG_XSDM_RAM 0x016000UL +#define GTT_BAR0_MAP_REG_USDM_RAM_2048 0x016000UL /* Win 10 */ -#define GTT_BAR0_MAP_REG_YSDM_RAM 0x017000UL +#define GTT_BAR0_MAP_REG_XSDM_RAM 0x017000UL /* Win 11 */ -#define GTT_BAR0_MAP_REG_PSDM_RAM 0x018000UL +#define GTT_BAR0_MAP_REG_XSDM_RAM_1024 0x018000UL + +/* Win 12 */ +#define GTT_BAR0_MAP_REG_YSDM_RAM 0x019000UL + +/* Win 13 */ +#define GTT_BAR0_MAP_REG_PSDM_RAM 0x01a000UL /** * @brief qed_qm_pf_mem_size - prepare QM ILT sizes @@ -3982,7 +3793,7 @@ struct qed_qm_common_rt_init_params { u8 max_phys_tcs_per_port; bool pf_rl_en; bool pf_wfq_en; - bool vport_rl_en; + bool global_rl_en; bool vport_wfq_en; struct init_qm_port_params *port_params; }; @@ -4001,11 +3812,10 @@ struct qed_qm_pf_rt_init_params { u16 start_pq; u16 num_pf_pqs; u16 num_vf_pqs; - u8 start_vport; - u8 num_vports; + u16 start_vport; + u16 num_vports; u16 pf_wfq; u32 pf_rl; - u32 link_speed; struct init_qm_pq_params *pq_params; struct init_qm_vport_params *vport_params; }; @@ -4054,22 +3864,22 @@ int qed_init_pf_rl(struct qed_hwfn *p_hwfn, */ int qed_init_vport_wfq(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - u16 first_tx_pq_id[NUM_OF_TCS], u16 vport_wfq); + u16 first_tx_pq_id[NUM_OF_TCS], u16 wfq); /** - * @brief qed_init_vport_rl - Initializes the rate limit of the specified VPORT + * @brief qed_init_global_rl - Initializes the rate limit of the specified + * rate limiter * * @param p_hwfn * @param p_ptt - ptt window used for writing the registers - * @param vport_id - VPORT ID - * @param vport_rl - rate limit in Mb/sec units - * @param link_speed - link speed in Mbps. 
+ * @param rl_id - RL ID + * @param rate_limit - rate limit in Mb/sec units * * @return 0 on success, -1 on error. */ -int qed_init_vport_rl(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u8 vport_id, u32 vport_rl, u32 link_speed); +int qed_init_global_rl(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u16 rl_id, u32 rate_limit); /** * @brief qed_send_qm_stop_cmd Sends a stop command to the QM @@ -4157,7 +3967,7 @@ void qed_gft_disable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u16 pf_id); /** * @brief qed_gft_config - Enable and configure HW for GFT * - * @param p_hwfn + * @param p_hwfn - HW device data * @param p_ptt - ptt window used for writing the registers. * @param pf_id - pf on which to enable GFT. * @param tcp - set profile tcp packets. @@ -4242,6 +4052,42 @@ void qed_memset_task_ctx(void *p_ctx_mem, u32 ctx_size, u8 ctx_type); void qed_set_rdma_error_level(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u8 assert_level[NUM_STORMS]); +/** + * @brief qed_fw_overlay_mem_alloc - Allocates and fills the FW overlay memory. + * + * @param p_hwfn - HW device data + * @param fw_overlay_in_buf - the input FW overlay buffer. + * @param buf_size - the size of the input FW overlay buffer in bytes. + * must be aligned to dwords. + * @param fw_overlay_out_mem - OUT: a pointer to the allocated overlays memory. + * + * @return a pointer to the allocated overlays memory, + * or NULL in case of failures. + */ +struct phys_mem_desc * +qed_fw_overlay_mem_alloc(struct qed_hwfn *p_hwfn, + const u32 * const fw_overlay_in_buf, + u32 buf_size_in_bytes); + +/** + * @brief qed_fw_overlay_init_ram - Initializes the FW overlay RAM. + * + * @param p_hwfn - HW device data. + * @param p_ptt - ptt window used for writing the registers. + * @param fw_overlay_mem - the allocated FW overlay memory. + */ +void qed_fw_overlay_init_ram(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct phys_mem_desc *fw_overlay_mem); + +/** + * @brief qed_fw_overlay_mem_free - Frees the FW overlay memory. + * + * @param p_hwfn - HW device data. + * @param fw_overlay_mem - the allocated FW overlay memory to free. + */ +void qed_fw_overlay_mem_free(struct qed_hwfn *p_hwfn, + struct phys_mem_desc *fw_overlay_mem); /* Ystorm flow control mode. 
Use enum fw_flow_ctrl_mode */ #define YSTORM_FLOW_CONTROL_MODE_OFFSET (IRO[0].base) @@ -4282,851 +4128,807 @@ void qed_set_rdma_error_level(struct qed_hwfn *p_hwfn, (IRO[7].base + ((queue_zone_id) * IRO[7].m1)) #define USTORM_COMMON_QUEUE_CONS_SIZE (IRO[7].size) +/* Xstorm common PQ info */ +#define XSTORM_PQ_INFO_OFFSET(pq_id) \ + (IRO[8].base + ((pq_id) * IRO[8].m1)) +#define XSTORM_PQ_INFO_SIZE (IRO[8].size) + /* Xstorm Integration Test Data */ -#define XSTORM_INTEG_TEST_DATA_OFFSET (IRO[8].base) -#define XSTORM_INTEG_TEST_DATA_SIZE (IRO[8].size) +#define XSTORM_INTEG_TEST_DATA_OFFSET (IRO[9].base) +#define XSTORM_INTEG_TEST_DATA_SIZE (IRO[9].size) /* Ystorm Integration Test Data */ -#define YSTORM_INTEG_TEST_DATA_OFFSET (IRO[9].base) -#define YSTORM_INTEG_TEST_DATA_SIZE (IRO[9].size) +#define YSTORM_INTEG_TEST_DATA_OFFSET (IRO[10].base) +#define YSTORM_INTEG_TEST_DATA_SIZE (IRO[10].size) /* Pstorm Integration Test Data */ -#define PSTORM_INTEG_TEST_DATA_OFFSET (IRO[10].base) -#define PSTORM_INTEG_TEST_DATA_SIZE (IRO[10].size) +#define PSTORM_INTEG_TEST_DATA_OFFSET (IRO[11].base) +#define PSTORM_INTEG_TEST_DATA_SIZE (IRO[11].size) /* Tstorm Integration Test Data */ -#define TSTORM_INTEG_TEST_DATA_OFFSET (IRO[11].base) -#define TSTORM_INTEG_TEST_DATA_SIZE (IRO[11].size) +#define TSTORM_INTEG_TEST_DATA_OFFSET (IRO[12].base) +#define TSTORM_INTEG_TEST_DATA_SIZE (IRO[12].size) /* Mstorm Integration Test Data */ -#define MSTORM_INTEG_TEST_DATA_OFFSET (IRO[12].base) -#define MSTORM_INTEG_TEST_DATA_SIZE (IRO[12].size) +#define MSTORM_INTEG_TEST_DATA_OFFSET (IRO[13].base) +#define MSTORM_INTEG_TEST_DATA_SIZE (IRO[13].size) /* Ustorm Integration Test Data */ -#define USTORM_INTEG_TEST_DATA_OFFSET (IRO[13].base) -#define USTORM_INTEG_TEST_DATA_SIZE (IRO[13].size) +#define USTORM_INTEG_TEST_DATA_OFFSET (IRO[14].base) +#define USTORM_INTEG_TEST_DATA_SIZE (IRO[14].size) + +/* Xstorm overlay buffer host address */ +#define XSTORM_OVERLAY_BUF_ADDR_OFFSET (IRO[15].base) +#define XSTORM_OVERLAY_BUF_ADDR_SIZE (IRO[15].size) + +/* Ystorm overlay buffer host address */ +#define YSTORM_OVERLAY_BUF_ADDR_OFFSET (IRO[16].base) +#define YSTORM_OVERLAY_BUF_ADDR_SIZE (IRO[16].size) + +/* Pstorm overlay buffer host address */ +#define PSTORM_OVERLAY_BUF_ADDR_OFFSET (IRO[17].base) +#define PSTORM_OVERLAY_BUF_ADDR_SIZE (IRO[17].size) + +/* Tstorm overlay buffer host address */ +#define TSTORM_OVERLAY_BUF_ADDR_OFFSET (IRO[18].base) +#define TSTORM_OVERLAY_BUF_ADDR_SIZE (IRO[18].size) + +/* Mstorm overlay buffer host address */ +#define MSTORM_OVERLAY_BUF_ADDR_OFFSET (IRO[19].base) +#define MSTORM_OVERLAY_BUF_ADDR_SIZE (IRO[19].size) + +/* Ustorm overlay buffer host address */ +#define USTORM_OVERLAY_BUF_ADDR_OFFSET (IRO[20].base) +#define USTORM_OVERLAY_BUF_ADDR_SIZE (IRO[20].size) /* Tstorm producers */ #define TSTORM_LL2_RX_PRODS_OFFSET(core_rx_queue_id) \ - (IRO[14].base + ((core_rx_queue_id) * IRO[14].m1)) -#define TSTORM_LL2_RX_PRODS_SIZE (IRO[14].size) + (IRO[21].base + ((core_rx_queue_id) * IRO[21].m1)) +#define TSTORM_LL2_RX_PRODS_SIZE (IRO[21].size) /* Tstorm LightL2 queue statistics */ #define CORE_LL2_TSTORM_PER_QUEUE_STAT_OFFSET(core_rx_queue_id) \ - (IRO[15].base + ((core_rx_queue_id) * IRO[15].m1)) -#define CORE_LL2_TSTORM_PER_QUEUE_STAT_SIZE (IRO[15].size) + (IRO[22].base + ((core_rx_queue_id) * IRO[22].m1)) +#define CORE_LL2_TSTORM_PER_QUEUE_STAT_SIZE (IRO[22].size) /* Ustorm LiteL2 queue statistics */ #define CORE_LL2_USTORM_PER_QUEUE_STAT_OFFSET(core_rx_queue_id) \ - (IRO[16].base + 
((core_rx_queue_id) * IRO[16].m1)) -#define CORE_LL2_USTORM_PER_QUEUE_STAT_SIZE (IRO[16].size) + (IRO[23].base + ((core_rx_queue_id) * IRO[23].m1)) +#define CORE_LL2_USTORM_PER_QUEUE_STAT_SIZE (IRO[23].size) /* Pstorm LiteL2 queue statistics */ #define CORE_LL2_PSTORM_PER_QUEUE_STAT_OFFSET(core_tx_stats_id) \ - (IRO[17].base + ((core_tx_stats_id) * IRO[17].m1)) -#define CORE_LL2_PSTORM_PER_QUEUE_STAT_SIZE (IRO[17].size) + (IRO[24].base + ((core_tx_stats_id) * IRO[24].m1)) +#define CORE_LL2_PSTORM_PER_QUEUE_STAT_SIZE (IRO[24].size) /* Mstorm queue statistics */ #define MSTORM_QUEUE_STAT_OFFSET(stat_counter_id) \ - (IRO[18].base + ((stat_counter_id) * IRO[18].m1)) -#define MSTORM_QUEUE_STAT_SIZE (IRO[18].size) + (IRO[25].base + ((stat_counter_id) * IRO[25].m1)) +#define MSTORM_QUEUE_STAT_SIZE (IRO[25].size) -/* Mstorm ETH PF queues producers */ -#define MSTORM_ETH_PF_PRODS_OFFSET(queue_id) \ - (IRO[19].base + ((queue_id) * IRO[19].m1)) -#define MSTORM_ETH_PF_PRODS_SIZE (IRO[19].size) +/* TPA agregation timeout in us resolution (on ASIC) */ +#define MSTORM_TPA_TIMEOUT_US_OFFSET (IRO[26].base) +#define MSTORM_TPA_TIMEOUT_US_SIZE (IRO[26].size) /* Mstorm ETH VF queues producers offset in RAM. Used in default VF zone size - * mode. + * mode */ #define MSTORM_ETH_VF_PRODS_OFFSET(vf_id, vf_queue_id) \ - (IRO[20].base + ((vf_id) * IRO[20].m1) + ((vf_queue_id) * IRO[20].m2)) -#define MSTORM_ETH_VF_PRODS_SIZE (IRO[20].size) + (IRO[27].base + ((vf_id) * IRO[27].m1) + ((vf_queue_id) * IRO[27].m2)) +#define MSTORM_ETH_VF_PRODS_SIZE (IRO[27].size) -/* TPA agregation timeout in us resolution (on ASIC) */ -#define MSTORM_TPA_TIMEOUT_US_OFFSET (IRO[21].base) -#define MSTORM_TPA_TIMEOUT_US_SIZE (IRO[21].size) +/* Mstorm ETH PF queues producers */ +#define MSTORM_ETH_PF_PRODS_OFFSET(queue_id) \ + (IRO[28].base + ((queue_id) * IRO[28].m1)) +#define MSTORM_ETH_PF_PRODS_SIZE (IRO[28].size) /* Mstorm pf statistics */ #define MSTORM_ETH_PF_STAT_OFFSET(pf_id) \ - (IRO[22].base + ((pf_id) * IRO[22].m1)) -#define MSTORM_ETH_PF_STAT_SIZE (IRO[22].size) + (IRO[29].base + ((pf_id) * IRO[29].m1)) +#define MSTORM_ETH_PF_STAT_SIZE (IRO[29].size) /* Ustorm queue statistics */ #define USTORM_QUEUE_STAT_OFFSET(stat_counter_id) \ - (IRO[23].base + ((stat_counter_id) * IRO[23].m1)) -#define USTORM_QUEUE_STAT_SIZE (IRO[23].size) + (IRO[30].base + ((stat_counter_id) * IRO[30].m1)) +#define USTORM_QUEUE_STAT_SIZE (IRO[30].size) /* Ustorm pf statistics */ -#define USTORM_ETH_PF_STAT_OFFSET(pf_id)\ - (IRO[24].base + ((pf_id) * IRO[24].m1)) -#define USTORM_ETH_PF_STAT_SIZE (IRO[24].size) +#define USTORM_ETH_PF_STAT_OFFSET(pf_id) \ + (IRO[31].base + ((pf_id) * IRO[31].m1)) +#define USTORM_ETH_PF_STAT_SIZE (IRO[31].size) /* Pstorm queue statistics */ -#define PSTORM_QUEUE_STAT_OFFSET(stat_counter_id) \ - (IRO[25].base + ((stat_counter_id) * IRO[25].m1)) -#define PSTORM_QUEUE_STAT_SIZE (IRO[25].size) +#define PSTORM_QUEUE_STAT_OFFSET(stat_counter_id) \ + (IRO[32].base + ((stat_counter_id) * IRO[32].m1)) +#define PSTORM_QUEUE_STAT_SIZE (IRO[32].size) /* Pstorm pf statistics */ #define PSTORM_ETH_PF_STAT_OFFSET(pf_id) \ - (IRO[26].base + ((pf_id) * IRO[26].m1)) -#define PSTORM_ETH_PF_STAT_SIZE (IRO[26].size) + (IRO[33].base + ((pf_id) * IRO[33].m1)) +#define PSTORM_ETH_PF_STAT_SIZE (IRO[33].size) /* Control frame's EthType configuration for TX control frame security */ -#define PSTORM_CTL_FRAME_ETHTYPE_OFFSET(eth_type_id) \ - (IRO[27].base + ((eth_type_id) * IRO[27].m1)) -#define PSTORM_CTL_FRAME_ETHTYPE_SIZE (IRO[27].size) +#define 
PSTORM_CTL_FRAME_ETHTYPE_OFFSET(eth_type_id) \ + (IRO[34].base + ((eth_type_id) * IRO[34].m1)) +#define PSTORM_CTL_FRAME_ETHTYPE_SIZE (IRO[34].size) /* Tstorm last parser message */ -#define TSTORM_ETH_PRS_INPUT_OFFSET (IRO[28].base) -#define TSTORM_ETH_PRS_INPUT_SIZE (IRO[28].size) +#define TSTORM_ETH_PRS_INPUT_OFFSET (IRO[35].base) +#define TSTORM_ETH_PRS_INPUT_SIZE (IRO[35].size) /* Tstorm Eth limit Rx rate */ -#define ETH_RX_RATE_LIMIT_OFFSET(pf_id) \ - (IRO[29].base + ((pf_id) * IRO[29].m1)) -#define ETH_RX_RATE_LIMIT_SIZE (IRO[29].size) +#define ETH_RX_RATE_LIMIT_OFFSET(pf_id) \ + (IRO[36].base + ((pf_id) * IRO[36].m1)) +#define ETH_RX_RATE_LIMIT_SIZE (IRO[36].size) /* RSS indirection table entry update command per PF offset in TSTORM PF BAR0. - * Use eth_tstorm_rss_update_data for update. + * Use eth_tstorm_rss_update_data for update */ #define TSTORM_ETH_RSS_UPDATE_OFFSET(pf_id) \ - (IRO[30].base + ((pf_id) * IRO[30].m1)) -#define TSTORM_ETH_RSS_UPDATE_SIZE (IRO[30].size) + (IRO[37].base + ((pf_id) * IRO[37].m1)) +#define TSTORM_ETH_RSS_UPDATE_SIZE (IRO[37].size) /* Xstorm queue zone */ #define XSTORM_ETH_QUEUE_ZONE_OFFSET(queue_id) \ - (IRO[31].base + ((queue_id) * IRO[31].m1)) -#define XSTORM_ETH_QUEUE_ZONE_SIZE (IRO[31].size) + (IRO[38].base + ((queue_id) * IRO[38].m1)) +#define XSTORM_ETH_QUEUE_ZONE_SIZE (IRO[38].size) /* Ystorm cqe producer */ #define YSTORM_TOE_CQ_PROD_OFFSET(rss_id) \ - (IRO[32].base + ((rss_id) * IRO[32].m1)) -#define YSTORM_TOE_CQ_PROD_SIZE (IRO[32].size) + (IRO[39].base + ((rss_id) * IRO[39].m1)) +#define YSTORM_TOE_CQ_PROD_SIZE (IRO[39].size) /* Ustorm cqe producer */ #define USTORM_TOE_CQ_PROD_OFFSET(rss_id) \ - (IRO[33].base + ((rss_id) * IRO[33].m1)) -#define USTORM_TOE_CQ_PROD_SIZE (IRO[33].size) + (IRO[40].base + ((rss_id) * IRO[40].m1)) +#define USTORM_TOE_CQ_PROD_SIZE (IRO[40].size) /* Ustorm grq producer */ #define USTORM_TOE_GRQ_PROD_OFFSET(pf_id) \ - (IRO[34].base + ((pf_id) * IRO[34].m1)) -#define USTORM_TOE_GRQ_PROD_SIZE (IRO[34].size) + (IRO[41].base + ((pf_id) * IRO[41].m1)) +#define USTORM_TOE_GRQ_PROD_SIZE (IRO[41].size) /* Tstorm cmdq-cons of given command queue-id */ #define TSTORM_SCSI_CMDQ_CONS_OFFSET(cmdq_queue_id) \ - (IRO[35].base + ((cmdq_queue_id) * IRO[35].m1)) -#define TSTORM_SCSI_CMDQ_CONS_SIZE (IRO[35].size) + (IRO[42].base + ((cmdq_queue_id) * IRO[42].m1)) +#define TSTORM_SCSI_CMDQ_CONS_SIZE (IRO[42].size) /* Tstorm (reflects M-Storm) bdq-external-producer of given function ID, - * BDqueue-id. 
+ * BDqueue-id */ -#define TSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id, bdq_id) \ - (IRO[36].base + ((func_id) * IRO[36].m1) + ((bdq_id) * IRO[36].m2)) -#define TSTORM_SCSI_BDQ_EXT_PROD_SIZE (IRO[36].size) +#define TSTORM_SCSI_BDQ_EXT_PROD_OFFSET(storage_func_id, bdq_id) \ + (IRO[43].base + ((storage_func_id) * IRO[43].m1) + \ + ((bdq_id) * IRO[43].m2)) +#define TSTORM_SCSI_BDQ_EXT_PROD_SIZE (IRO[43].size) /* Mstorm bdq-external-producer of given BDQ resource ID, BDqueue-id */ -#define MSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id, bdq_id) \ - (IRO[37].base + ((func_id) * IRO[37].m1) + ((bdq_id) * IRO[37].m2)) -#define MSTORM_SCSI_BDQ_EXT_PROD_SIZE (IRO[37].size) +#define MSTORM_SCSI_BDQ_EXT_PROD_OFFSET(storage_func_id, bdq_id) \ + (IRO[44].base + ((storage_func_id) * IRO[44].m1) + \ + ((bdq_id) * IRO[44].m2)) +#define MSTORM_SCSI_BDQ_EXT_PROD_SIZE (IRO[44].size) /* Tstorm iSCSI RX stats */ -#define TSTORM_ISCSI_RX_STATS_OFFSET(pf_id) \ - (IRO[38].base + ((pf_id) * IRO[38].m1)) -#define TSTORM_ISCSI_RX_STATS_SIZE (IRO[38].size) +#define TSTORM_ISCSI_RX_STATS_OFFSET(storage_func_id) \ + (IRO[45].base + ((storage_func_id) * IRO[45].m1)) +#define TSTORM_ISCSI_RX_STATS_SIZE (IRO[45].size) /* Mstorm iSCSI RX stats */ -#define MSTORM_ISCSI_RX_STATS_OFFSET(pf_id) \ - (IRO[39].base + ((pf_id) * IRO[39].m1)) -#define MSTORM_ISCSI_RX_STATS_SIZE (IRO[39].size) +#define MSTORM_ISCSI_RX_STATS_OFFSET(storage_func_id) \ + (IRO[46].base + ((storage_func_id) * IRO[46].m1)) +#define MSTORM_ISCSI_RX_STATS_SIZE (IRO[46].size) /* Ustorm iSCSI RX stats */ -#define USTORM_ISCSI_RX_STATS_OFFSET(pf_id) \ - (IRO[40].base + ((pf_id) * IRO[40].m1)) -#define USTORM_ISCSI_RX_STATS_SIZE (IRO[40].size) +#define USTORM_ISCSI_RX_STATS_OFFSET(storage_func_id) \ + (IRO[47].base + ((storage_func_id) * IRO[47].m1)) +#define USTORM_ISCSI_RX_STATS_SIZE (IRO[47].size) /* Xstorm iSCSI TX stats */ -#define XSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \ - (IRO[41].base + ((pf_id) * IRO[41].m1)) -#define XSTORM_ISCSI_TX_STATS_SIZE (IRO[41].size) +#define XSTORM_ISCSI_TX_STATS_OFFSET(storage_func_id) \ + (IRO[48].base + ((storage_func_id) * IRO[48].m1)) +#define XSTORM_ISCSI_TX_STATS_SIZE (IRO[48].size) /* Ystorm iSCSI TX stats */ -#define YSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \ - (IRO[42].base + ((pf_id) * IRO[42].m1)) -#define YSTORM_ISCSI_TX_STATS_SIZE (IRO[42].size) +#define YSTORM_ISCSI_TX_STATS_OFFSET(storage_func_id) \ + (IRO[49].base + ((storage_func_id) * IRO[49].m1)) +#define YSTORM_ISCSI_TX_STATS_SIZE (IRO[49].size) /* Pstorm iSCSI TX stats */ -#define PSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \ - (IRO[43].base + ((pf_id) * IRO[43].m1)) -#define PSTORM_ISCSI_TX_STATS_SIZE (IRO[43].size) +#define PSTORM_ISCSI_TX_STATS_OFFSET(storage_func_id) \ + (IRO[50].base + ((storage_func_id) * IRO[50].m1)) +#define PSTORM_ISCSI_TX_STATS_SIZE (IRO[50].size) /* Tstorm FCoE RX stats */ #define TSTORM_FCOE_RX_STATS_OFFSET(pf_id) \ - (IRO[44].base + ((pf_id) * IRO[44].m1)) -#define TSTORM_FCOE_RX_STATS_SIZE (IRO[44].size) + (IRO[51].base + ((pf_id) * IRO[51].m1)) +#define TSTORM_FCOE_RX_STATS_SIZE (IRO[51].size) /* Pstorm FCoE TX stats */ #define PSTORM_FCOE_TX_STATS_OFFSET(pf_id) \ - (IRO[45].base + ((pf_id) * IRO[45].m1)) -#define PSTORM_FCOE_TX_STATS_SIZE (IRO[45].size) + (IRO[52].base + ((pf_id) * IRO[52].m1)) +#define PSTORM_FCOE_TX_STATS_SIZE (IRO[52].size) /* Pstorm RDMA queue statistics */ #define PSTORM_RDMA_QUEUE_STAT_OFFSET(rdma_stat_counter_id) \ - (IRO[46].base + ((rdma_stat_counter_id) * IRO[46].m1)) -#define PSTORM_RDMA_QUEUE_STAT_SIZE (IRO[46].size) + 
(IRO[53].base + ((rdma_stat_counter_id) * IRO[53].m1)) +#define PSTORM_RDMA_QUEUE_STAT_SIZE (IRO[53].size) /* Tstorm RDMA queue statistics */ #define TSTORM_RDMA_QUEUE_STAT_OFFSET(rdma_stat_counter_id) \ - (IRO[47].base + ((rdma_stat_counter_id) * IRO[47].m1)) -#define TSTORM_RDMA_QUEUE_STAT_SIZE (IRO[47].size) + (IRO[54].base + ((rdma_stat_counter_id) * IRO[54].m1)) +#define TSTORM_RDMA_QUEUE_STAT_SIZE (IRO[54].size) /* Xstorm error level for assert */ #define XSTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) \ - (IRO[48].base + ((pf_id) * IRO[48].m1)) -#define XSTORM_RDMA_ASSERT_LEVEL_SIZE (IRO[48].size) + (IRO[55].base + ((pf_id) * IRO[55].m1)) +#define XSTORM_RDMA_ASSERT_LEVEL_SIZE (IRO[55].size) /* Ystorm error level for assert */ #define YSTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) \ - (IRO[49].base + ((pf_id) * IRO[49].m1)) -#define YSTORM_RDMA_ASSERT_LEVEL_SIZE (IRO[49].size) + (IRO[56].base + ((pf_id) * IRO[56].m1)) +#define YSTORM_RDMA_ASSERT_LEVEL_SIZE (IRO[56].size) /* Pstorm error level for assert */ #define PSTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) \ - (IRO[50].base + ((pf_id) * IRO[50].m1)) -#define PSTORM_RDMA_ASSERT_LEVEL_SIZE (IRO[50].size) + (IRO[57].base + ((pf_id) * IRO[57].m1)) +#define PSTORM_RDMA_ASSERT_LEVEL_SIZE (IRO[57].size) /* Tstorm error level for assert */ #define TSTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) \ - (IRO[51].base + ((pf_id) * IRO[51].m1)) -#define TSTORM_RDMA_ASSERT_LEVEL_SIZE (IRO[51].size) + (IRO[58].base + ((pf_id) * IRO[58].m1)) +#define TSTORM_RDMA_ASSERT_LEVEL_SIZE (IRO[58].size) /* Mstorm error level for assert */ #define MSTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) \ - (IRO[52].base + ((pf_id) * IRO[52].m1)) -#define MSTORM_RDMA_ASSERT_LEVEL_SIZE (IRO[52].size) + (IRO[59].base + ((pf_id) * IRO[59].m1)) +#define MSTORM_RDMA_ASSERT_LEVEL_SIZE (IRO[59].size) /* Ustorm error level for assert */ #define USTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) \ - (IRO[53].base + ((pf_id) * IRO[53].m1)) -#define USTORM_RDMA_ASSERT_LEVEL_SIZE (IRO[53].size) + (IRO[60].base + ((pf_id) * IRO[60].m1)) +#define USTORM_RDMA_ASSERT_LEVEL_SIZE (IRO[60].size) /* Xstorm iWARP rxmit stats */ #define XSTORM_IWARP_RXMIT_STATS_OFFSET(pf_id) \ - (IRO[54].base + ((pf_id) * IRO[54].m1)) -#define XSTORM_IWARP_RXMIT_STATS_SIZE (IRO[54].size) + (IRO[61].base + ((pf_id) * IRO[61].m1)) +#define XSTORM_IWARP_RXMIT_STATS_SIZE (IRO[61].size) /* Tstorm RoCE Event Statistics */ -#define TSTORM_ROCE_EVENTS_STAT_OFFSET(roce_pf_id) \ - (IRO[55].base + ((roce_pf_id) * IRO[55].m1)) -#define TSTORM_ROCE_EVENTS_STAT_SIZE (IRO[55].size) +#define TSTORM_ROCE_EVENTS_STAT_OFFSET(roce_pf_id) \ + (IRO[62].base + ((roce_pf_id) * IRO[62].m1)) +#define TSTORM_ROCE_EVENTS_STAT_SIZE (IRO[62].size) /* DCQCN Received Statistics */ -#define YSTORM_ROCE_DCQCN_RECEIVED_STATS_OFFSET(roce_pf_id) \ - (IRO[56].base + ((roce_pf_id) * IRO[56].m1)) -#define YSTORM_ROCE_DCQCN_RECEIVED_STATS_SIZE (IRO[56].size) +#define YSTORM_ROCE_DCQCN_RECEIVED_STATS_OFFSET(roce_pf_id)\ + (IRO[63].base + ((roce_pf_id) * IRO[63].m1)) +#define YSTORM_ROCE_DCQCN_RECEIVED_STATS_SIZE (IRO[63].size) /* RoCE Error Statistics */ -#define YSTORM_ROCE_ERROR_STATS_OFFSET(roce_pf_id) \ - (IRO[57].base + ((roce_pf_id) * IRO[57].m1)) -#define YSTORM_ROCE_ERROR_STATS_SIZE (IRO[57].size) +#define YSTORM_ROCE_ERROR_STATS_OFFSET(roce_pf_id) \ + (IRO[64].base + ((roce_pf_id) * IRO[64].m1)) +#define YSTORM_ROCE_ERROR_STATS_SIZE (IRO[64].size) /* DCQCN Sent Statistics */ -#define PSTORM_ROCE_DCQCN_SENT_STATS_OFFSET(roce_pf_id) \ - (IRO[58].base + ((roce_pf_id) * IRO[58].m1)) -#define 
PSTORM_ROCE_DCQCN_SENT_STATS_SIZE (IRO[58].size) +#define PSTORM_ROCE_DCQCN_SENT_STATS_OFFSET(roce_pf_id) \ + (IRO[65].base + ((roce_pf_id) * IRO[65].m1)) +#define PSTORM_ROCE_DCQCN_SENT_STATS_SIZE (IRO[65].size) /* RoCE CQEs Statistics */ -#define USTORM_ROCE_CQE_STATS_OFFSET(roce_pf_id) \ - (IRO[59].base + ((roce_pf_id) * IRO[59].m1)) -#define USTORM_ROCE_CQE_STATS_SIZE (IRO[59].size) - -static const struct iro iro_arr[60] = { - {0x0, 0x0, 0x0, 0x0, 0x8}, - {0x4cb8, 0x88, 0x0, 0x0, 0x88}, - {0x6530, 0x20, 0x0, 0x0, 0x20}, - {0xb00, 0x8, 0x0, 0x0, 0x4}, - {0xa80, 0x8, 0x0, 0x0, 0x4}, - {0x0, 0x8, 0x0, 0x0, 0x2}, - {0x80, 0x8, 0x0, 0x0, 0x4}, - {0x84, 0x8, 0x0, 0x0, 0x2}, - {0x4c48, 0x0, 0x0, 0x0, 0x78}, - {0x3e38, 0x0, 0x0, 0x0, 0x78}, - {0x3ef8, 0x0, 0x0, 0x0, 0x78}, - {0x4c40, 0x0, 0x0, 0x0, 0x78}, - {0x4998, 0x0, 0x0, 0x0, 0x78}, - {0x7f50, 0x0, 0x0, 0x0, 0x78}, - {0xa28, 0x8, 0x0, 0x0, 0x8}, - {0x6210, 0x10, 0x0, 0x0, 0x10}, - {0xb820, 0x30, 0x0, 0x0, 0x30}, - {0xa990, 0x30, 0x0, 0x0, 0x30}, - {0x4b68, 0x80, 0x0, 0x0, 0x40}, - {0x1f8, 0x4, 0x0, 0x0, 0x4}, - {0x53a8, 0x80, 0x4, 0x0, 0x4}, - {0xc7d0, 0x0, 0x0, 0x0, 0x4}, - {0x4ba8, 0x80, 0x0, 0x0, 0x20}, - {0x8158, 0x40, 0x0, 0x0, 0x30}, - {0xe770, 0x60, 0x0, 0x0, 0x60}, - {0x4090, 0x80, 0x0, 0x0, 0x38}, - {0xfea8, 0x78, 0x0, 0x0, 0x78}, - {0x1f8, 0x4, 0x0, 0x0, 0x4}, - {0xaf20, 0x0, 0x0, 0x0, 0xf0}, - {0xb010, 0x8, 0x0, 0x0, 0x8}, - {0xc00, 0x8, 0x0, 0x0, 0x8}, - {0x1f8, 0x8, 0x0, 0x0, 0x8}, - {0xac0, 0x8, 0x0, 0x0, 0x8}, - {0x2578, 0x8, 0x0, 0x0, 0x8}, - {0x24f8, 0x8, 0x0, 0x0, 0x8}, - {0x0, 0x8, 0x0, 0x0, 0x8}, - {0x400, 0x18, 0x8, 0x0, 0x8}, - {0xb78, 0x18, 0x8, 0x0, 0x2}, - {0xd898, 0x50, 0x0, 0x0, 0x3c}, - {0x12908, 0x18, 0x0, 0x0, 0x10}, - {0x11aa8, 0x40, 0x0, 0x0, 0x18}, - {0xa588, 0x50, 0x0, 0x0, 0x20}, - {0x8f00, 0x40, 0x0, 0x0, 0x28}, - {0x10e30, 0x18, 0x0, 0x0, 0x10}, - {0xde48, 0x48, 0x0, 0x0, 0x38}, - {0x11298, 0x20, 0x0, 0x0, 0x20}, - {0x40c8, 0x80, 0x0, 0x0, 0x10}, - {0x5048, 0x10, 0x0, 0x0, 0x10}, - {0xc748, 0x8, 0x0, 0x0, 0x1}, - {0xa928, 0x8, 0x0, 0x0, 0x1}, - {0x11a30, 0x8, 0x0, 0x0, 0x1}, - {0xf030, 0x8, 0x0, 0x0, 0x1}, - {0x13028, 0x8, 0x0, 0x0, 0x1}, - {0x12c58, 0x8, 0x0, 0x0, 0x1}, - {0xc9b8, 0x30, 0x0, 0x0, 0x10}, - {0xed90, 0x28, 0x0, 0x0, 0x28}, - {0xad20, 0x18, 0x0, 0x0, 0x18}, - {0xaea0, 0x8, 0x0, 0x0, 0x8}, - {0x13c38, 0x8, 0x0, 0x0, 0x8}, - {0x13c50, 0x18, 0x0, 0x0, 0x18}, +#define USTORM_ROCE_CQE_STATS_OFFSET(roce_pf_id) \ + (IRO[66].base + ((roce_pf_id) * IRO[66].m1)) +#define USTORM_ROCE_CQE_STATS_SIZE (IRO[66].size) + +/* IRO Array */ +static const u32 iro_arr[] = { + 0x00000000, 0x00000000, 0x00080000, + 0x00003288, 0x00000088, 0x00880000, + 0x000058e8, 0x00000020, 0x00200000, + 0x00000b00, 0x00000008, 0x00040000, + 0x00000a80, 0x00000008, 0x00040000, + 0x00000000, 0x00000008, 0x00020000, + 0x00000080, 0x00000008, 0x00040000, + 0x00000084, 0x00000008, 0x00020000, + 0x00005718, 0x00000004, 0x00040000, + 0x00004dd0, 0x00000000, 0x00780000, + 0x00003e40, 0x00000000, 0x00780000, + 0x00004480, 0x00000000, 0x00780000, + 0x00003210, 0x00000000, 0x00780000, + 0x00003b50, 0x00000000, 0x00780000, + 0x00007f58, 0x00000000, 0x00780000, + 0x00005f58, 0x00000000, 0x00080000, + 0x00007100, 0x00000000, 0x00080000, + 0x0000aea0, 0x00000000, 0x00080000, + 0x00004398, 0x00000000, 0x00080000, + 0x0000a5a0, 0x00000000, 0x00080000, + 0x0000bde8, 0x00000000, 0x00080000, + 0x00000020, 0x00000004, 0x00040000, + 0x000056c8, 0x00000010, 0x00100000, + 0x0000c210, 0x00000030, 0x00300000, + 0x0000b088, 0x00000038, 0x00380000, + 
0x00003d20, 0x00000080, 0x00400000, + 0x0000bf60, 0x00000000, 0x00040000, + 0x00004560, 0x00040080, 0x00040000, + 0x000001f8, 0x00000004, 0x00040000, + 0x00003d60, 0x00000080, 0x00200000, + 0x00008960, 0x00000040, 0x00300000, + 0x0000e840, 0x00000060, 0x00600000, + 0x00004618, 0x00000080, 0x00380000, + 0x00010738, 0x000000c0, 0x00c00000, + 0x000001f8, 0x00000002, 0x00020000, + 0x0000a2a0, 0x00000000, 0x01080000, + 0x0000a3a8, 0x00000008, 0x00080000, + 0x000001c0, 0x00000008, 0x00080000, + 0x000001f8, 0x00000008, 0x00080000, + 0x00000ac0, 0x00000008, 0x00080000, + 0x00002578, 0x00000008, 0x00080000, + 0x000024f8, 0x00000008, 0x00080000, + 0x00000280, 0x00000008, 0x00080000, + 0x00000680, 0x00080018, 0x00080000, + 0x00000b78, 0x00080018, 0x00020000, + 0x0000c640, 0x00000050, 0x003c0000, + 0x00012038, 0x00000018, 0x00100000, + 0x00011b00, 0x00000040, 0x00180000, + 0x000095d0, 0x00000050, 0x00200000, + 0x00008b10, 0x00000040, 0x00280000, + 0x00011640, 0x00000018, 0x00100000, + 0x0000c828, 0x00000048, 0x00380000, + 0x00011710, 0x00000020, 0x00200000, + 0x00004650, 0x00000080, 0x00100000, + 0x00003618, 0x00000010, 0x00100000, + 0x0000a968, 0x00000008, 0x00010000, + 0x000097a0, 0x00000008, 0x00010000, + 0x00011990, 0x00000008, 0x00010000, + 0x0000f018, 0x00000008, 0x00010000, + 0x00012628, 0x00000008, 0x00010000, + 0x00011da8, 0x00000008, 0x00010000, + 0x0000aa78, 0x00000030, 0x00100000, + 0x0000d768, 0x00000028, 0x00280000, + 0x00009a58, 0x00000018, 0x00180000, + 0x00009bd8, 0x00000008, 0x00080000, + 0x00013a18, 0x00000008, 0x00080000, + 0x000126e8, 0x00000018, 0x00180000, + 0x0000e608, 0x00500288, 0x00100000, + 0x00012970, 0x00000138, 0x00280000, }; /* Runtime array offsets */ -#define DORQ_REG_PF_MAX_ICID_0_RT_OFFSET 0 -#define DORQ_REG_PF_MAX_ICID_1_RT_OFFSET 1 -#define DORQ_REG_PF_MAX_ICID_2_RT_OFFSET 2 -#define DORQ_REG_PF_MAX_ICID_3_RT_OFFSET 3 -#define DORQ_REG_PF_MAX_ICID_4_RT_OFFSET 4 -#define DORQ_REG_PF_MAX_ICID_5_RT_OFFSET 5 -#define DORQ_REG_PF_MAX_ICID_6_RT_OFFSET 6 -#define DORQ_REG_PF_MAX_ICID_7_RT_OFFSET 7 -#define DORQ_REG_VF_MAX_ICID_0_RT_OFFSET 8 -#define DORQ_REG_VF_MAX_ICID_1_RT_OFFSET 9 -#define DORQ_REG_VF_MAX_ICID_2_RT_OFFSET 10 -#define DORQ_REG_VF_MAX_ICID_3_RT_OFFSET 11 -#define DORQ_REG_VF_MAX_ICID_4_RT_OFFSET 12 -#define DORQ_REG_VF_MAX_ICID_5_RT_OFFSET 13 -#define DORQ_REG_VF_MAX_ICID_6_RT_OFFSET 14 -#define DORQ_REG_VF_MAX_ICID_7_RT_OFFSET 15 -#define DORQ_REG_PF_WAKE_ALL_RT_OFFSET 16 -#define DORQ_REG_TAG1_ETHERTYPE_RT_OFFSET 17 -#define DORQ_REG_GLB_MAX_ICID_0_RT_OFFSET 18 -#define DORQ_REG_GLB_MAX_ICID_1_RT_OFFSET 19 -#define DORQ_REG_GLB_RANGE2CONN_TYPE_0_RT_OFFSET 20 -#define DORQ_REG_GLB_RANGE2CONN_TYPE_1_RT_OFFSET 21 -#define DORQ_REG_PRV_PF_MAX_ICID_2_RT_OFFSET 22 -#define DORQ_REG_PRV_PF_MAX_ICID_3_RT_OFFSET 23 -#define DORQ_REG_PRV_PF_MAX_ICID_4_RT_OFFSET 24 -#define DORQ_REG_PRV_PF_MAX_ICID_5_RT_OFFSET 25 -#define DORQ_REG_PRV_VF_MAX_ICID_2_RT_OFFSET 26 -#define DORQ_REG_PRV_VF_MAX_ICID_3_RT_OFFSET 27 -#define DORQ_REG_PRV_VF_MAX_ICID_4_RT_OFFSET 28 -#define DORQ_REG_PRV_VF_MAX_ICID_5_RT_OFFSET 29 -#define DORQ_REG_PRV_PF_RANGE2CONN_TYPE_2_RT_OFFSET 30 -#define DORQ_REG_PRV_PF_RANGE2CONN_TYPE_3_RT_OFFSET 31 -#define DORQ_REG_PRV_PF_RANGE2CONN_TYPE_4_RT_OFFSET 32 -#define DORQ_REG_PRV_PF_RANGE2CONN_TYPE_5_RT_OFFSET 33 -#define DORQ_REG_PRV_VF_RANGE2CONN_TYPE_2_RT_OFFSET 34 -#define DORQ_REG_PRV_VF_RANGE2CONN_TYPE_3_RT_OFFSET 35 -#define DORQ_REG_PRV_VF_RANGE2CONN_TYPE_4_RT_OFFSET 36 -#define DORQ_REG_PRV_VF_RANGE2CONN_TYPE_5_RT_OFFSET 37 -#define 
IGU_REG_PF_CONFIGURATION_RT_OFFSET 38 -#define IGU_REG_VF_CONFIGURATION_RT_OFFSET 39 -#define IGU_REG_ATTN_MSG_ADDR_L_RT_OFFSET 40 -#define IGU_REG_ATTN_MSG_ADDR_H_RT_OFFSET 41 -#define IGU_REG_LEADING_EDGE_LATCH_RT_OFFSET 42 -#define IGU_REG_TRAILING_EDGE_LATCH_RT_OFFSET 43 -#define CAU_REG_CQE_AGG_UNIT_SIZE_RT_OFFSET 44 -#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 45 -#define CAU_REG_SB_VAR_MEMORY_RT_SIZE 1024 -#define CAU_REG_SB_ADDR_MEMORY_RT_OFFSET 1069 -#define CAU_REG_SB_ADDR_MEMORY_RT_SIZE 1024 -#define CAU_REG_PI_MEMORY_RT_OFFSET 2093 -#define CAU_REG_PI_MEMORY_RT_SIZE 4416 -#define PRS_REG_SEARCH_RESP_INITIATOR_TYPE_RT_OFFSET 6509 -#define PRS_REG_TASK_ID_MAX_INITIATOR_PF_RT_OFFSET 6510 -#define PRS_REG_TASK_ID_MAX_INITIATOR_VF_RT_OFFSET 6511 -#define PRS_REG_TASK_ID_MAX_TARGET_PF_RT_OFFSET 6512 -#define PRS_REG_TASK_ID_MAX_TARGET_VF_RT_OFFSET 6513 -#define PRS_REG_SEARCH_TCP_RT_OFFSET 6514 -#define PRS_REG_SEARCH_FCOE_RT_OFFSET 6515 -#define PRS_REG_SEARCH_ROCE_RT_OFFSET 6516 -#define PRS_REG_ROCE_DEST_QP_MAX_VF_RT_OFFSET 6517 -#define PRS_REG_ROCE_DEST_QP_MAX_PF_RT_OFFSET 6518 -#define PRS_REG_SEARCH_OPENFLOW_RT_OFFSET 6519 -#define PRS_REG_SEARCH_NON_IP_AS_OPENFLOW_RT_OFFSET 6520 -#define PRS_REG_OPENFLOW_SUPPORT_ONLY_KNOWN_OVER_IP_RT_OFFSET 6521 -#define PRS_REG_OPENFLOW_SEARCH_KEY_MASK_RT_OFFSET 6522 -#define PRS_REG_TAG_ETHERTYPE_0_RT_OFFSET 6523 -#define PRS_REG_LIGHT_L2_ETHERTYPE_EN_RT_OFFSET 6524 -#define SRC_REG_FIRSTFREE_RT_OFFSET 6525 -#define SRC_REG_FIRSTFREE_RT_SIZE 2 -#define SRC_REG_LASTFREE_RT_OFFSET 6527 -#define SRC_REG_LASTFREE_RT_SIZE 2 -#define SRC_REG_COUNTFREE_RT_OFFSET 6529 -#define SRC_REG_NUMBER_HASH_BITS_RT_OFFSET 6530 -#define PSWRQ2_REG_CDUT_P_SIZE_RT_OFFSET 6531 -#define PSWRQ2_REG_CDUC_P_SIZE_RT_OFFSET 6532 -#define PSWRQ2_REG_TM_P_SIZE_RT_OFFSET 6533 -#define PSWRQ2_REG_QM_P_SIZE_RT_OFFSET 6534 -#define PSWRQ2_REG_SRC_P_SIZE_RT_OFFSET 6535 -#define PSWRQ2_REG_TSDM_P_SIZE_RT_OFFSET 6536 -#define PSWRQ2_REG_TM_FIRST_ILT_RT_OFFSET 6537 -#define PSWRQ2_REG_TM_LAST_ILT_RT_OFFSET 6538 -#define PSWRQ2_REG_QM_FIRST_ILT_RT_OFFSET 6539 -#define PSWRQ2_REG_QM_LAST_ILT_RT_OFFSET 6540 -#define PSWRQ2_REG_SRC_FIRST_ILT_RT_OFFSET 6541 -#define PSWRQ2_REG_SRC_LAST_ILT_RT_OFFSET 6542 -#define PSWRQ2_REG_CDUC_FIRST_ILT_RT_OFFSET 6543 -#define PSWRQ2_REG_CDUC_LAST_ILT_RT_OFFSET 6544 -#define PSWRQ2_REG_CDUT_FIRST_ILT_RT_OFFSET 6545 -#define PSWRQ2_REG_CDUT_LAST_ILT_RT_OFFSET 6546 -#define PSWRQ2_REG_TSDM_FIRST_ILT_RT_OFFSET 6547 -#define PSWRQ2_REG_TSDM_LAST_ILT_RT_OFFSET 6548 -#define PSWRQ2_REG_TM_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6549 -#define PSWRQ2_REG_CDUT_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6550 -#define PSWRQ2_REG_CDUC_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6551 -#define PSWRQ2_REG_TM_VF_BLOCKS_RT_OFFSET 6552 -#define PSWRQ2_REG_CDUT_VF_BLOCKS_RT_OFFSET 6553 -#define PSWRQ2_REG_CDUC_VF_BLOCKS_RT_OFFSET 6554 -#define PSWRQ2_REG_TM_BLOCKS_FACTOR_RT_OFFSET 6555 -#define PSWRQ2_REG_CDUT_BLOCKS_FACTOR_RT_OFFSET 6556 -#define PSWRQ2_REG_CDUC_BLOCKS_FACTOR_RT_OFFSET 6557 -#define PSWRQ2_REG_VF_BASE_RT_OFFSET 6558 -#define PSWRQ2_REG_VF_LAST_ILT_RT_OFFSET 6559 -#define PSWRQ2_REG_DRAM_ALIGN_WR_RT_OFFSET 6560 -#define PSWRQ2_REG_DRAM_ALIGN_RD_RT_OFFSET 6561 -#define PSWRQ2_REG_TGSRC_FIRST_ILT_RT_OFFSET 6562 -#define PSWRQ2_REG_RGSRC_FIRST_ILT_RT_OFFSET 6563 -#define PSWRQ2_REG_TGSRC_LAST_ILT_RT_OFFSET 6564 -#define PSWRQ2_REG_RGSRC_LAST_ILT_RT_OFFSET 6565 -#define PSWRQ2_REG_ILT_MEMORY_RT_OFFSET 6566 -#define PSWRQ2_REG_ILT_MEMORY_RT_SIZE 26414 -#define PGLUE_REG_B_VF_BASE_RT_OFFSET 32980 
-#define PGLUE_REG_B_MSDM_OFFSET_MASK_B_RT_OFFSET 32981 -#define PGLUE_REG_B_MSDM_VF_SHIFT_B_RT_OFFSET 32982 -#define PGLUE_REG_B_CACHE_LINE_SIZE_RT_OFFSET 32983 -#define PGLUE_REG_B_PF_BAR0_SIZE_RT_OFFSET 32984 -#define PGLUE_REG_B_PF_BAR1_SIZE_RT_OFFSET 32985 -#define PGLUE_REG_B_VF_BAR1_SIZE_RT_OFFSET 32986 -#define TM_REG_VF_ENABLE_CONN_RT_OFFSET 32987 -#define TM_REG_PF_ENABLE_CONN_RT_OFFSET 32988 -#define TM_REG_PF_ENABLE_TASK_RT_OFFSET 32989 -#define TM_REG_GROUP_SIZE_RESOLUTION_CONN_RT_OFFSET 32990 -#define TM_REG_GROUP_SIZE_RESOLUTION_TASK_RT_OFFSET 32991 -#define TM_REG_CONFIG_CONN_MEM_RT_OFFSET 32992 -#define TM_REG_CONFIG_CONN_MEM_RT_SIZE 416 -#define TM_REG_CONFIG_TASK_MEM_RT_OFFSET 33408 -#define TM_REG_CONFIG_TASK_MEM_RT_SIZE 608 -#define QM_REG_MAXPQSIZE_0_RT_OFFSET 34016 -#define QM_REG_MAXPQSIZE_1_RT_OFFSET 34017 -#define QM_REG_MAXPQSIZE_2_RT_OFFSET 34018 -#define QM_REG_MAXPQSIZETXSEL_0_RT_OFFSET 34019 -#define QM_REG_MAXPQSIZETXSEL_1_RT_OFFSET 34020 -#define QM_REG_MAXPQSIZETXSEL_2_RT_OFFSET 34021 -#define QM_REG_MAXPQSIZETXSEL_3_RT_OFFSET 34022 -#define QM_REG_MAXPQSIZETXSEL_4_RT_OFFSET 34023 -#define QM_REG_MAXPQSIZETXSEL_5_RT_OFFSET 34024 -#define QM_REG_MAXPQSIZETXSEL_6_RT_OFFSET 34025 -#define QM_REG_MAXPQSIZETXSEL_7_RT_OFFSET 34026 -#define QM_REG_MAXPQSIZETXSEL_8_RT_OFFSET 34027 -#define QM_REG_MAXPQSIZETXSEL_9_RT_OFFSET 34028 -#define QM_REG_MAXPQSIZETXSEL_10_RT_OFFSET 34029 -#define QM_REG_MAXPQSIZETXSEL_11_RT_OFFSET 34030 -#define QM_REG_MAXPQSIZETXSEL_12_RT_OFFSET 34031 -#define QM_REG_MAXPQSIZETXSEL_13_RT_OFFSET 34032 -#define QM_REG_MAXPQSIZETXSEL_14_RT_OFFSET 34033 -#define QM_REG_MAXPQSIZETXSEL_15_RT_OFFSET 34034 -#define QM_REG_MAXPQSIZETXSEL_16_RT_OFFSET 34035 -#define QM_REG_MAXPQSIZETXSEL_17_RT_OFFSET 34036 -#define QM_REG_MAXPQSIZETXSEL_18_RT_OFFSET 34037 -#define QM_REG_MAXPQSIZETXSEL_19_RT_OFFSET 34038 -#define QM_REG_MAXPQSIZETXSEL_20_RT_OFFSET 34039 -#define QM_REG_MAXPQSIZETXSEL_21_RT_OFFSET 34040 -#define QM_REG_MAXPQSIZETXSEL_22_RT_OFFSET 34041 -#define QM_REG_MAXPQSIZETXSEL_23_RT_OFFSET 34042 -#define QM_REG_MAXPQSIZETXSEL_24_RT_OFFSET 34043 -#define QM_REG_MAXPQSIZETXSEL_25_RT_OFFSET 34044 -#define QM_REG_MAXPQSIZETXSEL_26_RT_OFFSET 34045 -#define QM_REG_MAXPQSIZETXSEL_27_RT_OFFSET 34046 -#define QM_REG_MAXPQSIZETXSEL_28_RT_OFFSET 34047 -#define QM_REG_MAXPQSIZETXSEL_29_RT_OFFSET 34048 -#define QM_REG_MAXPQSIZETXSEL_30_RT_OFFSET 34049 -#define QM_REG_MAXPQSIZETXSEL_31_RT_OFFSET 34050 -#define QM_REG_MAXPQSIZETXSEL_32_RT_OFFSET 34051 -#define QM_REG_MAXPQSIZETXSEL_33_RT_OFFSET 34052 -#define QM_REG_MAXPQSIZETXSEL_34_RT_OFFSET 34053 -#define QM_REG_MAXPQSIZETXSEL_35_RT_OFFSET 34054 -#define QM_REG_MAXPQSIZETXSEL_36_RT_OFFSET 34055 -#define QM_REG_MAXPQSIZETXSEL_37_RT_OFFSET 34056 -#define QM_REG_MAXPQSIZETXSEL_38_RT_OFFSET 34057 -#define QM_REG_MAXPQSIZETXSEL_39_RT_OFFSET 34058 -#define QM_REG_MAXPQSIZETXSEL_40_RT_OFFSET 34059 -#define QM_REG_MAXPQSIZETXSEL_41_RT_OFFSET 34060 -#define QM_REG_MAXPQSIZETXSEL_42_RT_OFFSET 34061 -#define QM_REG_MAXPQSIZETXSEL_43_RT_OFFSET 34062 -#define QM_REG_MAXPQSIZETXSEL_44_RT_OFFSET 34063 -#define QM_REG_MAXPQSIZETXSEL_45_RT_OFFSET 34064 -#define QM_REG_MAXPQSIZETXSEL_46_RT_OFFSET 34065 -#define QM_REG_MAXPQSIZETXSEL_47_RT_OFFSET 34066 -#define QM_REG_MAXPQSIZETXSEL_48_RT_OFFSET 34067 -#define QM_REG_MAXPQSIZETXSEL_49_RT_OFFSET 34068 -#define QM_REG_MAXPQSIZETXSEL_50_RT_OFFSET 34069 -#define QM_REG_MAXPQSIZETXSEL_51_RT_OFFSET 34070 -#define QM_REG_MAXPQSIZETXSEL_52_RT_OFFSET 34071 -#define 
QM_REG_MAXPQSIZETXSEL_53_RT_OFFSET 34072 -#define QM_REG_MAXPQSIZETXSEL_54_RT_OFFSET 34073 -#define QM_REG_MAXPQSIZETXSEL_55_RT_OFFSET 34074 -#define QM_REG_MAXPQSIZETXSEL_56_RT_OFFSET 34075 -#define QM_REG_MAXPQSIZETXSEL_57_RT_OFFSET 34076 -#define QM_REG_MAXPQSIZETXSEL_58_RT_OFFSET 34077 -#define QM_REG_MAXPQSIZETXSEL_59_RT_OFFSET 34078 -#define QM_REG_MAXPQSIZETXSEL_60_RT_OFFSET 34079 -#define QM_REG_MAXPQSIZETXSEL_61_RT_OFFSET 34080 -#define QM_REG_MAXPQSIZETXSEL_62_RT_OFFSET 34081 -#define QM_REG_MAXPQSIZETXSEL_63_RT_OFFSET 34082 -#define QM_REG_BASEADDROTHERPQ_RT_OFFSET 34083 -#define QM_REG_BASEADDROTHERPQ_RT_SIZE 128 -#define QM_REG_PTRTBLOTHER_RT_OFFSET 34211 -#define QM_REG_PTRTBLOTHER_RT_SIZE 256 -#define QM_REG_AFULLQMBYPTHRPFWFQ_RT_OFFSET 34467 -#define QM_REG_AFULLQMBYPTHRVPWFQ_RT_OFFSET 34468 -#define QM_REG_AFULLQMBYPTHRPFRL_RT_OFFSET 34469 -#define QM_REG_AFULLQMBYPTHRGLBLRL_RT_OFFSET 34470 -#define QM_REG_AFULLOPRTNSTCCRDMASK_RT_OFFSET 34471 -#define QM_REG_WRROTHERPQGRP_0_RT_OFFSET 34472 -#define QM_REG_WRROTHERPQGRP_1_RT_OFFSET 34473 -#define QM_REG_WRROTHERPQGRP_2_RT_OFFSET 34474 -#define QM_REG_WRROTHERPQGRP_3_RT_OFFSET 34475 -#define QM_REG_WRROTHERPQGRP_4_RT_OFFSET 34476 -#define QM_REG_WRROTHERPQGRP_5_RT_OFFSET 34477 -#define QM_REG_WRROTHERPQGRP_6_RT_OFFSET 34478 -#define QM_REG_WRROTHERPQGRP_7_RT_OFFSET 34479 -#define QM_REG_WRROTHERPQGRP_8_RT_OFFSET 34480 -#define QM_REG_WRROTHERPQGRP_9_RT_OFFSET 34481 -#define QM_REG_WRROTHERPQGRP_10_RT_OFFSET 34482 -#define QM_REG_WRROTHERPQGRP_11_RT_OFFSET 34483 -#define QM_REG_WRROTHERPQGRP_12_RT_OFFSET 34484 -#define QM_REG_WRROTHERPQGRP_13_RT_OFFSET 34485 -#define QM_REG_WRROTHERPQGRP_14_RT_OFFSET 34486 -#define QM_REG_WRROTHERPQGRP_15_RT_OFFSET 34487 -#define QM_REG_WRROTHERGRPWEIGHT_0_RT_OFFSET 34488 -#define QM_REG_WRROTHERGRPWEIGHT_1_RT_OFFSET 34489 -#define QM_REG_WRROTHERGRPWEIGHT_2_RT_OFFSET 34490 -#define QM_REG_WRROTHERGRPWEIGHT_3_RT_OFFSET 34491 -#define QM_REG_WRRTXGRPWEIGHT_0_RT_OFFSET 34492 -#define QM_REG_WRRTXGRPWEIGHT_1_RT_OFFSET 34493 -#define QM_REG_PQTX2PF_0_RT_OFFSET 34494 -#define QM_REG_PQTX2PF_1_RT_OFFSET 34495 -#define QM_REG_PQTX2PF_2_RT_OFFSET 34496 -#define QM_REG_PQTX2PF_3_RT_OFFSET 34497 -#define QM_REG_PQTX2PF_4_RT_OFFSET 34498 -#define QM_REG_PQTX2PF_5_RT_OFFSET 34499 -#define QM_REG_PQTX2PF_6_RT_OFFSET 34500 -#define QM_REG_PQTX2PF_7_RT_OFFSET 34501 -#define QM_REG_PQTX2PF_8_RT_OFFSET 34502 -#define QM_REG_PQTX2PF_9_RT_OFFSET 34503 -#define QM_REG_PQTX2PF_10_RT_OFFSET 34504 -#define QM_REG_PQTX2PF_11_RT_OFFSET 34505 -#define QM_REG_PQTX2PF_12_RT_OFFSET 34506 -#define QM_REG_PQTX2PF_13_RT_OFFSET 34507 -#define QM_REG_PQTX2PF_14_RT_OFFSET 34508 -#define QM_REG_PQTX2PF_15_RT_OFFSET 34509 -#define QM_REG_PQTX2PF_16_RT_OFFSET 34510 -#define QM_REG_PQTX2PF_17_RT_OFFSET 34511 -#define QM_REG_PQTX2PF_18_RT_OFFSET 34512 -#define QM_REG_PQTX2PF_19_RT_OFFSET 34513 -#define QM_REG_PQTX2PF_20_RT_OFFSET 34514 -#define QM_REG_PQTX2PF_21_RT_OFFSET 34515 -#define QM_REG_PQTX2PF_22_RT_OFFSET 34516 -#define QM_REG_PQTX2PF_23_RT_OFFSET 34517 -#define QM_REG_PQTX2PF_24_RT_OFFSET 34518 -#define QM_REG_PQTX2PF_25_RT_OFFSET 34519 -#define QM_REG_PQTX2PF_26_RT_OFFSET 34520 -#define QM_REG_PQTX2PF_27_RT_OFFSET 34521 -#define QM_REG_PQTX2PF_28_RT_OFFSET 34522 -#define QM_REG_PQTX2PF_29_RT_OFFSET 34523 -#define QM_REG_PQTX2PF_30_RT_OFFSET 34524 -#define QM_REG_PQTX2PF_31_RT_OFFSET 34525 -#define QM_REG_PQTX2PF_32_RT_OFFSET 34526 -#define QM_REG_PQTX2PF_33_RT_OFFSET 34527 -#define QM_REG_PQTX2PF_34_RT_OFFSET 34528 
-#define QM_REG_PQTX2PF_35_RT_OFFSET 34529 -#define QM_REG_PQTX2PF_36_RT_OFFSET 34530 -#define QM_REG_PQTX2PF_37_RT_OFFSET 34531 -#define QM_REG_PQTX2PF_38_RT_OFFSET 34532 -#define QM_REG_PQTX2PF_39_RT_OFFSET 34533 -#define QM_REG_PQTX2PF_40_RT_OFFSET 34534 -#define QM_REG_PQTX2PF_41_RT_OFFSET 34535 -#define QM_REG_PQTX2PF_42_RT_OFFSET 34536 -#define QM_REG_PQTX2PF_43_RT_OFFSET 34537 -#define QM_REG_PQTX2PF_44_RT_OFFSET 34538 -#define QM_REG_PQTX2PF_45_RT_OFFSET 34539 -#define QM_REG_PQTX2PF_46_RT_OFFSET 34540 -#define QM_REG_PQTX2PF_47_RT_OFFSET 34541 -#define QM_REG_PQTX2PF_48_RT_OFFSET 34542 -#define QM_REG_PQTX2PF_49_RT_OFFSET 34543 -#define QM_REG_PQTX2PF_50_RT_OFFSET 34544 -#define QM_REG_PQTX2PF_51_RT_OFFSET 34545 -#define QM_REG_PQTX2PF_52_RT_OFFSET 34546 -#define QM_REG_PQTX2PF_53_RT_OFFSET 34547 -#define QM_REG_PQTX2PF_54_RT_OFFSET 34548 -#define QM_REG_PQTX2PF_55_RT_OFFSET 34549 -#define QM_REG_PQTX2PF_56_RT_OFFSET 34550 -#define QM_REG_PQTX2PF_57_RT_OFFSET 34551 -#define QM_REG_PQTX2PF_58_RT_OFFSET 34552 -#define QM_REG_PQTX2PF_59_RT_OFFSET 34553 -#define QM_REG_PQTX2PF_60_RT_OFFSET 34554 -#define QM_REG_PQTX2PF_61_RT_OFFSET 34555 -#define QM_REG_PQTX2PF_62_RT_OFFSET 34556 -#define QM_REG_PQTX2PF_63_RT_OFFSET 34557 -#define QM_REG_PQOTHER2PF_0_RT_OFFSET 34558 -#define QM_REG_PQOTHER2PF_1_RT_OFFSET 34559 -#define QM_REG_PQOTHER2PF_2_RT_OFFSET 34560 -#define QM_REG_PQOTHER2PF_3_RT_OFFSET 34561 -#define QM_REG_PQOTHER2PF_4_RT_OFFSET 34562 -#define QM_REG_PQOTHER2PF_5_RT_OFFSET 34563 -#define QM_REG_PQOTHER2PF_6_RT_OFFSET 34564 -#define QM_REG_PQOTHER2PF_7_RT_OFFSET 34565 -#define QM_REG_PQOTHER2PF_8_RT_OFFSET 34566 -#define QM_REG_PQOTHER2PF_9_RT_OFFSET 34567 -#define QM_REG_PQOTHER2PF_10_RT_OFFSET 34568 -#define QM_REG_PQOTHER2PF_11_RT_OFFSET 34569 -#define QM_REG_PQOTHER2PF_12_RT_OFFSET 34570 -#define QM_REG_PQOTHER2PF_13_RT_OFFSET 34571 -#define QM_REG_PQOTHER2PF_14_RT_OFFSET 34572 -#define QM_REG_PQOTHER2PF_15_RT_OFFSET 34573 -#define QM_REG_RLGLBLPERIOD_0_RT_OFFSET 34574 -#define QM_REG_RLGLBLPERIOD_1_RT_OFFSET 34575 -#define QM_REG_RLGLBLPERIODTIMER_0_RT_OFFSET 34576 -#define QM_REG_RLGLBLPERIODTIMER_1_RT_OFFSET 34577 -#define QM_REG_RLGLBLPERIODSEL_0_RT_OFFSET 34578 -#define QM_REG_RLGLBLPERIODSEL_1_RT_OFFSET 34579 -#define QM_REG_RLGLBLPERIODSEL_2_RT_OFFSET 34580 -#define QM_REG_RLGLBLPERIODSEL_3_RT_OFFSET 34581 -#define QM_REG_RLGLBLPERIODSEL_4_RT_OFFSET 34582 -#define QM_REG_RLGLBLPERIODSEL_5_RT_OFFSET 34583 -#define QM_REG_RLGLBLPERIODSEL_6_RT_OFFSET 34584 -#define QM_REG_RLGLBLPERIODSEL_7_RT_OFFSET 34585 -#define QM_REG_RLGLBLINCVAL_RT_OFFSET 34586 -#define QM_REG_RLGLBLINCVAL_RT_SIZE 256 -#define QM_REG_RLGLBLUPPERBOUND_RT_OFFSET 34842 -#define QM_REG_RLGLBLUPPERBOUND_RT_SIZE 256 -#define QM_REG_RLGLBLCRD_RT_OFFSET 35098 -#define QM_REG_RLGLBLCRD_RT_SIZE 256 -#define QM_REG_RLGLBLENABLE_RT_OFFSET 35354 -#define QM_REG_RLPFPERIOD_RT_OFFSET 35355 -#define QM_REG_RLPFPERIODTIMER_RT_OFFSET 35356 -#define QM_REG_RLPFINCVAL_RT_OFFSET 35357 -#define QM_REG_RLPFINCVAL_RT_SIZE 16 -#define QM_REG_RLPFUPPERBOUND_RT_OFFSET 35373 -#define QM_REG_RLPFUPPERBOUND_RT_SIZE 16 -#define QM_REG_RLPFCRD_RT_OFFSET 35389 -#define QM_REG_RLPFCRD_RT_SIZE 16 -#define QM_REG_RLPFENABLE_RT_OFFSET 35405 -#define QM_REG_RLPFVOQENABLE_RT_OFFSET 35406 -#define QM_REG_WFQPFWEIGHT_RT_OFFSET 35407 -#define QM_REG_WFQPFWEIGHT_RT_SIZE 16 -#define QM_REG_WFQPFUPPERBOUND_RT_OFFSET 35423 -#define QM_REG_WFQPFUPPERBOUND_RT_SIZE 16 -#define QM_REG_WFQPFCRD_RT_OFFSET 35439 -#define QM_REG_WFQPFCRD_RT_SIZE 256 
-#define QM_REG_WFQPFENABLE_RT_OFFSET 35695 -#define QM_REG_WFQVPENABLE_RT_OFFSET 35696 -#define QM_REG_BASEADDRTXPQ_RT_OFFSET 35697 -#define QM_REG_BASEADDRTXPQ_RT_SIZE 512 -#define QM_REG_TXPQMAP_RT_OFFSET 36209 -#define QM_REG_TXPQMAP_RT_SIZE 512 -#define QM_REG_WFQVPWEIGHT_RT_OFFSET 36721 -#define QM_REG_WFQVPWEIGHT_RT_SIZE 512 -#define QM_REG_WFQVPCRD_RT_OFFSET 37233 -#define QM_REG_WFQVPCRD_RT_SIZE 512 -#define QM_REG_WFQVPMAP_RT_OFFSET 37745 -#define QM_REG_WFQVPMAP_RT_SIZE 512 -#define QM_REG_PTRTBLTX_RT_OFFSET 38257 -#define QM_REG_PTRTBLTX_RT_SIZE 1024 -#define QM_REG_WFQPFCRD_MSB_RT_OFFSET 39281 -#define QM_REG_WFQPFCRD_MSB_RT_SIZE 320 -#define QM_REG_VOQCRDLINE_RT_OFFSET 39601 -#define QM_REG_VOQCRDLINE_RT_SIZE 36 -#define QM_REG_VOQINITCRDLINE_RT_OFFSET 39637 -#define QM_REG_VOQINITCRDLINE_RT_SIZE 36 -#define QM_REG_RLPFVOQENABLE_MSB_RT_OFFSET 39673 -#define NIG_REG_TAG_ETHERTYPE_0_RT_OFFSET 39674 -#define NIG_REG_BRB_GATE_DNTFWD_PORT_RT_OFFSET 39675 -#define NIG_REG_OUTER_TAG_VALUE_LIST0_RT_OFFSET 39676 -#define NIG_REG_OUTER_TAG_VALUE_LIST1_RT_OFFSET 39677 -#define NIG_REG_OUTER_TAG_VALUE_LIST2_RT_OFFSET 39678 -#define NIG_REG_OUTER_TAG_VALUE_LIST3_RT_OFFSET 39679 -#define NIG_REG_LLH_FUNC_TAGMAC_CLS_TYPE_RT_OFFSET 39680 -#define NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET 39681 -#define NIG_REG_LLH_FUNC_TAG_EN_RT_SIZE 4 -#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET 39685 -#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_SIZE 4 -#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_OFFSET 39689 -#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_SIZE 32 -#define NIG_REG_LLH_FUNC_FILTER_EN_RT_OFFSET 39721 -#define NIG_REG_LLH_FUNC_FILTER_EN_RT_SIZE 16 -#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_OFFSET 39737 -#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_SIZE 16 -#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_OFFSET 39753 -#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_SIZE 16 -#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_OFFSET 39769 -#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_SIZE 16 -#define NIG_REG_TX_EDPM_CTRL_RT_OFFSET 39785 -#define NIG_REG_PPF_TO_ENGINE_SEL_RT_OFFSET 39786 -#define NIG_REG_PPF_TO_ENGINE_SEL_RT_SIZE 8 -#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_VALUE_RT_OFFSET 39794 -#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_VALUE_RT_SIZE 1024 -#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_EN_RT_OFFSET 40818 -#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_EN_RT_SIZE 512 -#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_MODE_RT_OFFSET 41330 -#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_MODE_RT_SIZE 512 -#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_PROTOCOL_TYPE_RT_OFFSET 41842 -#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_PROTOCOL_TYPE_RT_SIZE 512 -#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_HDR_SEL_RT_OFFSET 42354 -#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_HDR_SEL_RT_SIZE 512 -#define NIG_REG_LLH_PF_CLS_FILTERS_MAP_RT_OFFSET 42866 -#define NIG_REG_LLH_PF_CLS_FILTERS_MAP_RT_SIZE 32 -#define CDU_REG_CID_ADDR_PARAMS_RT_OFFSET 42898 -#define CDU_REG_SEGMENT0_PARAMS_RT_OFFSET 42899 -#define CDU_REG_SEGMENT1_PARAMS_RT_OFFSET 42900 -#define CDU_REG_PF_SEG0_TYPE_OFFSET_RT_OFFSET 42901 -#define CDU_REG_PF_SEG1_TYPE_OFFSET_RT_OFFSET 42902 -#define CDU_REG_PF_SEG2_TYPE_OFFSET_RT_OFFSET 42903 -#define CDU_REG_PF_SEG3_TYPE_OFFSET_RT_OFFSET 42904 -#define CDU_REG_PF_FL_SEG0_TYPE_OFFSET_RT_OFFSET 42905 -#define CDU_REG_PF_FL_SEG1_TYPE_OFFSET_RT_OFFSET 42906 -#define CDU_REG_PF_FL_SEG2_TYPE_OFFSET_RT_OFFSET 42907 -#define CDU_REG_PF_FL_SEG3_TYPE_OFFSET_RT_OFFSET 42908 -#define CDU_REG_VF_SEG_TYPE_OFFSET_RT_OFFSET 42909 -#define CDU_REG_VF_FL_SEG_TYPE_OFFSET_RT_OFFSET 42910 -#define 
PBF_REG_TAG_ETHERTYPE_0_RT_OFFSET 42911 -#define PBF_REG_BTB_SHARED_AREA_SIZE_RT_OFFSET 42912 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET 42913 -#define PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET 42914 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ0_RT_OFFSET 42915 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET 42916 -#define PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET 42917 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ1_RT_OFFSET 42918 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ2_RT_OFFSET 42919 -#define PBF_REG_BTB_GUARANTEED_VOQ2_RT_OFFSET 42920 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ2_RT_OFFSET 42921 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ3_RT_OFFSET 42922 -#define PBF_REG_BTB_GUARANTEED_VOQ3_RT_OFFSET 42923 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ3_RT_OFFSET 42924 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ4_RT_OFFSET 42925 -#define PBF_REG_BTB_GUARANTEED_VOQ4_RT_OFFSET 42926 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ4_RT_OFFSET 42927 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ5_RT_OFFSET 42928 -#define PBF_REG_BTB_GUARANTEED_VOQ5_RT_OFFSET 42929 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ5_RT_OFFSET 42930 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ6_RT_OFFSET 42931 -#define PBF_REG_BTB_GUARANTEED_VOQ6_RT_OFFSET 42932 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ6_RT_OFFSET 42933 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ7_RT_OFFSET 42934 -#define PBF_REG_BTB_GUARANTEED_VOQ7_RT_OFFSET 42935 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ7_RT_OFFSET 42936 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ8_RT_OFFSET 42937 -#define PBF_REG_BTB_GUARANTEED_VOQ8_RT_OFFSET 42938 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ8_RT_OFFSET 42939 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ9_RT_OFFSET 42940 -#define PBF_REG_BTB_GUARANTEED_VOQ9_RT_OFFSET 42941 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ9_RT_OFFSET 42942 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ10_RT_OFFSET 42943 -#define PBF_REG_BTB_GUARANTEED_VOQ10_RT_OFFSET 42944 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ10_RT_OFFSET 42945 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ11_RT_OFFSET 42946 -#define PBF_REG_BTB_GUARANTEED_VOQ11_RT_OFFSET 42947 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ11_RT_OFFSET 42948 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ12_RT_OFFSET 42949 -#define PBF_REG_BTB_GUARANTEED_VOQ12_RT_OFFSET 42950 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ12_RT_OFFSET 42951 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ13_RT_OFFSET 42952 -#define PBF_REG_BTB_GUARANTEED_VOQ13_RT_OFFSET 42953 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ13_RT_OFFSET 42954 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ14_RT_OFFSET 42955 -#define PBF_REG_BTB_GUARANTEED_VOQ14_RT_OFFSET 42956 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ14_RT_OFFSET 42957 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ15_RT_OFFSET 42958 -#define PBF_REG_BTB_GUARANTEED_VOQ15_RT_OFFSET 42959 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ15_RT_OFFSET 42960 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ16_RT_OFFSET 42961 -#define PBF_REG_BTB_GUARANTEED_VOQ16_RT_OFFSET 42962 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ16_RT_OFFSET 42963 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ17_RT_OFFSET 42964 -#define PBF_REG_BTB_GUARANTEED_VOQ17_RT_OFFSET 42965 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ17_RT_OFFSET 42966 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ18_RT_OFFSET 42967 -#define PBF_REG_BTB_GUARANTEED_VOQ18_RT_OFFSET 42968 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ18_RT_OFFSET 42969 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ19_RT_OFFSET 42970 -#define PBF_REG_BTB_GUARANTEED_VOQ19_RT_OFFSET 42971 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ19_RT_OFFSET 42972 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ20_RT_OFFSET 
42973 -#define PBF_REG_BTB_GUARANTEED_VOQ20_RT_OFFSET 42974 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ20_RT_OFFSET 42975 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ21_RT_OFFSET 42976 -#define PBF_REG_BTB_GUARANTEED_VOQ21_RT_OFFSET 42977 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ21_RT_OFFSET 42978 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ22_RT_OFFSET 42979 -#define PBF_REG_BTB_GUARANTEED_VOQ22_RT_OFFSET 42980 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ22_RT_OFFSET 42981 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ23_RT_OFFSET 42982 -#define PBF_REG_BTB_GUARANTEED_VOQ23_RT_OFFSET 42983 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ23_RT_OFFSET 42984 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ24_RT_OFFSET 42985 -#define PBF_REG_BTB_GUARANTEED_VOQ24_RT_OFFSET 42986 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ24_RT_OFFSET 42987 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ25_RT_OFFSET 42988 -#define PBF_REG_BTB_GUARANTEED_VOQ25_RT_OFFSET 42989 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ25_RT_OFFSET 42990 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ26_RT_OFFSET 42991 -#define PBF_REG_BTB_GUARANTEED_VOQ26_RT_OFFSET 42992 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ26_RT_OFFSET 42993 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ27_RT_OFFSET 42994 -#define PBF_REG_BTB_GUARANTEED_VOQ27_RT_OFFSET 42995 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ27_RT_OFFSET 42996 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ28_RT_OFFSET 42997 -#define PBF_REG_BTB_GUARANTEED_VOQ28_RT_OFFSET 42998 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ28_RT_OFFSET 42999 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ29_RT_OFFSET 43000 -#define PBF_REG_BTB_GUARANTEED_VOQ29_RT_OFFSET 43001 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ29_RT_OFFSET 43002 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ30_RT_OFFSET 43003 -#define PBF_REG_BTB_GUARANTEED_VOQ30_RT_OFFSET 43004 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ30_RT_OFFSET 43005 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ31_RT_OFFSET 43006 -#define PBF_REG_BTB_GUARANTEED_VOQ31_RT_OFFSET 43007 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ31_RT_OFFSET 43008 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ32_RT_OFFSET 43009 -#define PBF_REG_BTB_GUARANTEED_VOQ32_RT_OFFSET 43010 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ32_RT_OFFSET 43011 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ33_RT_OFFSET 43012 -#define PBF_REG_BTB_GUARANTEED_VOQ33_RT_OFFSET 43013 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ33_RT_OFFSET 43014 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ34_RT_OFFSET 43015 -#define PBF_REG_BTB_GUARANTEED_VOQ34_RT_OFFSET 43016 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ34_RT_OFFSET 43017 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ35_RT_OFFSET 43018 -#define PBF_REG_BTB_GUARANTEED_VOQ35_RT_OFFSET 43019 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ35_RT_OFFSET 43020 -#define XCM_REG_CON_PHY_Q3_RT_OFFSET 43021 - -#define RUNTIME_ARRAY_SIZE 43022 - +#define DORQ_REG_PF_MAX_ICID_0_RT_OFFSET 0 +#define DORQ_REG_PF_MAX_ICID_1_RT_OFFSET 1 +#define DORQ_REG_PF_MAX_ICID_2_RT_OFFSET 2 +#define DORQ_REG_PF_MAX_ICID_3_RT_OFFSET 3 +#define DORQ_REG_PF_MAX_ICID_4_RT_OFFSET 4 +#define DORQ_REG_PF_MAX_ICID_5_RT_OFFSET 5 +#define DORQ_REG_PF_MAX_ICID_6_RT_OFFSET 6 +#define DORQ_REG_PF_MAX_ICID_7_RT_OFFSET 7 +#define DORQ_REG_VF_MAX_ICID_0_RT_OFFSET 8 +#define DORQ_REG_VF_MAX_ICID_1_RT_OFFSET 9 +#define DORQ_REG_VF_MAX_ICID_2_RT_OFFSET 10 +#define DORQ_REG_VF_MAX_ICID_3_RT_OFFSET 11 +#define DORQ_REG_VF_MAX_ICID_4_RT_OFFSET 12 +#define DORQ_REG_VF_MAX_ICID_5_RT_OFFSET 13 +#define DORQ_REG_VF_MAX_ICID_6_RT_OFFSET 14 +#define DORQ_REG_VF_MAX_ICID_7_RT_OFFSET 15 +#define DORQ_REG_VF_ICID_BIT_SHIFT_NORM_RT_OFFSET 16 
+#define DORQ_REG_PF_WAKE_ALL_RT_OFFSET 17 +#define DORQ_REG_TAG1_ETHERTYPE_RT_OFFSET 18 +#define IGU_REG_PF_CONFIGURATION_RT_OFFSET 19 +#define IGU_REG_VF_CONFIGURATION_RT_OFFSET 20 +#define IGU_REG_ATTN_MSG_ADDR_L_RT_OFFSET 21 +#define IGU_REG_ATTN_MSG_ADDR_H_RT_OFFSET 22 +#define IGU_REG_LEADING_EDGE_LATCH_RT_OFFSET 23 +#define IGU_REG_TRAILING_EDGE_LATCH_RT_OFFSET 24 +#define CAU_REG_CQE_AGG_UNIT_SIZE_RT_OFFSET 25 +#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 26 +#define CAU_REG_SB_VAR_MEMORY_RT_SIZE 736 +#define CAU_REG_SB_ADDR_MEMORY_RT_OFFSET 762 +#define CAU_REG_SB_ADDR_MEMORY_RT_SIZE 736 +#define CAU_REG_PI_MEMORY_RT_OFFSET 1498 +#define CAU_REG_PI_MEMORY_RT_SIZE 4416 +#define PRS_REG_SEARCH_RESP_INITIATOR_TYPE_RT_OFFSET 5914 +#define PRS_REG_TASK_ID_MAX_INITIATOR_PF_RT_OFFSET 5915 +#define PRS_REG_TASK_ID_MAX_INITIATOR_VF_RT_OFFSET 5916 +#define PRS_REG_TASK_ID_MAX_TARGET_PF_RT_OFFSET 5917 +#define PRS_REG_TASK_ID_MAX_TARGET_VF_RT_OFFSET 5918 +#define PRS_REG_SEARCH_TCP_RT_OFFSET 5919 +#define PRS_REG_SEARCH_FCOE_RT_OFFSET 5920 +#define PRS_REG_SEARCH_ROCE_RT_OFFSET 5921 +#define PRS_REG_ROCE_DEST_QP_MAX_VF_RT_OFFSET 5922 +#define PRS_REG_ROCE_DEST_QP_MAX_PF_RT_OFFSET 5923 +#define PRS_REG_SEARCH_OPENFLOW_RT_OFFSET 5924 +#define PRS_REG_SEARCH_NON_IP_AS_OPENFLOW_RT_OFFSET 5925 +#define PRS_REG_OPENFLOW_SUPPORT_ONLY_KNOWN_OVER_IP_RT_OFFSET 5926 +#define PRS_REG_OPENFLOW_SEARCH_KEY_MASK_RT_OFFSET 5927 +#define PRS_REG_TAG_ETHERTYPE_0_RT_OFFSET 5928 +#define PRS_REG_LIGHT_L2_ETHERTYPE_EN_RT_OFFSET 5929 +#define SRC_REG_FIRSTFREE_RT_OFFSET 5930 +#define SRC_REG_FIRSTFREE_RT_SIZE 2 +#define SRC_REG_LASTFREE_RT_OFFSET 5932 +#define SRC_REG_LASTFREE_RT_SIZE 2 +#define SRC_REG_COUNTFREE_RT_OFFSET 5934 +#define SRC_REG_NUMBER_HASH_BITS_RT_OFFSET 5935 +#define PSWRQ2_REG_CDUT_P_SIZE_RT_OFFSET 5936 +#define PSWRQ2_REG_CDUC_P_SIZE_RT_OFFSET 5937 +#define PSWRQ2_REG_TM_P_SIZE_RT_OFFSET 5938 +#define PSWRQ2_REG_QM_P_SIZE_RT_OFFSET 5939 +#define PSWRQ2_REG_SRC_P_SIZE_RT_OFFSET 5940 +#define PSWRQ2_REG_TSDM_P_SIZE_RT_OFFSET 5941 +#define PSWRQ2_REG_TM_FIRST_ILT_RT_OFFSET 5942 +#define PSWRQ2_REG_TM_LAST_ILT_RT_OFFSET 5943 +#define PSWRQ2_REG_QM_FIRST_ILT_RT_OFFSET 5944 +#define PSWRQ2_REG_QM_LAST_ILT_RT_OFFSET 5945 +#define PSWRQ2_REG_SRC_FIRST_ILT_RT_OFFSET 5946 +#define PSWRQ2_REG_SRC_LAST_ILT_RT_OFFSET 5947 +#define PSWRQ2_REG_CDUC_FIRST_ILT_RT_OFFSET 5948 +#define PSWRQ2_REG_CDUC_LAST_ILT_RT_OFFSET 5949 +#define PSWRQ2_REG_CDUT_FIRST_ILT_RT_OFFSET 5950 +#define PSWRQ2_REG_CDUT_LAST_ILT_RT_OFFSET 5951 +#define PSWRQ2_REG_TSDM_FIRST_ILT_RT_OFFSET 5952 +#define PSWRQ2_REG_TSDM_LAST_ILT_RT_OFFSET 5953 +#define PSWRQ2_REG_TM_NUMBER_OF_PF_BLOCKS_RT_OFFSET 5954 +#define PSWRQ2_REG_CDUT_NUMBER_OF_PF_BLOCKS_RT_OFFSET 5955 +#define PSWRQ2_REG_CDUC_NUMBER_OF_PF_BLOCKS_RT_OFFSET 5956 +#define PSWRQ2_REG_TM_VF_BLOCKS_RT_OFFSET 5957 +#define PSWRQ2_REG_CDUT_VF_BLOCKS_RT_OFFSET 5958 +#define PSWRQ2_REG_CDUC_VF_BLOCKS_RT_OFFSET 5959 +#define PSWRQ2_REG_TM_BLOCKS_FACTOR_RT_OFFSET 5960 +#define PSWRQ2_REG_CDUT_BLOCKS_FACTOR_RT_OFFSET 5961 +#define PSWRQ2_REG_CDUC_BLOCKS_FACTOR_RT_OFFSET 5962 +#define PSWRQ2_REG_VF_BASE_RT_OFFSET 5963 +#define PSWRQ2_REG_VF_LAST_ILT_RT_OFFSET 5964 +#define PSWRQ2_REG_DRAM_ALIGN_WR_RT_OFFSET 5965 +#define PSWRQ2_REG_DRAM_ALIGN_RD_RT_OFFSET 5966 +#define PSWRQ2_REG_ILT_MEMORY_RT_OFFSET 5967 +#define PSWRQ2_REG_ILT_MEMORY_RT_SIZE 22000 +#define PGLUE_REG_B_VF_BASE_RT_OFFSET 27967 +#define PGLUE_REG_B_MSDM_OFFSET_MASK_B_RT_OFFSET 27968 +#define PGLUE_REG_B_MSDM_VF_SHIFT_B_RT_OFFSET 
27969 +#define PGLUE_REG_B_CACHE_LINE_SIZE_RT_OFFSET 27970 +#define PGLUE_REG_B_PF_BAR0_SIZE_RT_OFFSET 27971 +#define PGLUE_REG_B_PF_BAR1_SIZE_RT_OFFSET 27972 +#define PGLUE_REG_B_VF_BAR1_SIZE_RT_OFFSET 27973 +#define TM_REG_VF_ENABLE_CONN_RT_OFFSET 27974 +#define TM_REG_PF_ENABLE_CONN_RT_OFFSET 27975 +#define TM_REG_PF_ENABLE_TASK_RT_OFFSET 27976 +#define TM_REG_GROUP_SIZE_RESOLUTION_CONN_RT_OFFSET 27977 +#define TM_REG_GROUP_SIZE_RESOLUTION_TASK_RT_OFFSET 27978 +#define TM_REG_CONFIG_CONN_MEM_RT_OFFSET 27979 +#define TM_REG_CONFIG_CONN_MEM_RT_SIZE 416 +#define TM_REG_CONFIG_TASK_MEM_RT_OFFSET 28395 +#define TM_REG_CONFIG_TASK_MEM_RT_SIZE 512 +#define QM_REG_MAXPQSIZE_0_RT_OFFSET 28907 +#define QM_REG_MAXPQSIZE_1_RT_OFFSET 28908 +#define QM_REG_MAXPQSIZE_2_RT_OFFSET 28909 +#define QM_REG_MAXPQSIZETXSEL_0_RT_OFFSET 28910 +#define QM_REG_MAXPQSIZETXSEL_1_RT_OFFSET 28911 +#define QM_REG_MAXPQSIZETXSEL_2_RT_OFFSET 28912 +#define QM_REG_MAXPQSIZETXSEL_3_RT_OFFSET 28913 +#define QM_REG_MAXPQSIZETXSEL_4_RT_OFFSET 28914 +#define QM_REG_MAXPQSIZETXSEL_5_RT_OFFSET 28915 +#define QM_REG_MAXPQSIZETXSEL_6_RT_OFFSET 28916 +#define QM_REG_MAXPQSIZETXSEL_7_RT_OFFSET 28917 +#define QM_REG_MAXPQSIZETXSEL_8_RT_OFFSET 28918 +#define QM_REG_MAXPQSIZETXSEL_9_RT_OFFSET 28919 +#define QM_REG_MAXPQSIZETXSEL_10_RT_OFFSET 28920 +#define QM_REG_MAXPQSIZETXSEL_11_RT_OFFSET 28921 +#define QM_REG_MAXPQSIZETXSEL_12_RT_OFFSET 28922 +#define QM_REG_MAXPQSIZETXSEL_13_RT_OFFSET 28923 +#define QM_REG_MAXPQSIZETXSEL_14_RT_OFFSET 28924 +#define QM_REG_MAXPQSIZETXSEL_15_RT_OFFSET 28925 +#define QM_REG_MAXPQSIZETXSEL_16_RT_OFFSET 28926 +#define QM_REG_MAXPQSIZETXSEL_17_RT_OFFSET 28927 +#define QM_REG_MAXPQSIZETXSEL_18_RT_OFFSET 28928 +#define QM_REG_MAXPQSIZETXSEL_19_RT_OFFSET 28929 +#define QM_REG_MAXPQSIZETXSEL_20_RT_OFFSET 28930 +#define QM_REG_MAXPQSIZETXSEL_21_RT_OFFSET 28931 +#define QM_REG_MAXPQSIZETXSEL_22_RT_OFFSET 28932 +#define QM_REG_MAXPQSIZETXSEL_23_RT_OFFSET 28933 +#define QM_REG_MAXPQSIZETXSEL_24_RT_OFFSET 28934 +#define QM_REG_MAXPQSIZETXSEL_25_RT_OFFSET 28935 +#define QM_REG_MAXPQSIZETXSEL_26_RT_OFFSET 28936 +#define QM_REG_MAXPQSIZETXSEL_27_RT_OFFSET 28937 +#define QM_REG_MAXPQSIZETXSEL_28_RT_OFFSET 28938 +#define QM_REG_MAXPQSIZETXSEL_29_RT_OFFSET 28939 +#define QM_REG_MAXPQSIZETXSEL_30_RT_OFFSET 28940 +#define QM_REG_MAXPQSIZETXSEL_31_RT_OFFSET 28941 +#define QM_REG_MAXPQSIZETXSEL_32_RT_OFFSET 28942 +#define QM_REG_MAXPQSIZETXSEL_33_RT_OFFSET 28943 +#define QM_REG_MAXPQSIZETXSEL_34_RT_OFFSET 28944 +#define QM_REG_MAXPQSIZETXSEL_35_RT_OFFSET 28945 +#define QM_REG_MAXPQSIZETXSEL_36_RT_OFFSET 28946 +#define QM_REG_MAXPQSIZETXSEL_37_RT_OFFSET 28947 +#define QM_REG_MAXPQSIZETXSEL_38_RT_OFFSET 28948 +#define QM_REG_MAXPQSIZETXSEL_39_RT_OFFSET 28949 +#define QM_REG_MAXPQSIZETXSEL_40_RT_OFFSET 28950 +#define QM_REG_MAXPQSIZETXSEL_41_RT_OFFSET 28951 +#define QM_REG_MAXPQSIZETXSEL_42_RT_OFFSET 28952 +#define QM_REG_MAXPQSIZETXSEL_43_RT_OFFSET 28953 +#define QM_REG_MAXPQSIZETXSEL_44_RT_OFFSET 28954 +#define QM_REG_MAXPQSIZETXSEL_45_RT_OFFSET 28955 +#define QM_REG_MAXPQSIZETXSEL_46_RT_OFFSET 28956 +#define QM_REG_MAXPQSIZETXSEL_47_RT_OFFSET 28957 +#define QM_REG_MAXPQSIZETXSEL_48_RT_OFFSET 28958 +#define QM_REG_MAXPQSIZETXSEL_49_RT_OFFSET 28959 +#define QM_REG_MAXPQSIZETXSEL_50_RT_OFFSET 28960 +#define QM_REG_MAXPQSIZETXSEL_51_RT_OFFSET 28961 +#define QM_REG_MAXPQSIZETXSEL_52_RT_OFFSET 28962 +#define QM_REG_MAXPQSIZETXSEL_53_RT_OFFSET 28963 +#define QM_REG_MAXPQSIZETXSEL_54_RT_OFFSET 28964 +#define 
QM_REG_MAXPQSIZETXSEL_55_RT_OFFSET 28965 +#define QM_REG_MAXPQSIZETXSEL_56_RT_OFFSET 28966 +#define QM_REG_MAXPQSIZETXSEL_57_RT_OFFSET 28967 +#define QM_REG_MAXPQSIZETXSEL_58_RT_OFFSET 28968 +#define QM_REG_MAXPQSIZETXSEL_59_RT_OFFSET 28969 +#define QM_REG_MAXPQSIZETXSEL_60_RT_OFFSET 28970 +#define QM_REG_MAXPQSIZETXSEL_61_RT_OFFSET 28971 +#define QM_REG_MAXPQSIZETXSEL_62_RT_OFFSET 28972 +#define QM_REG_MAXPQSIZETXSEL_63_RT_OFFSET 28973 +#define QM_REG_BASEADDROTHERPQ_RT_OFFSET 28974 +#define QM_REG_BASEADDROTHERPQ_RT_SIZE 128 +#define QM_REG_PTRTBLOTHER_RT_OFFSET 29102 +#define QM_REG_PTRTBLOTHER_RT_SIZE 256 +#define QM_REG_VOQCRDLINE_RT_OFFSET 29358 +#define QM_REG_VOQCRDLINE_RT_SIZE 20 +#define QM_REG_VOQINITCRDLINE_RT_OFFSET 29378 +#define QM_REG_VOQINITCRDLINE_RT_SIZE 20 +#define QM_REG_AFULLQMBYPTHRPFWFQ_RT_OFFSET 29398 +#define QM_REG_AFULLQMBYPTHRVPWFQ_RT_OFFSET 29399 +#define QM_REG_AFULLQMBYPTHRPFRL_RT_OFFSET 29400 +#define QM_REG_AFULLQMBYPTHRGLBLRL_RT_OFFSET 29401 +#define QM_REG_AFULLOPRTNSTCCRDMASK_RT_OFFSET 29402 +#define QM_REG_WRROTHERPQGRP_0_RT_OFFSET 29403 +#define QM_REG_WRROTHERPQGRP_1_RT_OFFSET 29404 +#define QM_REG_WRROTHERPQGRP_2_RT_OFFSET 29405 +#define QM_REG_WRROTHERPQGRP_3_RT_OFFSET 29406 +#define QM_REG_WRROTHERPQGRP_4_RT_OFFSET 29407 +#define QM_REG_WRROTHERPQGRP_5_RT_OFFSET 29408 +#define QM_REG_WRROTHERPQGRP_6_RT_OFFSET 29409 +#define QM_REG_WRROTHERPQGRP_7_RT_OFFSET 29410 +#define QM_REG_WRROTHERPQGRP_8_RT_OFFSET 29411 +#define QM_REG_WRROTHERPQGRP_9_RT_OFFSET 29412 +#define QM_REG_WRROTHERPQGRP_10_RT_OFFSET 29413 +#define QM_REG_WRROTHERPQGRP_11_RT_OFFSET 29414 +#define QM_REG_WRROTHERPQGRP_12_RT_OFFSET 29415 +#define QM_REG_WRROTHERPQGRP_13_RT_OFFSET 29416 +#define QM_REG_WRROTHERPQGRP_14_RT_OFFSET 29417 +#define QM_REG_WRROTHERPQGRP_15_RT_OFFSET 29418 +#define QM_REG_WRROTHERGRPWEIGHT_0_RT_OFFSET 29419 +#define QM_REG_WRROTHERGRPWEIGHT_1_RT_OFFSET 29420 +#define QM_REG_WRROTHERGRPWEIGHT_2_RT_OFFSET 29421 +#define QM_REG_WRROTHERGRPWEIGHT_3_RT_OFFSET 29422 +#define QM_REG_WRRTXGRPWEIGHT_0_RT_OFFSET 29423 +#define QM_REG_WRRTXGRPWEIGHT_1_RT_OFFSET 29424 +#define QM_REG_PQTX2PF_0_RT_OFFSET 29425 +#define QM_REG_PQTX2PF_1_RT_OFFSET 29426 +#define QM_REG_PQTX2PF_2_RT_OFFSET 29427 +#define QM_REG_PQTX2PF_3_RT_OFFSET 29428 +#define QM_REG_PQTX2PF_4_RT_OFFSET 29429 +#define QM_REG_PQTX2PF_5_RT_OFFSET 29430 +#define QM_REG_PQTX2PF_6_RT_OFFSET 29431 +#define QM_REG_PQTX2PF_7_RT_OFFSET 29432 +#define QM_REG_PQTX2PF_8_RT_OFFSET 29433 +#define QM_REG_PQTX2PF_9_RT_OFFSET 29434 +#define QM_REG_PQTX2PF_10_RT_OFFSET 29435 +#define QM_REG_PQTX2PF_11_RT_OFFSET 29436 +#define QM_REG_PQTX2PF_12_RT_OFFSET 29437 +#define QM_REG_PQTX2PF_13_RT_OFFSET 29438 +#define QM_REG_PQTX2PF_14_RT_OFFSET 29439 +#define QM_REG_PQTX2PF_15_RT_OFFSET 29440 +#define QM_REG_PQTX2PF_16_RT_OFFSET 29441 +#define QM_REG_PQTX2PF_17_RT_OFFSET 29442 +#define QM_REG_PQTX2PF_18_RT_OFFSET 29443 +#define QM_REG_PQTX2PF_19_RT_OFFSET 29444 +#define QM_REG_PQTX2PF_20_RT_OFFSET 29445 +#define QM_REG_PQTX2PF_21_RT_OFFSET 29446 +#define QM_REG_PQTX2PF_22_RT_OFFSET 29447 +#define QM_REG_PQTX2PF_23_RT_OFFSET 29448 +#define QM_REG_PQTX2PF_24_RT_OFFSET 29449 +#define QM_REG_PQTX2PF_25_RT_OFFSET 29450 +#define QM_REG_PQTX2PF_26_RT_OFFSET 29451 +#define QM_REG_PQTX2PF_27_RT_OFFSET 29452 +#define QM_REG_PQTX2PF_28_RT_OFFSET 29453 +#define QM_REG_PQTX2PF_29_RT_OFFSET 29454 +#define QM_REG_PQTX2PF_30_RT_OFFSET 29455 +#define QM_REG_PQTX2PF_31_RT_OFFSET 29456 +#define QM_REG_PQTX2PF_32_RT_OFFSET 29457 +#define 
QM_REG_PQTX2PF_33_RT_OFFSET 29458 +#define QM_REG_PQTX2PF_34_RT_OFFSET 29459 +#define QM_REG_PQTX2PF_35_RT_OFFSET 29460 +#define QM_REG_PQTX2PF_36_RT_OFFSET 29461 +#define QM_REG_PQTX2PF_37_RT_OFFSET 29462 +#define QM_REG_PQTX2PF_38_RT_OFFSET 29463 +#define QM_REG_PQTX2PF_39_RT_OFFSET 29464 +#define QM_REG_PQTX2PF_40_RT_OFFSET 29465 +#define QM_REG_PQTX2PF_41_RT_OFFSET 29466 +#define QM_REG_PQTX2PF_42_RT_OFFSET 29467 +#define QM_REG_PQTX2PF_43_RT_OFFSET 29468 +#define QM_REG_PQTX2PF_44_RT_OFFSET 29469 +#define QM_REG_PQTX2PF_45_RT_OFFSET 29470 +#define QM_REG_PQTX2PF_46_RT_OFFSET 29471 +#define QM_REG_PQTX2PF_47_RT_OFFSET 29472 +#define QM_REG_PQTX2PF_48_RT_OFFSET 29473 +#define QM_REG_PQTX2PF_49_RT_OFFSET 29474 +#define QM_REG_PQTX2PF_50_RT_OFFSET 29475 +#define QM_REG_PQTX2PF_51_RT_OFFSET 29476 +#define QM_REG_PQTX2PF_52_RT_OFFSET 29477 +#define QM_REG_PQTX2PF_53_RT_OFFSET 29478 +#define QM_REG_PQTX2PF_54_RT_OFFSET 29479 +#define QM_REG_PQTX2PF_55_RT_OFFSET 29480 +#define QM_REG_PQTX2PF_56_RT_OFFSET 29481 +#define QM_REG_PQTX2PF_57_RT_OFFSET 29482 +#define QM_REG_PQTX2PF_58_RT_OFFSET 29483 +#define QM_REG_PQTX2PF_59_RT_OFFSET 29484 +#define QM_REG_PQTX2PF_60_RT_OFFSET 29485 +#define QM_REG_PQTX2PF_61_RT_OFFSET 29486 +#define QM_REG_PQTX2PF_62_RT_OFFSET 29487 +#define QM_REG_PQTX2PF_63_RT_OFFSET 29488 +#define QM_REG_PQOTHER2PF_0_RT_OFFSET 29489 +#define QM_REG_PQOTHER2PF_1_RT_OFFSET 29490 +#define QM_REG_PQOTHER2PF_2_RT_OFFSET 29491 +#define QM_REG_PQOTHER2PF_3_RT_OFFSET 29492 +#define QM_REG_PQOTHER2PF_4_RT_OFFSET 29493 +#define QM_REG_PQOTHER2PF_5_RT_OFFSET 29494 +#define QM_REG_PQOTHER2PF_6_RT_OFFSET 29495 +#define QM_REG_PQOTHER2PF_7_RT_OFFSET 29496 +#define QM_REG_PQOTHER2PF_8_RT_OFFSET 29497 +#define QM_REG_PQOTHER2PF_9_RT_OFFSET 29498 +#define QM_REG_PQOTHER2PF_10_RT_OFFSET 29499 +#define QM_REG_PQOTHER2PF_11_RT_OFFSET 29500 +#define QM_REG_PQOTHER2PF_12_RT_OFFSET 29501 +#define QM_REG_PQOTHER2PF_13_RT_OFFSET 29502 +#define QM_REG_PQOTHER2PF_14_RT_OFFSET 29503 +#define QM_REG_PQOTHER2PF_15_RT_OFFSET 29504 +#define QM_REG_RLGLBLPERIOD_0_RT_OFFSET 29505 +#define QM_REG_RLGLBLPERIOD_1_RT_OFFSET 29506 +#define QM_REG_RLGLBLPERIODTIMER_0_RT_OFFSET 29507 +#define QM_REG_RLGLBLPERIODTIMER_1_RT_OFFSET 29508 +#define QM_REG_RLGLBLPERIODSEL_0_RT_OFFSET 29509 +#define QM_REG_RLGLBLPERIODSEL_1_RT_OFFSET 29510 +#define QM_REG_RLGLBLPERIODSEL_2_RT_OFFSET 29511 +#define QM_REG_RLGLBLPERIODSEL_3_RT_OFFSET 29512 +#define QM_REG_RLGLBLPERIODSEL_4_RT_OFFSET 29513 +#define QM_REG_RLGLBLPERIODSEL_5_RT_OFFSET 29514 +#define QM_REG_RLGLBLPERIODSEL_6_RT_OFFSET 29515 +#define QM_REG_RLGLBLPERIODSEL_7_RT_OFFSET 29516 +#define QM_REG_RLGLBLINCVAL_RT_OFFSET 29517 +#define QM_REG_RLGLBLINCVAL_RT_SIZE 256 +#define QM_REG_RLGLBLUPPERBOUND_RT_OFFSET 29773 +#define QM_REG_RLGLBLUPPERBOUND_RT_SIZE 256 +#define QM_REG_RLGLBLCRD_RT_OFFSET 30029 +#define QM_REG_RLGLBLCRD_RT_SIZE 256 +#define QM_REG_RLGLBLENABLE_RT_OFFSET 30285 +#define QM_REG_RLPFPERIOD_RT_OFFSET 30286 +#define QM_REG_RLPFPERIODTIMER_RT_OFFSET 30287 +#define QM_REG_RLPFINCVAL_RT_OFFSET 30288 +#define QM_REG_RLPFINCVAL_RT_SIZE 16 +#define QM_REG_RLPFUPPERBOUND_RT_OFFSET 30304 +#define QM_REG_RLPFUPPERBOUND_RT_SIZE 16 +#define QM_REG_RLPFCRD_RT_OFFSET 30320 +#define QM_REG_RLPFCRD_RT_SIZE 16 +#define QM_REG_RLPFENABLE_RT_OFFSET 30336 +#define QM_REG_RLPFVOQENABLE_RT_OFFSET 30337 +#define QM_REG_WFQPFWEIGHT_RT_OFFSET 30338 +#define QM_REG_WFQPFWEIGHT_RT_SIZE 16 +#define QM_REG_WFQPFUPPERBOUND_RT_OFFSET 30354 +#define QM_REG_WFQPFUPPERBOUND_RT_SIZE 16 
+#define QM_REG_WFQPFCRD_RT_OFFSET 30370 +#define QM_REG_WFQPFCRD_RT_SIZE 160 +#define QM_REG_WFQPFENABLE_RT_OFFSET 30530 +#define QM_REG_WFQVPENABLE_RT_OFFSET 30531 +#define QM_REG_BASEADDRTXPQ_RT_OFFSET 30532 +#define QM_REG_BASEADDRTXPQ_RT_SIZE 512 +#define QM_REG_TXPQMAP_RT_OFFSET 31044 +#define QM_REG_TXPQMAP_RT_SIZE 512 +#define QM_REG_WFQVPWEIGHT_RT_OFFSET 31556 +#define QM_REG_WFQVPWEIGHT_RT_SIZE 512 +#define QM_REG_WFQVPCRD_RT_OFFSET 32068 +#define QM_REG_WFQVPCRD_RT_SIZE 512 +#define QM_REG_WFQVPMAP_RT_OFFSET 32580 +#define QM_REG_WFQVPMAP_RT_SIZE 512 +#define QM_REG_PTRTBLTX_RT_OFFSET 33092 +#define QM_REG_PTRTBLTX_RT_SIZE 1024 +#define QM_REG_WFQPFCRD_MSB_RT_OFFSET 34116 +#define QM_REG_WFQPFCRD_MSB_RT_SIZE 160 +#define NIG_REG_TAG_ETHERTYPE_0_RT_OFFSET 34276 +#define NIG_REG_BRB_GATE_DNTFWD_PORT_RT_OFFSET 34277 +#define NIG_REG_OUTER_TAG_VALUE_LIST0_RT_OFFSET 34278 +#define NIG_REG_OUTER_TAG_VALUE_LIST1_RT_OFFSET 34279 +#define NIG_REG_OUTER_TAG_VALUE_LIST2_RT_OFFSET 34280 +#define NIG_REG_OUTER_TAG_VALUE_LIST3_RT_OFFSET 34281 +#define NIG_REG_LLH_FUNC_TAGMAC_CLS_TYPE_RT_OFFSET 34282 +#define NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET 34283 +#define NIG_REG_LLH_FUNC_TAG_EN_RT_SIZE 4 +#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET 34287 +#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_SIZE 4 +#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_OFFSET 34291 +#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_SIZE 32 +#define NIG_REG_LLH_FUNC_FILTER_EN_RT_OFFSET 34323 +#define NIG_REG_LLH_FUNC_FILTER_EN_RT_SIZE 16 +#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_OFFSET 34339 +#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_SIZE 16 +#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_OFFSET 34355 +#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_SIZE 16 +#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_OFFSET 34371 +#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_SIZE 16 +#define NIG_REG_TX_EDPM_CTRL_RT_OFFSET 34387 +#define NIG_REG_PPF_TO_ENGINE_SEL_RT_OFFSET 34388 +#define NIG_REG_PPF_TO_ENGINE_SEL_RT_SIZE 8 +#define CDU_REG_CID_ADDR_PARAMS_RT_OFFSET 34396 +#define CDU_REG_SEGMENT0_PARAMS_RT_OFFSET 34397 +#define CDU_REG_SEGMENT1_PARAMS_RT_OFFSET 34398 +#define CDU_REG_PF_SEG0_TYPE_OFFSET_RT_OFFSET 34399 +#define CDU_REG_PF_SEG1_TYPE_OFFSET_RT_OFFSET 34400 +#define CDU_REG_PF_SEG2_TYPE_OFFSET_RT_OFFSET 34401 +#define CDU_REG_PF_SEG3_TYPE_OFFSET_RT_OFFSET 34402 +#define CDU_REG_PF_FL_SEG0_TYPE_OFFSET_RT_OFFSET 34403 +#define CDU_REG_PF_FL_SEG1_TYPE_OFFSET_RT_OFFSET 34404 +#define CDU_REG_PF_FL_SEG2_TYPE_OFFSET_RT_OFFSET 34405 +#define CDU_REG_PF_FL_SEG3_TYPE_OFFSET_RT_OFFSET 34406 +#define CDU_REG_VF_SEG_TYPE_OFFSET_RT_OFFSET 34407 +#define CDU_REG_VF_FL_SEG_TYPE_OFFSET_RT_OFFSET 34408 +#define PBF_REG_TAG_ETHERTYPE_0_RT_OFFSET 34409 +#define PBF_REG_BTB_SHARED_AREA_SIZE_RT_OFFSET 34410 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET 34411 +#define PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET 34412 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ0_RT_OFFSET 34413 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET 34414 +#define PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET 34415 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ1_RT_OFFSET 34416 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ2_RT_OFFSET 34417 +#define PBF_REG_BTB_GUARANTEED_VOQ2_RT_OFFSET 34418 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ2_RT_OFFSET 34419 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ3_RT_OFFSET 34420 +#define PBF_REG_BTB_GUARANTEED_VOQ3_RT_OFFSET 34421 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ3_RT_OFFSET 34422 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ4_RT_OFFSET 34423 +#define PBF_REG_BTB_GUARANTEED_VOQ4_RT_OFFSET 
34424 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ4_RT_OFFSET 34425 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ5_RT_OFFSET 34426 +#define PBF_REG_BTB_GUARANTEED_VOQ5_RT_OFFSET 34427 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ5_RT_OFFSET 34428 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ6_RT_OFFSET 34429 +#define PBF_REG_BTB_GUARANTEED_VOQ6_RT_OFFSET 34430 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ6_RT_OFFSET 34431 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ7_RT_OFFSET 34432 +#define PBF_REG_BTB_GUARANTEED_VOQ7_RT_OFFSET 34433 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ7_RT_OFFSET 34434 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ8_RT_OFFSET 34435 +#define PBF_REG_BTB_GUARANTEED_VOQ8_RT_OFFSET 34436 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ8_RT_OFFSET 34437 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ9_RT_OFFSET 34438 +#define PBF_REG_BTB_GUARANTEED_VOQ9_RT_OFFSET 34439 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ9_RT_OFFSET 34440 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ10_RT_OFFSET 34441 +#define PBF_REG_BTB_GUARANTEED_VOQ10_RT_OFFSET 34442 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ10_RT_OFFSET 34443 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ11_RT_OFFSET 34444 +#define PBF_REG_BTB_GUARANTEED_VOQ11_RT_OFFSET 34445 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ11_RT_OFFSET 34446 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ12_RT_OFFSET 34447 +#define PBF_REG_BTB_GUARANTEED_VOQ12_RT_OFFSET 34448 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ12_RT_OFFSET 34449 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ13_RT_OFFSET 34450 +#define PBF_REG_BTB_GUARANTEED_VOQ13_RT_OFFSET 34451 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ13_RT_OFFSET 34452 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ14_RT_OFFSET 34453 +#define PBF_REG_BTB_GUARANTEED_VOQ14_RT_OFFSET 34454 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ14_RT_OFFSET 34455 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ15_RT_OFFSET 34456 +#define PBF_REG_BTB_GUARANTEED_VOQ15_RT_OFFSET 34457 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ15_RT_OFFSET 34458 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ16_RT_OFFSET 34459 +#define PBF_REG_BTB_GUARANTEED_VOQ16_RT_OFFSET 34460 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ16_RT_OFFSET 34461 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ17_RT_OFFSET 34462 +#define PBF_REG_BTB_GUARANTEED_VOQ17_RT_OFFSET 34463 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ17_RT_OFFSET 34464 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ18_RT_OFFSET 34465 +#define PBF_REG_BTB_GUARANTEED_VOQ18_RT_OFFSET 34466 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ18_RT_OFFSET 34467 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ19_RT_OFFSET 34468 +#define PBF_REG_BTB_GUARANTEED_VOQ19_RT_OFFSET 34469 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ19_RT_OFFSET 34470 +#define XCM_REG_CON_PHY_Q3_RT_OFFSET 34471 + +#define RUNTIME_ARRAY_SIZE 34472 /* Init Callbacks */ #define DMAE_READY_CB 0 @@ -5648,9 +5450,9 @@ struct e4_eth_conn_context { struct pstorm_eth_conn_st_ctx pstorm_st_context; struct xstorm_eth_conn_st_ctx xstorm_st_context; struct e4_xstorm_eth_conn_ag_ctx xstorm_ag_context; + struct e4_tstorm_eth_conn_ag_ctx tstorm_ag_context; struct ystorm_eth_conn_st_ctx ystorm_st_context; struct e4_ystorm_eth_conn_ag_ctx ystorm_ag_context; - struct e4_tstorm_eth_conn_ag_ctx tstorm_ag_context; struct e4_ustorm_eth_conn_ag_ctx ustorm_ag_context; struct ustorm_eth_conn_st_ctx ustorm_st_context; struct mstorm_eth_conn_st_ctx mstorm_st_context; @@ -5680,6 +5482,16 @@ enum eth_error_code { ETH_FILTERS_VNI_ADD_FAIL_FULL, ETH_FILTERS_VNI_ADD_FAIL_DUP, ETH_FILTERS_GFT_UPDATE_FAIL, + ETH_RX_QUEUE_FAIL_LOAD_VF_DATA, + ETH_FILTERS_GFS_ADD_FILTER_FAIL_MAX_HOPS, + 
ETH_FILTERS_GFS_ADD_FILTER_FAIL_NO_FREE_ENRTY, + ETH_FILTERS_GFS_ADD_FILTER_FAIL_ALREADY_EXISTS, + ETH_FILTERS_GFS_ADD_FILTER_FAIL_PCI_ERROR, + ETH_FILTERS_GFS_ADD_FINLER_FAIL_MAGIC_NUM_ERROR, + ETH_FILTERS_GFS_DEL_FILTER_FAIL_MAX_HOPS, + ETH_FILTERS_GFS_DEL_FILTER_FAIL_NO_MATCH_ENRTY, + ETH_FILTERS_GFS_DEL_FILTER_FAIL_PCI_ERROR, + ETH_FILTERS_GFS_DEL_FILTER_FAIL_MAGIC_NUM_ERROR, MAX_ETH_ERROR_CODE }; @@ -5703,6 +5515,11 @@ enum eth_event_opcode { ETH_EVENT_RX_CREATE_GFT_ACTION, ETH_EVENT_RX_GFT_UPDATE_FILTER, ETH_EVENT_TX_QUEUE_UPDATE, + ETH_EVENT_RGFS_ADD_FILTER, + ETH_EVENT_RGFS_DEL_FILTER, + ETH_EVENT_TGFS_ADD_FILTER, + ETH_EVENT_TGFS_DEL_FILTER, + ETH_EVENT_GFS_COUNTERS_REPORT_REQUEST, MAX_ETH_EVENT_OPCODE }; @@ -5795,18 +5612,31 @@ enum eth_ramrod_cmd_id { ETH_RAMROD_RX_CREATE_GFT_ACTION, ETH_RAMROD_GFT_UPDATE_FILTER, ETH_RAMROD_TX_QUEUE_UPDATE, + ETH_RAMROD_RGFS_FILTER_ADD, + ETH_RAMROD_RGFS_FILTER_DEL, + ETH_RAMROD_TGFS_FILTER_ADD, + ETH_RAMROD_TGFS_FILTER_DEL, + ETH_RAMROD_GFS_COUNTERS_REPORT_REQUEST, MAX_ETH_RAMROD_CMD_ID }; /* Return code from eth sp ramrods */ struct eth_return_code { u8 value; -#define ETH_RETURN_CODE_ERR_CODE_MASK 0x1F -#define ETH_RETURN_CODE_ERR_CODE_SHIFT 0 -#define ETH_RETURN_CODE_RESERVED_MASK 0x3 -#define ETH_RETURN_CODE_RESERVED_SHIFT 5 -#define ETH_RETURN_CODE_RX_TX_MASK 0x1 -#define ETH_RETURN_CODE_RX_TX_SHIFT 7 +#define ETH_RETURN_CODE_ERR_CODE_MASK 0x3F +#define ETH_RETURN_CODE_ERR_CODE_SHIFT 0 +#define ETH_RETURN_CODE_RESERVED_MASK 0x1 +#define ETH_RETURN_CODE_RESERVED_SHIFT 6 +#define ETH_RETURN_CODE_RX_TX_MASK 0x1 +#define ETH_RETURN_CODE_RX_TX_SHIFT 7 +}; + +/* tx destination enum */ +enum eth_tx_dst_mode_config_enum { + ETH_TX_DST_MODE_CONFIG_DISABLE, + ETH_TX_DST_MODE_CONFIG_FORWARD_DATA_IN_BD, + ETH_TX_DST_MODE_CONFIG_FORWARD_DATA_IN_VPORT, + MAX_ETH_TX_DST_MODE_CONFIG_ENUM }; /* What to do in case an error occurs */ @@ -5833,8 +5663,10 @@ struct eth_tx_err_vals { #define ETH_TX_ERR_VALS_MTU_VIOLATION_SHIFT 5 #define ETH_TX_ERR_VALS_ILLEGAL_CONTROL_FRAME_MASK 0x1 #define ETH_TX_ERR_VALS_ILLEGAL_CONTROL_FRAME_SHIFT 6 -#define ETH_TX_ERR_VALS_RESERVED_MASK 0x1FF -#define ETH_TX_ERR_VALS_RESERVED_SHIFT 7 +#define ETH_TX_ERR_VALS_ILLEGAL_BD_FLAGS_MASK 0x1 +#define ETH_TX_ERR_VALS_ILLEGAL_BD_FLAGS_SHIFT 7 +#define ETH_TX_ERR_VALS_RESERVED_MASK 0xFF +#define ETH_TX_ERR_VALS_RESERVED_SHIFT 8 }; /* vport rss configuration data */ @@ -5864,7 +5696,6 @@ struct eth_vport_rss_config { u8 tbl_size; __le32 reserved2[2]; __le16 indirection_table[ETH_RSS_IND_TABLE_ENTRIES_NUM]; - __le32 rss_key[ETH_RSS_KEY_SIZE_REGS]; __le32 reserved3[2]; }; @@ -6066,7 +5897,7 @@ struct rx_update_gft_filter_data { u8 inner_vlan_removal_en; }; -/* Ramrod data for rx queue start ramrod */ +/* Ramrod data for tx queue start ramrod */ struct tx_queue_start_ramrod_data { __le16 sb_id; u8 sb_index; @@ -6079,16 +5910,14 @@ struct tx_queue_start_ramrod_data { #define TX_QUEUE_START_RAMROD_DATA_DISABLE_OPPORTUNISTIC_SHIFT 0 #define TX_QUEUE_START_RAMROD_DATA_TEST_MODE_PKT_DUP_MASK 0x1 #define TX_QUEUE_START_RAMROD_DATA_TEST_MODE_PKT_DUP_SHIFT 1 -#define TX_QUEUE_START_RAMROD_DATA_TEST_MODE_TX_DEST_MASK 0x1 -#define TX_QUEUE_START_RAMROD_DATA_TEST_MODE_TX_DEST_SHIFT 2 #define TX_QUEUE_START_RAMROD_DATA_PMD_MODE_MASK 0x1 -#define TX_QUEUE_START_RAMROD_DATA_PMD_MODE_SHIFT 3 +#define TX_QUEUE_START_RAMROD_DATA_PMD_MODE_SHIFT 2 #define TX_QUEUE_START_RAMROD_DATA_NOTIFY_EN_MASK 0x1 -#define TX_QUEUE_START_RAMROD_DATA_NOTIFY_EN_SHIFT 4 +#define TX_QUEUE_START_RAMROD_DATA_NOTIFY_EN_SHIFT 3 
#define TX_QUEUE_START_RAMROD_DATA_PIN_CONTEXT_MASK 0x1 -#define TX_QUEUE_START_RAMROD_DATA_PIN_CONTEXT_SHIFT 5 -#define TX_QUEUE_START_RAMROD_DATA_RESERVED1_MASK 0x3 -#define TX_QUEUE_START_RAMROD_DATA_RESERVED1_SHIFT 6 +#define TX_QUEUE_START_RAMROD_DATA_PIN_CONTEXT_SHIFT 4 +#define TX_QUEUE_START_RAMROD_DATA_RESERVED1_MASK 0x7 +#define TX_QUEUE_START_RAMROD_DATA_RESERVED1_SHIFT 5 u8 pxp_st_hint; u8 pxp_tph_valid_bd; u8 pxp_tph_valid_pkt; @@ -6144,18 +5973,22 @@ struct vport_start_ramrod_data { __le16 default_vlan; u8 tx_switching_en; u8 anti_spoofing_en; - u8 default_vlan_en; - u8 handle_ptp_pkts; u8 silent_vlan_removal_en; u8 untagged; struct eth_tx_err_vals tx_err_behav; - u8 zero_placement_offset; u8 ctl_frame_mac_check_en; u8 ctl_frame_ethtype_check_en; + u8 reserved0; + u8 reserved1; + u8 tx_dst_port_mode_config; + u8 dst_vport_id; + u8 tx_dst_port_mode; + u8 dst_vport_id_valid; u8 wipe_inner_vlan_pri_en; + u8 reserved2[2]; struct eth_in_to_in_pri_map_cfg in_to_in_vlan_pri_map_cfg; }; @@ -6715,19 +6548,6 @@ struct e4_xstorm_eth_hw_conn_ag_ctx { __le16 conn_dpi; }; -/* GFT CAM line struct */ -struct gft_cam_line { - __le32 camline; -#define GFT_CAM_LINE_VALID_MASK 0x1 -#define GFT_CAM_LINE_VALID_SHIFT 0 -#define GFT_CAM_LINE_DATA_MASK 0x3FFF -#define GFT_CAM_LINE_DATA_SHIFT 1 -#define GFT_CAM_LINE_MASK_BITS_MASK 0x3FFF -#define GFT_CAM_LINE_MASK_BITS_SHIFT 15 -#define GFT_CAM_LINE_RESERVED1_MASK 0x7 -#define GFT_CAM_LINE_RESERVED1_SHIFT 29 -}; - /* GFT CAM line struct with fields breakout */ struct gft_cam_line_mapped { __le32 camline; @@ -6757,10 +6577,6 @@ struct gft_cam_line_mapped { #define GFT_CAM_LINE_MAPPED_RESERVED1_SHIFT 29 }; -union gft_cam_line_union { - struct gft_cam_line cam_line; - struct gft_cam_line_mapped cam_line_mapped; -}; /* Used in gft_profile_key: Indication for ip version */ enum gft_profile_ip_version { @@ -7039,6 +6855,11 @@ struct mstorm_rdma_task_st_ctx { struct regpair temp[4]; }; +/* The roce task context of Ustorm */ +struct ustorm_rdma_task_st_ctx { + struct regpair temp[6]; +}; + struct e4_ustorm_rdma_task_ag_ctx { u8 reserved; u8 state; @@ -7048,8 +6869,8 @@ struct e4_ustorm_rdma_task_ag_ctx { #define E4_USTORM_RDMA_TASK_AG_CTX_CONNECTION_TYPE_SHIFT 0 #define E4_USTORM_RDMA_TASK_AG_CTX_EXIST_IN_QM0_MASK 0x1 #define E4_USTORM_RDMA_TASK_AG_CTX_EXIST_IN_QM0_SHIFT 4 -#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_RUNT_VALID_MASK 0x1 -#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_RUNT_VALID_SHIFT 5 +#define E4_USTORM_RDMA_TASK_AG_CTX_BIT1_MASK 0x1 +#define E4_USTORM_RDMA_TASK_AG_CTX_BIT1_SHIFT 5 #define E4_USTORM_RDMA_TASK_AG_CTX_DIF_WRITE_RESULT_CF_MASK 0x3 #define E4_USTORM_RDMA_TASK_AG_CTX_DIF_WRITE_RESULT_CF_SHIFT 6 u8 flags1; @@ -7079,29 +6900,29 @@ struct e4_ustorm_rdma_task_ag_ctx { #define E4_USTORM_RDMA_TASK_AG_CTX_RULE2EN_MASK 0x1 #define E4_USTORM_RDMA_TASK_AG_CTX_RULE2EN_SHIFT 7 u8 flags3; -#define E4_USTORM_RDMA_TASK_AG_CTX_RULE3EN_MASK 0x1 -#define E4_USTORM_RDMA_TASK_AG_CTX_RULE3EN_SHIFT 0 -#define E4_USTORM_RDMA_TASK_AG_CTX_RULE4EN_MASK 0x1 -#define E4_USTORM_RDMA_TASK_AG_CTX_RULE4EN_SHIFT 1 -#define E4_USTORM_RDMA_TASK_AG_CTX_RULE5EN_MASK 0x1 -#define E4_USTORM_RDMA_TASK_AG_CTX_RULE5EN_SHIFT 2 -#define E4_USTORM_RDMA_TASK_AG_CTX_RULE6EN_MASK 0x1 -#define E4_USTORM_RDMA_TASK_AG_CTX_RULE6EN_SHIFT 3 -#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_ERROR_TYPE_MASK 0xF -#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_ERROR_TYPE_SHIFT 4 +#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_RXMIT_PROD_CONS_EN_MASK 0x1 +#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_RXMIT_PROD_CONS_EN_SHIFT 0 
+#define E4_USTORM_RDMA_TASK_AG_CTX_RULE4EN_MASK 0x1 +#define E4_USTORM_RDMA_TASK_AG_CTX_RULE4EN_SHIFT 1 +#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_WRITE_PROD_CONS_EN_MASK 0x1 +#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_WRITE_PROD_CONS_EN_SHIFT 2 +#define E4_USTORM_RDMA_TASK_AG_CTX_RULE6EN_MASK 0x1 +#define E4_USTORM_RDMA_TASK_AG_CTX_RULE6EN_SHIFT 3 +#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_ERROR_TYPE_MASK 0xF +#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_ERROR_TYPE_SHIFT 4 __le32 dif_err_intervals; __le32 dif_error_1st_interval; - __le32 sq_cons; - __le32 dif_runt_value; + __le32 dif_rxmit_cons; + __le32 dif_rxmit_prod; __le32 sge_index; - __le32 reg5; + __le32 sq_cons; u8 byte2; u8 byte3; - __le16 word1; - __le16 word2; + __le16 dif_write_cons; + __le16 dif_write_prod; __le16 word3; - __le32 reg6; - __le32 reg7; + __le32 dif_error_buffer_address_lo; + __le32 dif_error_buffer_address_hi; }; /* RDMA task context */ @@ -7112,6 +6933,8 @@ struct e4_rdma_task_context { struct e4_mstorm_rdma_task_ag_ctx mstorm_ag_context; struct mstorm_rdma_task_st_ctx mstorm_st_context; struct rdif_task_context rdif_context; + struct ustorm_rdma_task_st_ctx ustorm_st_context; + struct regpair ustorm_st_padding[2]; struct e4_ustorm_rdma_task_ag_ctx ustorm_ag_context; }; @@ -7147,7 +6970,12 @@ struct rdma_create_cq_ramrod_data { u8 pbl_log_page_size; u8 toggle_bit; __le16 int_timeout; - __le16 reserved1; + u8 vf_id; + u8 flags; +#define RDMA_CREATE_CQ_RAMROD_DATA_VF_ID_VALID_MASK 0x1 +#define RDMA_CREATE_CQ_RAMROD_DATA_VF_ID_VALID_SHIFT 0 +#define RDMA_CREATE_CQ_RAMROD_DATA_RESERVED1_MASK 0x7F +#define RDMA_CREATE_CQ_RAMROD_DATA_RESERVED1_SHIFT 1 }; /* rdma deregister tid ramrod data */ @@ -7191,6 +7019,7 @@ enum rdma_fw_return_code { RDMA_RETURN_DEREGISTER_MR_BAD_STATE_ERR, RDMA_RETURN_RESIZE_CQ_ERR, RDMA_RETURN_NIG_DRAIN_REQ, + RDMA_RETURN_GENERAL_ERR, MAX_RDMA_FW_RETURN_CODE }; @@ -7204,7 +7033,10 @@ struct rdma_init_func_hdr { u8 relaxed_ordering; __le16 first_reg_srq_id; __le32 reg_srq_base_addr; - __le32 reserved; + u8 searcher_mode; + u8 pvrdma_mode; + u8 max_num_ns_log; + u8 reserved; }; /* rdma function init ramrod data */ @@ -7294,16 +7126,20 @@ struct rdma_resize_cq_ramrod_data { #define RDMA_RESIZE_CQ_RAMROD_DATA_TOGGLE_BIT_SHIFT 0 #define RDMA_RESIZE_CQ_RAMROD_DATA_IS_TWO_LEVEL_PBL_MASK 0x1 #define RDMA_RESIZE_CQ_RAMROD_DATA_IS_TWO_LEVEL_PBL_SHIFT 1 -#define RDMA_RESIZE_CQ_RAMROD_DATA_RESERVED_MASK 0x3F -#define RDMA_RESIZE_CQ_RAMROD_DATA_RESERVED_SHIFT 2 +#define RDMA_RESIZE_CQ_RAMROD_DATA_VF_ID_VALID_MASK 0x1 +#define RDMA_RESIZE_CQ_RAMROD_DATA_VF_ID_VALID_SHIFT 2 +#define RDMA_RESIZE_CQ_RAMROD_DATA_RESERVED_MASK 0x1F +#define RDMA_RESIZE_CQ_RAMROD_DATA_RESERVED_SHIFT 3 u8 pbl_log_page_size; __le16 pbl_num_pages; __le32 max_cqes; struct regpair pbl_addr; struct regpair output_params_addr; + u8 vf_id; + u8 reserved1[7]; }; -/* The rdma storm context of Mstorm */ +/* The rdma SRQ context */ struct rdma_srq_context { struct regpair temp[8]; }; @@ -7350,6 +7186,7 @@ enum rdma_tid_type { MAX_RDMA_TID_TYPE }; +/* The rdma XRC SRQ context */ struct rdma_xrc_srq_context { struct regpair temp[9]; }; @@ -7531,12 +7368,12 @@ struct e4_xstorm_roce_conn_ag_ctx { #define E4_XSTORM_ROCE_CONN_AG_CTX_BIT10_SHIFT 2 #define E4_XSTORM_ROCE_CONN_AG_CTX_BIT11_MASK 0x1 #define E4_XSTORM_ROCE_CONN_AG_CTX_BIT11_SHIFT 3 -#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT12_MASK 0x1 -#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT12_SHIFT 4 +#define E4_XSTORM_ROCE_CONN_AG_CTX_MSDM_FLUSH_MASK 0x1 +#define E4_XSTORM_ROCE_CONN_AG_CTX_MSDM_FLUSH_SHIFT 4 #define 
E4_XSTORM_ROCE_CONN_AG_CTX_MSEM_FLUSH_MASK 0x1 #define E4_XSTORM_ROCE_CONN_AG_CTX_MSEM_FLUSH_SHIFT 5 -#define E4_XSTORM_ROCE_CONN_AG_CTX_MSDM_FLUSH_MASK 0x1 -#define E4_XSTORM_ROCE_CONN_AG_CTX_MSDM_FLUSH_SHIFT 6 +#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT14_MASK 0x1 +#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT14_SHIFT 6 #define E4_XSTORM_ROCE_CONN_AG_CTX_YSTORM_FLUSH_MASK 0x1 #define E4_XSTORM_ROCE_CONN_AG_CTX_YSTORM_FLUSH_SHIFT 7 u8 flags2; @@ -7860,9 +7697,9 @@ struct mstorm_roce_conn_st_ctx { struct regpair temp[6]; }; -/* The roce storm context of Ystorm */ +/* The roce storm context of Ustorm */ struct ustorm_roce_conn_st_ctx { - struct regpair temp[12]; + struct regpair temp[14]; }; /* roce connection context */ @@ -7880,6 +7717,7 @@ struct e4_roce_conn_context { struct mstorm_roce_conn_st_ctx mstorm_st_context; struct regpair mstorm_st_padding[2]; struct ustorm_roce_conn_st_ctx ustorm_st_context; + struct regpair ustorm_st_padding[2]; }; /* roce cqes statistics */ @@ -7934,12 +7772,17 @@ struct roce_create_qp_req_ramrod_data { struct regpair qp_handle_for_cqe; struct regpair qp_handle_for_async; u8 stats_counter_id; - u8 reserved3[6]; + u8 vf_id; + u8 vport_id; u8 flags2; #define ROCE_CREATE_QP_REQ_RAMROD_DATA_EDPM_MODE_MASK 0x1 #define ROCE_CREATE_QP_REQ_RAMROD_DATA_EDPM_MODE_SHIFT 0 -#define ROCE_CREATE_QP_REQ_RAMROD_DATA_RESERVED_MASK 0x7F -#define ROCE_CREATE_QP_REQ_RAMROD_DATA_RESERVED_SHIFT 1 +#define ROCE_CREATE_QP_REQ_RAMROD_DATA_VF_ID_VALID_MASK 0x1 +#define ROCE_CREATE_QP_REQ_RAMROD_DATA_VF_ID_VALID_SHIFT 1 +#define ROCE_CREATE_QP_REQ_RAMROD_DATA_RESERVED_MASK 0x3F +#define ROCE_CREATE_QP_REQ_RAMROD_DATA_RESERVED_SHIFT 2 + u8 name_space; + u8 reserved3[3]; __le16 regular_latency_phy_queue; __le16 dpi; }; @@ -7967,8 +7810,10 @@ struct roce_create_qp_resp_ramrod_data { #define ROCE_CREATE_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER_SHIFT 11 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_XRC_FLAG_MASK 0x1 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_XRC_FLAG_SHIFT 16 -#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED_MASK 0x7FFF -#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED_SHIFT 17 +#define ROCE_CREATE_QP_RESP_RAMROD_DATA_VF_ID_VALID_MASK 0x1 +#define ROCE_CREATE_QP_RESP_RAMROD_DATA_VF_ID_VALID_SHIFT 17 +#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED_MASK 0x3FFF +#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED_SHIFT 18 __le16 xrc_domain; u8 max_ird; u8 traffic_class; @@ -7995,10 +7840,14 @@ struct roce_create_qp_resp_ramrod_data { struct regpair qp_handle_for_cqe; struct regpair qp_handle_for_async; __le16 low_latency_phy_queue; - u8 reserved2[2]; + u8 vf_id; + u8 vport_id; __le32 cq_cid; __le16 regular_latency_phy_queue; __le16 dpi; + __le32 src_qp_id; + u8 name_space; + u8 reserved3[3]; }; /* roce DCQCN received statistics */ @@ -8032,6 +7881,8 @@ struct roce_destroy_qp_resp_output_params { /* RoCE destroy qp responder ramrod data */ struct roce_destroy_qp_resp_ramrod_data { struct regpair output_params_addr; + __le32 src_qp_id; + __le32 reserved; }; /* roce error statistics */ @@ -8115,8 +7966,8 @@ struct roce_modify_qp_req_ramrod_data { #define ROCE_MODIFY_QP_REQ_RAMROD_DATA_PRI_FLG_SHIFT 9 #define ROCE_MODIFY_QP_REQ_RAMROD_DATA_PRI_MASK 0x7 #define ROCE_MODIFY_QP_REQ_RAMROD_DATA_PRI_SHIFT 10 -#define ROCE_MODIFY_QP_REQ_RAMROD_DATA_PHYSICAL_QUEUES_FLG_MASK 0x1 -#define ROCE_MODIFY_QP_REQ_RAMROD_DATA_PHYSICAL_QUEUES_FLG_SHIFT 13 +#define ROCE_MODIFY_QP_REQ_RAMROD_DATA_PHYSICAL_QUEUE_FLG_MASK 0x1 +#define ROCE_MODIFY_QP_REQ_RAMROD_DATA_PHYSICAL_QUEUE_FLG_SHIFT 13 #define 
ROCE_MODIFY_QP_REQ_RAMROD_DATA_RESERVED1_MASK 0x3 #define ROCE_MODIFY_QP_REQ_RAMROD_DATA_RESERVED1_SHIFT 14 u8 fields; @@ -8162,8 +8013,8 @@ struct roce_modify_qp_resp_ramrod_data { #define ROCE_MODIFY_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER_FLG_SHIFT 8 #define ROCE_MODIFY_QP_RESP_RAMROD_DATA_RDMA_OPS_EN_FLG_MASK 0x1 #define ROCE_MODIFY_QP_RESP_RAMROD_DATA_RDMA_OPS_EN_FLG_SHIFT 9 -#define ROCE_MODIFY_QP_RESP_RAMROD_DATA_PHYSICAL_QUEUES_FLG_MASK 0x1 -#define ROCE_MODIFY_QP_RESP_RAMROD_DATA_PHYSICAL_QUEUES_FLG_SHIFT 10 +#define ROCE_MODIFY_QP_RESP_RAMROD_DATA_PHYSICAL_QUEUE_FLG_MASK 0x1 +#define ROCE_MODIFY_QP_RESP_RAMROD_DATA_PHYSICAL_QUEUE_FLG_SHIFT 10 #define ROCE_MODIFY_QP_RESP_RAMROD_DATA_RESERVED1_MASK 0x1F #define ROCE_MODIFY_QP_RESP_RAMROD_DATA_RESERVED1_SHIFT 11 u8 fields; @@ -8204,7 +8055,7 @@ struct roce_query_qp_req_ramrod_data { /* RoCE query qp responder output params */ struct roce_query_qp_resp_output_params { __le32 psn; - __le32 err_flag; + __le32 flags; #define ROCE_QUERY_QP_RESP_OUTPUT_PARAMS_ERROR_FLG_MASK 0x1 #define ROCE_QUERY_QP_RESP_OUTPUT_PARAMS_ERROR_FLG_SHIFT 0 #define ROCE_QUERY_QP_RESP_OUTPUT_PARAMS_RESERVED0_MASK 0x7FFFFFFF @@ -8271,12 +8122,12 @@ struct e4_xstorm_roce_conn_ag_ctx_dq_ext_ld_part { #define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT10_SHIFT 2 #define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT11_MASK 0x1 #define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT11_SHIFT 3 -#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT12_MASK 0x1 -#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT12_SHIFT 4 -#define E4XSTORMROCECONNAGCTXDQEXTLDPART_MSEM_FLUSH_MASK 0x1 -#define E4XSTORMROCECONNAGCTXDQEXTLDPART_MSEM_FLUSH_SHIFT 5 -#define E4XSTORMROCECONNAGCTXDQEXTLDPART_MSDM_FLUSH_MASK 0x1 -#define E4XSTORMROCECONNAGCTXDQEXTLDPART_MSDM_FLUSH_SHIFT 6 +#define E4XSTORMROCECONNAGCTXDQEXTLDPART_MSDM_FLUSH_MASK 0x1 +#define E4XSTORMROCECONNAGCTXDQEXTLDPART_MSDM_FLUSH_SHIFT 4 +#define E4XSTORMROCECONNAGCTXDQEXTLDPART_MSEM_FLUSH_MASK 0x1 +#define E4XSTORMROCECONNAGCTXDQEXTLDPART_MSEM_FLUSH_SHIFT 5 +#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT14_MASK 0x1 +#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT14_SHIFT 6 #define E4XSTORMROCECONNAGCTXDQEXTLDPART_YSTORM_FLUSH_MASK 0x1 #define E4XSTORMROCECONNAGCTXDQEXTLDPART_YSTORM_FLUSH_SHIFT 7 u8 flags2; @@ -8649,8 +8500,8 @@ struct e4_tstorm_roce_req_conn_ag_ctx { u8 flags5; #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RULE1EN_MASK 0x1 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RULE1EN_SHIFT 0 -#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RULE2EN_MASK 0x1 -#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RULE2EN_SHIFT 1 +#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_DIF_CNT_EN_MASK 0x1 +#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_DIF_CNT_EN_SHIFT 1 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RULE3EN_MASK 0x1 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RULE3EN_SHIFT 2 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RULE4EN_MASK 0x1 @@ -8663,13 +8514,13 @@ struct e4_tstorm_roce_req_conn_ag_ctx { #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RULE7EN_SHIFT 6 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RULE8EN_MASK 0x1 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RULE8EN_SHIFT 7 - __le32 reg0; + __le32 dif_rxmit_cnt; __le32 snd_nxt_psn; __le32 snd_max_psn; __le32 orq_prod; __le32 reg4; - __le32 reg5; - __le32 reg6; + __le32 dif_acked_cnt; + __le32 dif_cnt; __le32 reg7; __le32 reg8; u8 tx_cqe_error_type; @@ -8680,7 +8531,7 @@ struct e4_tstorm_roce_req_conn_ag_ctx { __le16 snd_sq_cons; __le16 conn_dpi; __le16 force_comp_cons; - __le32 reg9; + __le32 dif_rxmit_acked_cnt; __le32 reg10; }; @@ -8955,10 +8806,10 @@ struct e4_xstorm_roce_req_conn_ag_ctx { 
#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_BIT10_SHIFT 2 #define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_BIT11_MASK 0x1 #define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_BIT11_SHIFT 3 -#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_BIT12_MASK 0x1 -#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_BIT12_SHIFT 4 -#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_BIT13_MASK 0x1 -#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_BIT13_SHIFT 5 +#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_MSDM_FLUSH_MASK 0x1 +#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_MSDM_FLUSH_SHIFT 4 +#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_MSEM_FLUSH_MASK 0x1 +#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_MSEM_FLUSH_SHIFT 5 #define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_ERROR_STATE_MASK 0x1 #define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_ERROR_STATE_SHIFT 6 #define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_YSTORM_FLUSH_MASK 0x1 @@ -9184,10 +9035,10 @@ struct e4_xstorm_roce_resp_conn_ag_ctx { #define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_BIT10_SHIFT 2 #define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_BIT11_MASK 0x1 #define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_BIT11_SHIFT 3 -#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_BIT12_MASK 0x1 -#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_BIT12_SHIFT 4 -#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_BIT13_MASK 0x1 -#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_BIT13_SHIFT 5 +#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_MSDM_FLUSH_MASK 0x1 +#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_MSDM_FLUSH_SHIFT 4 +#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_MSEM_FLUSH_MASK 0x1 +#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_MSEM_FLUSH_SHIFT 5 #define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_ERROR_STATE_MASK 0x1 #define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_ERROR_STATE_SHIFT 6 #define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_YSTORM_FLUSH_MASK 0x1 @@ -9914,7 +9765,7 @@ struct mstorm_iwarp_conn_st_ctx { /* The iwarp storm context of Ustorm */ struct ustorm_iwarp_conn_st_ctx { - __le32 reserved[24]; + struct regpair reserved[14]; }; /* iwarp connection context */ @@ -9932,6 +9783,7 @@ struct e4_iwarp_conn_context { struct regpair tstorm_st_padding[2]; struct mstorm_iwarp_conn_st_ctx mstorm_st_context; struct ustorm_iwarp_conn_st_ctx ustorm_st_context; + struct regpair ustorm_st_padding[2]; }; /* iWARP create QP params passed by driver to FW in CreateQP Request Ramrod */ @@ -9984,7 +9836,8 @@ enum iwarp_eqe_async_opcode { struct iwarp_eqe_data_mpa_async_completion { __le16 ulp_data_len; - u8 reserved[6]; + u8 rtr_type_sent; + u8 reserved[5]; }; struct iwarp_eqe_data_tcp_async_completion { @@ -10009,7 +9862,7 @@ enum iwarp_eqe_sync_opcode { /* iWARP EQE completion status */ enum iwarp_fw_return_code { - IWARP_CONN_ERROR_TCP_CONNECT_INVALID_PACKET = 5, + IWARP_CONN_ERROR_TCP_CONNECT_INVALID_PACKET = 6, IWARP_CONN_ERROR_TCP_CONNECTION_RST, IWARP_CONN_ERROR_TCP_CONNECT_TIMEOUT, IWARP_CONN_ERROR_MPA_ERROR_REJECT, @@ -10178,8 +10031,8 @@ struct iwarp_rxmit_stats_drv { * offload ramrod. 
*/ struct iwarp_tcp_offload_ramrod_data { - struct iwarp_offload_params iwarp; struct tcp_offload_params_opt2 tcp; + struct iwarp_offload_params iwarp; }; /* iWARP MPA negotiation types */ @@ -11471,8 +11324,8 @@ struct e4_tstorm_iscsi_conn_ag_ctx { u8 flags3; #define E4_TSTORM_ISCSI_CONN_AG_CTX_FLUSH_Q0_MASK 0x3 #define E4_TSTORM_ISCSI_CONN_AG_CTX_FLUSH_Q0_SHIFT 0 -#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF10_MASK 0x3 -#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF10_SHIFT 2 +#define E4_TSTORM_ISCSI_CONN_AG_CTX_FLUSH_OOO_ISLES_CF_MASK 0x3 +#define E4_TSTORM_ISCSI_CONN_AG_CTX_FLUSH_OOO_ISLES_CF_SHIFT 2 #define E4_TSTORM_ISCSI_CONN_AG_CTX_CF0EN_MASK 0x1 #define E4_TSTORM_ISCSI_CONN_AG_CTX_CF0EN_SHIFT 4 #define E4_TSTORM_ISCSI_CONN_AG_CTX_P2T_FLUSH_CF_EN_MASK 0x1 @@ -11494,8 +11347,8 @@ struct e4_tstorm_iscsi_conn_ag_ctx { #define E4_TSTORM_ISCSI_CONN_AG_CTX_CF8EN_SHIFT 4 #define E4_TSTORM_ISCSI_CONN_AG_CTX_FLUSH_Q0_EN_MASK 0x1 #define E4_TSTORM_ISCSI_CONN_AG_CTX_FLUSH_Q0_EN_SHIFT 5 -#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF10EN_MASK 0x1 -#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF10EN_SHIFT 6 +#define E4_TSTORM_ISCSI_CONN_AG_CTX_FLUSH_OOO_ISLES_CF_EN_MASK 0x1 +#define E4_TSTORM_ISCSI_CONN_AG_CTX_FLUSH_OOO_ISLES_CF_EN_SHIFT 6 #define E4_TSTORM_ISCSI_CONN_AG_CTX_RULE0EN_MASK 0x1 #define E4_TSTORM_ISCSI_CONN_AG_CTX_RULE0EN_SHIFT 7 u8 flags5; @@ -11727,7 +11580,7 @@ struct e4_ystorm_iscsi_conn_ag_ctx { /* The trace in the buffer */ #define MFW_TRACE_EVENTID_MASK 0x00ffff #define MFW_TRACE_PRM_SIZE_MASK 0x0f0000 -#define MFW_TRACE_PRM_SIZE_SHIFT 16 +#define MFW_TRACE_PRM_SIZE_OFFSET 16 #define MFW_TRACE_ENTRY_SIZE 3 struct mcp_trace { @@ -12485,6 +12338,11 @@ enum resource_id_enum { RESOURCE_LL2_QUEUE_E = 15, RESOURCE_RDMA_STATS_QUEUE_E = 16, RESOURCE_BDQ_E = 17, + RESOURCE_QCN_E = 18, + RESOURCE_LLH_FILTER_E = 19, + RESOURCE_VF_MAC_ADDR = 20, + RESOURCE_LL2_CQS_E = 21, + RESOURCE_VF_CNQS = 22, RESOURCE_MAX_NUM, RESOURCE_NUM_INVALID = 0xFFFFFFFF }; @@ -12675,7 +12533,10 @@ struct public_drv_mb { #define DRV_MB_PARAM_DCBX_NOTIFY_SHIFT 3 #define DRV_MB_PARAM_NVM_PUT_FILE_BEGIN_MBI 0x3 +#define DRV_MB_PARAM_NVM_OFFSET_OFFSET 0 +#define DRV_MB_PARAM_NVM_OFFSET_MASK 0x00FFFFFF #define DRV_MB_PARAM_NVM_LEN_OFFSET 24 +#define DRV_MB_PARAM_NVM_LEN_MASK 0xFF000000 #define DRV_MB_PARAM_CFG_VF_MSIX_VF_ID_SHIFT 0 #define DRV_MB_PARAM_CFG_VF_MSIX_VF_ID_MASK 0x000000FF @@ -13436,6 +13297,21 @@ enum nvm_image_type { NVM_TYPE_FCOE_CFG = 0x1f, NVM_TYPE_ETH_PHY_FW1 = 0x20, NVM_TYPE_ETH_PHY_FW2 = 0x21, + NVM_TYPE_BDN = 0x22, + NVM_TYPE_8485X_PHY_FW = 0x23, + NVM_TYPE_PUB_KEY = 0x24, + NVM_TYPE_RECOVERY = 0x25, + NVM_TYPE_PLDM = 0x26, + NVM_TYPE_UPK1 = 0x27, + NVM_TYPE_UPK2 = 0x28, + NVM_TYPE_MASTER_KC = 0x29, + NVM_TYPE_BACKUP_KC = 0x2a, + NVM_TYPE_HW_DUMP = 0x2b, + NVM_TYPE_HW_DUMP_OUT = 0x2c, + NVM_TYPE_BIN_NVM_META = 0x30, + NVM_TYPE_ROM_TEST = 0xf0, + NVM_TYPE_88X33X0_PHY_FW = 0x31, + NVM_TYPE_88X33X0_PHY_SLAVE_FW = 0x32, NVM_TYPE_MAX, }; diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c index a4de9e3ef72c..4ab8cfaf63d1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hw.c +++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c @@ -393,7 +393,7 @@ u32 qed_vfid_to_concrete(struct qed_hwfn *p_hwfn, u8 vfid) /* DMAE */ #define QED_DMAE_FLAGS_IS_SET(params, flag) \ - ((params) != NULL && ((params)->flags & QED_DMAE_FLAG_##flag)) + ((params) != NULL && GET_FIELD((params)->flags, QED_DMAE_PARAMS_##flag)) static void qed_dmae_opcode(struct qed_hwfn *p_hwfn, const u8 is_src_type_grc, @@ -408,62 
+408,55 @@ static void qed_dmae_opcode(struct qed_hwfn *p_hwfn, * 0- The source is the PCIe * 1- The source is the GRC. */ - opcode |= (is_src_type_grc ? DMAE_CMD_SRC_MASK_GRC - : DMAE_CMD_SRC_MASK_PCIE) << - DMAE_CMD_SRC_SHIFT; - src_pfid = QED_DMAE_FLAGS_IS_SET(p_params, PF_SRC) ? - p_params->src_pfid : p_hwfn->rel_pf_id; - opcode |= ((src_pfid & DMAE_CMD_SRC_PF_ID_MASK) << - DMAE_CMD_SRC_PF_ID_SHIFT); + SET_FIELD(opcode, DMAE_CMD_SRC, + (is_src_type_grc ? dmae_cmd_src_grc : dmae_cmd_src_pcie)); + src_pfid = QED_DMAE_FLAGS_IS_SET(p_params, SRC_PF_VALID) ? + p_params->src_pfid : p_hwfn->rel_pf_id; + SET_FIELD(opcode, DMAE_CMD_SRC_PF_ID, src_pfid); /* The destination of the DMA can be: 0-None 1-PCIe 2-GRC 3-None */ - opcode |= (is_dst_type_grc ? DMAE_CMD_DST_MASK_GRC - : DMAE_CMD_DST_MASK_PCIE) << - DMAE_CMD_DST_SHIFT; - dst_pfid = QED_DMAE_FLAGS_IS_SET(p_params, PF_DST) ? - p_params->dst_pfid : p_hwfn->rel_pf_id; - opcode |= ((dst_pfid & DMAE_CMD_DST_PF_ID_MASK) << - DMAE_CMD_DST_PF_ID_SHIFT); + SET_FIELD(opcode, DMAE_CMD_DST, + (is_dst_type_grc ? dmae_cmd_dst_grc : dmae_cmd_dst_pcie)); + dst_pfid = QED_DMAE_FLAGS_IS_SET(p_params, DST_PF_VALID) ? + p_params->dst_pfid : p_hwfn->rel_pf_id; + SET_FIELD(opcode, DMAE_CMD_DST_PF_ID, dst_pfid); + /* Whether to write a completion word to the completion destination: * 0-Do not write a completion word * 1-Write the completion word */ - opcode |= (DMAE_CMD_COMP_WORD_EN_MASK << DMAE_CMD_COMP_WORD_EN_SHIFT); - opcode |= (DMAE_CMD_SRC_ADDR_RESET_MASK << - DMAE_CMD_SRC_ADDR_RESET_SHIFT); + SET_FIELD(opcode, DMAE_CMD_COMP_WORD_EN, 1); + SET_FIELD(opcode, DMAE_CMD_SRC_ADDR_RESET, 1); if (QED_DMAE_FLAGS_IS_SET(p_params, COMPLETION_DST)) - opcode |= (1 << DMAE_CMD_COMP_FUNC_SHIFT); + SET_FIELD(opcode, DMAE_CMD_COMP_FUNC, 1); - opcode |= (DMAE_CMD_ENDIANITY << DMAE_CMD_ENDIANITY_MODE_SHIFT); + /* swapping mode 3 - big endian */ + SET_FIELD(opcode, DMAE_CMD_ENDIANITY_MODE, DMAE_CMD_ENDIANITY); - port_id = (QED_DMAE_FLAGS_IS_SET(p_params, PORT)) ? - p_params->port_id : p_hwfn->port_id; - opcode |= (port_id << DMAE_CMD_PORT_ID_SHIFT); + port_id = (QED_DMAE_FLAGS_IS_SET(p_params, PORT_VALID)) ? 
+ p_params->port_id : p_hwfn->port_id; + SET_FIELD(opcode, DMAE_CMD_PORT_ID, port_id); /* reset source address in next go */ - opcode |= (DMAE_CMD_SRC_ADDR_RESET_MASK << - DMAE_CMD_SRC_ADDR_RESET_SHIFT); + SET_FIELD(opcode, DMAE_CMD_SRC_ADDR_RESET, 1); /* reset dest address in next go */ - opcode |= (DMAE_CMD_DST_ADDR_RESET_MASK << - DMAE_CMD_DST_ADDR_RESET_SHIFT); + SET_FIELD(opcode, DMAE_CMD_DST_ADDR_RESET, 1); /* SRC/DST VFID: all 1's - pf, otherwise VF id */ - if (QED_DMAE_FLAGS_IS_SET(p_params, VF_SRC)) { - opcode |= 1 << DMAE_CMD_SRC_VF_ID_VALID_SHIFT; - opcode_b |= p_params->src_vfid << DMAE_CMD_SRC_VF_ID_SHIFT; + if (QED_DMAE_FLAGS_IS_SET(p_params, SRC_VF_VALID)) { + SET_FIELD(opcode, DMAE_CMD_SRC_VF_ID_VALID, 1); + SET_FIELD(opcode_b, DMAE_CMD_SRC_VF_ID, p_params->src_vfid); } else { - opcode_b |= DMAE_CMD_SRC_VF_ID_MASK << - DMAE_CMD_SRC_VF_ID_SHIFT; + SET_FIELD(opcode_b, DMAE_CMD_SRC_VF_ID, 0xFF); } - - if (QED_DMAE_FLAGS_IS_SET(p_params, VF_DST)) { - opcode |= 1 << DMAE_CMD_DST_VF_ID_VALID_SHIFT; - opcode_b |= p_params->dst_vfid << DMAE_CMD_DST_VF_ID_SHIFT; + if (QED_DMAE_FLAGS_IS_SET(p_params, DST_VF_VALID)) { + SET_FIELD(opcode, DMAE_CMD_DST_VF_ID_VALID, 1); + SET_FIELD(opcode_b, DMAE_CMD_DST_VF_ID, p_params->dst_vfid); } else { - opcode_b |= DMAE_CMD_DST_VF_ID_MASK << DMAE_CMD_DST_VF_ID_SHIFT; + SET_FIELD(opcode_b, DMAE_CMD_DST_VF_ID, 0xFF); } p_hwfn->dmae_info.p_dmae_cmd->opcode = cpu_to_le32(opcode); diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c index d6430dfebd83..2f1049b0b93a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c +++ b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c @@ -44,9 +44,9 @@ #define CDU_VALIDATION_DEFAULT_CFG 61 static u16 con_region_offsets[3][NUM_OF_CONNECTION_TYPES_E4] = { - {400, 336, 352, 304, 304, 384, 416, 352}, /* region 3 offsets */ - {528, 496, 416, 448, 448, 512, 544, 480}, /* region 4 offsets */ - {608, 544, 496, 512, 576, 592, 624, 560} /* region 5 offsets */ + {400, 336, 352, 368, 304, 384, 416, 352}, /* region 3 offsets */ + {528, 496, 416, 512, 448, 512, 544, 480}, /* region 4 offsets */ + {608, 544, 496, 576, 576, 592, 624, 560} /* region 5 offsets */ }; static u16 task_region_offsets[1][NUM_OF_CONNECTION_TYPES_E4] = { @@ -61,6 +61,9 @@ static u16 task_region_offsets[1][NUM_OF_CONNECTION_TYPES_E4] = { 0x100) - 1 : 0) #define QM_INVALID_PQ_ID 0xffff +/* Max link speed (in Mbps) */ +#define QM_MAX_LINK_SPEED 100000 + /* Feature enable */ #define QM_BYPASS_EN 1 #define QM_BYTE_CRD_EN 1 @@ -128,8 +131,6 @@ static u16 task_region_offsets[1][NUM_OF_CONNECTION_TYPES_E4] = { /* Pure LB CmdQ lines (+spare) */ #define PBF_CMDQ_PURE_LB_LINES 150 -#define PBF_CMDQ_LINES_E5_RSVD_RATIO 8 - #define PBF_CMDQ_LINES_RT_OFFSET(ext_voq) \ (PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET + \ (ext_voq) * (PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET - \ @@ -140,6 +141,9 @@ static u16 task_region_offsets[1][NUM_OF_CONNECTION_TYPES_E4] = { (ext_voq) * (PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET - \ PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET)) +/* Returns the VOQ line credit for the specified number of PBF command lines. + * PBF lines are specified in 256b units. 
+ */ #define QM_VOQ_LINE_CRD(pbf_cmd_lines) \ ((((pbf_cmd_lines) - 4) * 2) | QM_LINE_CRD_REG_SIGN_BIT) @@ -178,14 +182,14 @@ static u16 task_region_offsets[1][NUM_OF_CONNECTION_TYPES_E4] = { cmd ## _ ## field, \ value) -#define QM_INIT_TX_PQ_MAP(p_hwfn, map, chip, pq_id, rl_valid, vp_pq_id, rl_id, \ +#define QM_INIT_TX_PQ_MAP(p_hwfn, map, chip, pq_id, vp_pq_id, rl_valid, rl_id, \ ext_voq, wrr) \ do { \ typeof(map) __map; \ memset(&__map, 0, sizeof(__map)); \ SET_FIELD(__map.reg, QM_RF_PQ_MAP_ ## chip ## _PQ_VALID, 1); \ SET_FIELD(__map.reg, QM_RF_PQ_MAP_ ## chip ## _RL_VALID, \ - rl_valid); \ + rl_valid ? 1 : 0);\ SET_FIELD(__map.reg, QM_RF_PQ_MAP_ ## chip ## _VP_PQ_ID, \ vp_pq_id); \ SET_FIELD(__map.reg, QM_RF_PQ_MAP_ ## chip ## _RL_ID, rl_id); \ @@ -200,9 +204,12 @@ static u16 task_region_offsets[1][NUM_OF_CONNECTION_TYPES_E4] = { #define WRITE_PQ_INFO_TO_RAM 1 #define PQ_INFO_ELEMENT(vp, pf, tc, port, rl_valid, rl) \ (((vp) << 0) | ((pf) << 12) | ((tc) << 16) | ((port) << 20) | \ - ((rl_valid) << 22) | ((rl) << 24)) + ((rl_valid ? 1 : 0) << 22) | (((rl) & 255) << 24) | \ + (((rl) >> 8) << 9)) + #define PQ_INFO_RAM_GRC_ADDRESS(pq_id) \ - (XSEM_REG_FAST_MEMORY + SEM_FAST_REG_INT_RAM + 21776 + (pq_id) * 4) + XSEM_REG_FAST_MEMORY + SEM_FAST_REG_INT_RAM + \ + XSTORM_PQ_INFO_OFFSET(pq_id) /******************** INTERNAL IMPLEMENTATION *********************/ @@ -228,9 +235,6 @@ static void qed_enable_pf_rl(struct qed_hwfn *p_hwfn, bool pf_rl_en) STORE_RT_REG(p_hwfn, QM_REG_RLPFVOQENABLE_RT_OFFSET, (u32)voq_bit_mask); - if (num_ext_voqs >= 32) - STORE_RT_REG(p_hwfn, QM_REG_RLPFVOQENABLE_MSB_RT_OFFSET, - (u32)(voq_bit_mask >> 32)); /* Write RL period */ STORE_RT_REG(p_hwfn, @@ -259,12 +263,12 @@ static void qed_enable_pf_wfq(struct qed_hwfn *p_hwfn, bool pf_wfq_en) QM_WFQ_UPPER_BOUND); } -/* Prepare VPORT RL enable/disable runtime init values */ -static void qed_enable_vport_rl(struct qed_hwfn *p_hwfn, bool vport_rl_en) +/* Prepare global RL enable/disable runtime init values */ +static void qed_enable_global_rl(struct qed_hwfn *p_hwfn, bool global_rl_en) { STORE_RT_REG(p_hwfn, QM_REG_RLGLBLENABLE_RT_OFFSET, - vport_rl_en ? 1 : 0); - if (vport_rl_en) { + global_rl_en ? 1 : 0); + if (global_rl_en) { /* Write RL period (use timer 0 only) */ STORE_RT_REG(p_hwfn, QM_REG_RLGLBLPERIOD_0_RT_OFFSET, @@ -331,8 +335,7 @@ static void qed_cmdq_lines_rt_init( continue; /* Find number of command queue lines to divide between the - * active physical TCs. In E5, 1/8 of the lines are reserved. - * the lines for pure LB TC are subtracted. + * active physical TCs. */ phys_lines = port_params[port_id].num_pbf_cmd_lines; phys_lines -= PBF_CMDQ_PURE_LB_LINES; @@ -361,11 +364,30 @@ static void qed_cmdq_lines_rt_init( ext_voq = qed_get_ext_voq(p_hwfn, port_id, PURE_LB_TC, max_phys_tcs_per_port); - qed_cmdq_lines_voq_rt_init(p_hwfn, - ext_voq, PBF_CMDQ_PURE_LB_LINES); + qed_cmdq_lines_voq_rt_init(p_hwfn, ext_voq, + PBF_CMDQ_PURE_LB_LINES); } } +/* Prepare runtime init values to allocate guaranteed BTB blocks for the + * specified port. The guaranteed BTB space is divided between the TCs as + * follows (shared space Is currently not used): + * 1. Parameters: + * B - BTB blocks for this port + * C - Number of physical TCs for this port + * 2. Calculation: + * a. 38 blocks (9700B jumbo frame) are allocated for global per port + * headroom. + * b. B = B - 38 (remainder after global headroom allocation). + * c. MAX(38,B/(C+0.7)) blocks are allocated for the pure LB VOQ. + * d. 
B = B - MAX(38, B/(C+0.7)) (remainder after pure LB allocation). + * e. B/C blocks are allocated for each physical TC. + * Assumptions: + * - MTU is up to 9700 bytes (38 blocks) + * - All TCs are considered symmetrical (same rate and packet size) + * - No optimization for lossy TC (all are considered lossless). Shared space + * is not enabled and allocated for each TC. + */ static void qed_btb_blocks_rt_init( struct qed_hwfn *p_hwfn, u8 max_ports_per_engine, @@ -424,6 +446,34 @@ static void qed_btb_blocks_rt_init( } } +/* Prepare runtime init values for the specified RL. + * Set max link speed (100Gbps) per rate limiter. + * Return -1 on error. + */ +static int qed_global_rl_rt_init(struct qed_hwfn *p_hwfn) +{ + u32 upper_bound = QM_VP_RL_UPPER_BOUND(QM_MAX_LINK_SPEED) | + (u32)QM_RL_CRD_REG_SIGN_BIT; + u32 inc_val; + u16 rl_id; + + /* Go over all global RLs */ + for (rl_id = 0; rl_id < MAX_QM_GLOBAL_RLS; rl_id++) { + inc_val = QM_RL_INC_VAL(QM_MAX_LINK_SPEED); + + STORE_RT_REG(p_hwfn, + QM_REG_RLGLBLCRD_RT_OFFSET + rl_id, + (u32)QM_RL_CRD_REG_SIGN_BIT); + STORE_RT_REG(p_hwfn, + QM_REG_RLGLBLUPPERBOUND_RT_OFFSET + rl_id, + upper_bound); + STORE_RT_REG(p_hwfn, + QM_REG_RLGLBLINCVAL_RT_OFFSET + rl_id, inc_val); + } + + return 0; +} + /* Prepare Tx PQ mapping runtime init values for the specified PF */ static void qed_tx_pq_map_rt_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, @@ -460,18 +510,17 @@ static void qed_tx_pq_map_rt_init(struct qed_hwfn *p_hwfn, /* Go over all Tx PQs */ for (i = 0, pq_id = p_params->start_pq; i < num_pqs; i++, pq_id++) { - u8 ext_voq, vport_id_in_pf, tc_id = pq_params[i].tc_id; - u32 max_qm_global_rls = MAX_QM_GLOBAL_RLS; + u16 *p_first_tx_pq_id, vport_id_in_pf; struct qm_rf_pq_map_e4 tx_pq_map; - bool is_vf_pq, rl_valid; - u16 *p_first_tx_pq_id; + u8 tc_id = pq_params[i].tc_id; + bool is_vf_pq; + u8 ext_voq; ext_voq = qed_get_ext_voq(p_hwfn, pq_params[i].port_id, tc_id, p_params->max_phys_tcs_per_port); is_vf_pq = (i >= p_params->num_pf_pqs); - rl_valid = pq_params[i].rl_valid > 0; /* Update first Tx PQ of VPORT/TC */ vport_id_in_pf = pq_params[i].vport_id - p_params->start_vport; @@ -492,21 +541,14 @@ static void qed_tx_pq_map_rt_init(struct qed_hwfn *p_hwfn, map_val); } - /* Check RL ID */ - if (rl_valid && pq_params[i].vport_id >= max_qm_global_rls) { - DP_NOTICE(p_hwfn, - "Invalid VPORT ID for rate limiter configuration\n"); - rl_valid = false; - } - /* Prepare PQ map entry */ QM_INIT_TX_PQ_MAP(p_hwfn, tx_pq_map, E4, pq_id, - rl_valid ? 1 : 0, *p_first_tx_pq_id, - rl_valid ? pq_params[i].vport_id : 0, + pq_params[i].rl_valid, + pq_params[i].rl_id, ext_voq, pq_params[i].wrr_group); /* Set PQ base address */ @@ -529,9 +571,8 @@ static void qed_tx_pq_map_rt_init(struct qed_hwfn *p_hwfn, p_params->pf_id, tc_id, pq_params[i].port_id, - rl_valid ? 1 : 0, - rl_valid ? - pq_params[i].vport_id : 0); + pq_params[i].rl_valid, + pq_params[i].rl_id); qed_wr(p_hwfn, p_ptt, PQ_INFO_RAM_GRC_ADDRESS(pq_id), pq_info); } @@ -669,19 +710,19 @@ static int qed_pf_rl_rt_init(struct qed_hwfn *p_hwfn, u8 pf_id, u32 pf_rl) * Return -1 on error. 
*/ static int qed_vp_wfq_rt_init(struct qed_hwfn *p_hwfn, - u8 num_vports, + u16 num_vports, struct init_qm_vport_params *vport_params) { - u16 vport_pq_id; + u16 vport_pq_id, i; u32 inc_val; - u8 tc, i; + u8 tc; /* Go over all PF VPORTs */ for (i = 0; i < num_vports; i++) { - if (!vport_params[i].vport_wfq) + if (!vport_params[i].wfq) continue; - inc_val = QM_WFQ_INC_VAL(vport_params[i].vport_wfq); + inc_val = QM_WFQ_INC_VAL(vport_params[i].wfq); if (inc_val > QM_WFQ_MAX_INC_VAL) { DP_NOTICE(p_hwfn, "Invalid VPORT WFQ weight configuration\n"); @@ -706,48 +747,6 @@ static int qed_vp_wfq_rt_init(struct qed_hwfn *p_hwfn, return 0; } -/* Prepare VPORT RL runtime init values for the specified VPORTs. - * Return -1 on error. - */ -static int qed_vport_rl_rt_init(struct qed_hwfn *p_hwfn, - u8 start_vport, - u8 num_vports, - u32 link_speed, - struct init_qm_vport_params *vport_params) -{ - u8 i, vport_id; - u32 inc_val; - - if (start_vport + num_vports >= MAX_QM_GLOBAL_RLS) { - DP_NOTICE(p_hwfn, - "Invalid VPORT ID for rate limiter configuration\n"); - return -1; - } - - /* Go over all PF VPORTs */ - for (i = 0, vport_id = start_vport; i < num_vports; i++, vport_id++) { - inc_val = QM_RL_INC_VAL(vport_params[i].vport_rl ? - vport_params[i].vport_rl : - link_speed); - if (inc_val > QM_VP_RL_MAX_INC_VAL(link_speed)) { - DP_NOTICE(p_hwfn, - "Invalid VPORT rate-limit configuration\n"); - return -1; - } - - STORE_RT_REG(p_hwfn, QM_REG_RLGLBLCRD_RT_OFFSET + vport_id, - (u32)QM_RL_CRD_REG_SIGN_BIT); - STORE_RT_REG(p_hwfn, - QM_REG_RLGLBLUPPERBOUND_RT_OFFSET + vport_id, - QM_VP_RL_UPPER_BOUND(link_speed) | - (u32)QM_RL_CRD_REG_SIGN_BIT); - STORE_RT_REG(p_hwfn, QM_REG_RLGLBLINCVAL_RT_OFFSET + vport_id, - inc_val); - } - - return 0; -} - static bool qed_poll_on_qm_cmd_ready(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { @@ -799,23 +798,20 @@ u32 qed_qm_pf_mem_size(u32 num_pf_cids, int qed_qm_common_rt_init(struct qed_hwfn *p_hwfn, struct qed_qm_common_rt_init_params *p_params) { - /* Init AFullOprtnstcCrdMask */ - u32 mask = (QM_OPPOR_LINE_VOQ_DEF << - QM_RF_OPPORTUNISTIC_MASK_LINEVOQ_SHIFT) | - (QM_BYTE_CRD_EN << QM_RF_OPPORTUNISTIC_MASK_BYTEVOQ_SHIFT) | - (p_params->pf_wfq_en << - QM_RF_OPPORTUNISTIC_MASK_PFWFQ_SHIFT) | - (p_params->vport_wfq_en << - QM_RF_OPPORTUNISTIC_MASK_VPWFQ_SHIFT) | - (p_params->pf_rl_en << - QM_RF_OPPORTUNISTIC_MASK_PFRL_SHIFT) | - (p_params->vport_rl_en << - QM_RF_OPPORTUNISTIC_MASK_VPQCNRL_SHIFT) | - (QM_OPPOR_FW_STOP_DEF << - QM_RF_OPPORTUNISTIC_MASK_FWPAUSE_SHIFT) | - (QM_OPPOR_PQ_EMPTY_DEF << - QM_RF_OPPORTUNISTIC_MASK_QUEUEEMPTY_SHIFT); + u32 mask = 0; + /* Init AFullOprtnstcCrdMask */ + SET_FIELD(mask, QM_RF_OPPORTUNISTIC_MASK_LINEVOQ, + QM_OPPOR_LINE_VOQ_DEF); + SET_FIELD(mask, QM_RF_OPPORTUNISTIC_MASK_BYTEVOQ, QM_BYTE_CRD_EN); + SET_FIELD(mask, QM_RF_OPPORTUNISTIC_MASK_PFWFQ, p_params->pf_wfq_en); + SET_FIELD(mask, QM_RF_OPPORTUNISTIC_MASK_VPWFQ, p_params->vport_wfq_en); + SET_FIELD(mask, QM_RF_OPPORTUNISTIC_MASK_PFRL, p_params->pf_rl_en); + SET_FIELD(mask, QM_RF_OPPORTUNISTIC_MASK_VPQCNRL, + p_params->global_rl_en); + SET_FIELD(mask, QM_RF_OPPORTUNISTIC_MASK_FWPAUSE, QM_OPPOR_FW_STOP_DEF); + SET_FIELD(mask, + QM_RF_OPPORTUNISTIC_MASK_QUEUEEMPTY, QM_OPPOR_PQ_EMPTY_DEF); STORE_RT_REG(p_hwfn, QM_REG_AFULLOPRTNSTCCRDMASK_RT_OFFSET, mask); /* Enable/disable PF RL */ @@ -824,8 +820,8 @@ int qed_qm_common_rt_init(struct qed_hwfn *p_hwfn, /* Enable/disable PF WFQ */ qed_enable_pf_wfq(p_hwfn, p_params->pf_wfq_en); - /* Enable/disable VPORT RL */ - qed_enable_vport_rl(p_hwfn, 
p_params->vport_rl_en); + /* Enable/disable global RL */ + qed_enable_global_rl(p_hwfn, p_params->global_rl_en); /* Enable/disable VPORT WFQ */ qed_enable_vport_wfq(p_hwfn, p_params->vport_wfq_en); @@ -842,6 +838,8 @@ int qed_qm_common_rt_init(struct qed_hwfn *p_hwfn, p_params->max_phys_tcs_per_port, p_params->port_params); + qed_global_rl_rt_init(p_hwfn); + return 0; } @@ -853,7 +851,9 @@ int qed_qm_pf_rt_init(struct qed_hwfn *p_hwfn, u32 other_mem_size_4kb = QM_PQ_MEM_4KB(p_params->num_pf_cids + p_params->num_tids) * QM_OTHER_PQS_PER_PF; - u8 tc, i; + u16 i; + u8 tc; + /* Clear first Tx PQ ID array for each VPORT */ for (i = 0; i < p_params->num_vports; i++) @@ -878,16 +878,10 @@ int qed_qm_pf_rt_init(struct qed_hwfn *p_hwfn, if (qed_pf_rl_rt_init(p_hwfn, p_params->pf_id, p_params->pf_rl)) return -1; - /* Set VPORT WFQ */ + /* Init VPORT WFQ */ if (qed_vp_wfq_rt_init(p_hwfn, p_params->num_vports, vport_params)) return -1; - /* Set VPORT RL */ - if (qed_vport_rl_rt_init(p_hwfn, p_params->start_vport, - p_params->num_vports, p_params->link_speed, - vport_params)) - return -1; - return 0; } @@ -925,18 +919,19 @@ int qed_init_pf_rl(struct qed_hwfn *p_hwfn, int qed_init_vport_wfq(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - u16 first_tx_pq_id[NUM_OF_TCS], u16 vport_wfq) + u16 first_tx_pq_id[NUM_OF_TCS], u16 wfq) { u16 vport_pq_id; u32 inc_val; u8 tc; - inc_val = QM_WFQ_INC_VAL(vport_wfq); + inc_val = QM_WFQ_INC_VAL(wfq); if (!inc_val || inc_val > QM_WFQ_MAX_INC_VAL) { - DP_NOTICE(p_hwfn, "Invalid VPORT WFQ weight configuration\n"); + DP_NOTICE(p_hwfn, "Invalid VPORT WFQ configuration.\n"); return -1; } + /* A VPORT can have several VPORT PQ IDs for various TCs */ for (tc = 0; tc < NUM_OF_TCS; tc++) { vport_pq_id = first_tx_pq_id[tc]; if (vport_pq_id != QM_INVALID_PQ_ID) @@ -948,28 +943,20 @@ int qed_init_vport_wfq(struct qed_hwfn *p_hwfn, return 0; } -int qed_init_vport_rl(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u8 vport_id, u32 vport_rl, u32 link_speed) +int qed_init_global_rl(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u16 rl_id, u32 rate_limit) { - u32 inc_val, max_qm_global_rls = MAX_QM_GLOBAL_RLS; + u32 inc_val; - if (vport_id >= max_qm_global_rls) { - DP_NOTICE(p_hwfn, - "Invalid VPORT ID for rate limiter configuration\n"); + inc_val = QM_RL_INC_VAL(rate_limit); + if (inc_val > QM_VP_RL_MAX_INC_VAL(rate_limit)) { + DP_NOTICE(p_hwfn, "Invalid rate limit configuration.\n"); return -1; } - inc_val = QM_RL_INC_VAL(vport_rl ? vport_rl : link_speed); - if (inc_val > QM_VP_RL_MAX_INC_VAL(link_speed)) { - DP_NOTICE(p_hwfn, "Invalid VPORT rate-limit configuration\n"); - return -1; - } - - qed_wr(p_hwfn, - p_ptt, - QM_REG_RLGLBLCRD + vport_id * 4, (u32)QM_RL_CRD_REG_SIGN_BIT); - qed_wr(p_hwfn, p_ptt, QM_REG_RLGLBLINCVAL + vport_id * 4, inc_val); + qed_wr(p_hwfn, p_ptt, + QM_REG_RLGLBLCRD + rl_id * 4, (u32)QM_RL_CRD_REG_SIGN_BIT); + qed_wr(p_hwfn, p_ptt, QM_REG_RLGLBLINCVAL + rl_id * 4, inc_val); return 0; } @@ -1013,7 +1000,6 @@ bool qed_send_qm_stop_cmd(struct qed_hwfn *p_hwfn, return true; } - #define SET_TUNNEL_TYPE_ENABLE_BIT(var, offset, enable) \ do { \ typeof(var) *__p_var = &(var); \ @@ -1021,8 +1007,59 @@ bool qed_send_qm_stop_cmd(struct qed_hwfn *p_hwfn, *__p_var = (*__p_var & ~BIT(__offset)) | \ ((enable) ? 
BIT(__offset) : 0); \ } while (0) -#define PRS_ETH_TUNN_OUTPUT_FORMAT -188897008 -#define PRS_ETH_OUTPUT_FORMAT -46832 + +#define PRS_ETH_TUNN_OUTPUT_FORMAT 0xF4DAB910 +#define PRS_ETH_OUTPUT_FORMAT 0xFFFF4910 + +#define ARR_REG_WR(dev, ptt, addr, arr, arr_size) \ + do { \ + u32 i; \ + \ + for (i = 0; i < (arr_size); i++) \ + qed_wr(dev, ptt, \ + ((addr) + (4 * i)), \ + ((u32 *)&(arr))[i]); \ + } while (0) + +/** + * @brief qed_dmae_to_grc - is an internal function - writes from host to + * wide-bus registers (split registers are not supported yet) + * + * @param p_hwfn - HW device data + * @param p_ptt - ptt window used for writing the registers. + * @param p_data - pointer to source data. + * @param addr - Destination register address. + * @param len_in_dwords - data length in DWARDS (u32) + */ +static int qed_dmae_to_grc(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *p_data, u32 addr, u32 len_in_dwords) +{ + struct qed_dmae_params params = {}; + int rc; + + if (!p_data) + return -1; + + /* Set DMAE params */ + SET_FIELD(params.flags, QED_DMAE_PARAMS_COMPLETION_DST, 1); + + /* Execute DMAE command */ + rc = qed_dmae_host2grc(p_hwfn, p_ptt, + (u64)(uintptr_t)(p_data), + addr, len_in_dwords, ¶ms); + + /* If not read using DMAE, read using GRC */ + if (rc) { + DP_VERBOSE(p_hwfn, + QED_MSG_DEBUG, + "Failed writing to chip using DMAE, using GRC instead\n"); + /* write to registers using GRC */ + ARR_REG_WR(p_hwfn, p_ptt, addr, p_data, len_in_dwords); + } + + return len_in_dwords; +} void qed_set_vxlan_dest_port(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u16 dest_port) @@ -1166,8 +1203,8 @@ void qed_set_geneve_enable(struct qed_hwfn *p_hwfn, ip_geneve_enable ? 1 : 0); } -#define PRS_ETH_VXLAN_NO_L2_ENABLE_OFFSET 4 -#define PRS_ETH_VXLAN_NO_L2_OUTPUT_FORMAT -927094512 +#define PRS_ETH_VXLAN_NO_L2_ENABLE_OFFSET 3 +#define PRS_ETH_VXLAN_NO_L2_OUTPUT_FORMAT -925189872 void qed_set_vxlan_no_l2_enable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, bool enable) @@ -1208,6 +1245,8 @@ void qed_set_vxlan_no_l2_enable(struct qed_hwfn *p_hwfn, void qed_gft_disable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u16 pf_id) { + struct regpair ram_line = { }; + /* Disable gft search for PF */ qed_wr(p_hwfn, p_ptt, PRS_REG_SEARCH_GFT, 0); @@ -1217,12 +1256,9 @@ void qed_gft_disable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u16 pf_id) qed_wr(p_hwfn, p_ptt, PRS_REG_GFT_CAM + CAM_LINE_SIZE * pf_id, 0); /* Zero ramline */ - qed_wr(p_hwfn, - p_ptt, PRS_REG_GFT_PROFILE_MASK_RAM + RAM_LINE_SIZE * pf_id, 0); - qed_wr(p_hwfn, - p_ptt, - PRS_REG_GFT_PROFILE_MASK_RAM + RAM_LINE_SIZE * pf_id + REG_SIZE, - 0); + qed_dmae_to_grc(p_hwfn, p_ptt, (u32 *)&ram_line, + PRS_REG_GFT_PROFILE_MASK_RAM + RAM_LINE_SIZE * pf_id, + sizeof(ram_line) / REG_SIZE); } void qed_gft_config(struct qed_hwfn *p_hwfn, @@ -1232,7 +1268,8 @@ void qed_gft_config(struct qed_hwfn *p_hwfn, bool udp, bool ipv4, bool ipv6, enum gft_profile_type profile_type) { - u32 reg_val, cam_line, ram_line_lo, ram_line_hi, search_non_ip_as_gft; + u32 reg_val, cam_line, search_non_ip_as_gft; + struct regpair ram_line = { }; if (!ipv6 && !ipv4) DP_NOTICE(p_hwfn, @@ -1298,35 +1335,33 @@ void qed_gft_config(struct qed_hwfn *p_hwfn, qed_rd(p_hwfn, p_ptt, PRS_REG_GFT_CAM + CAM_LINE_SIZE * pf_id); /* Write line to RAM - compare to filter 4 tuple */ - ram_line_lo = 0; - ram_line_hi = 0; /* Search no IP as GFT */ search_non_ip_as_gft = 0; /* Tunnel type */ - SET_FIELD(ram_line_lo, GFT_RAM_LINE_TUNNEL_DST_PORT, 1); - SET_FIELD(ram_line_lo, 
GFT_RAM_LINE_TUNNEL_OVER_IP_PROTOCOL, 1); + SET_FIELD(ram_line.lo, GFT_RAM_LINE_TUNNEL_DST_PORT, 1); + SET_FIELD(ram_line.lo, GFT_RAM_LINE_TUNNEL_OVER_IP_PROTOCOL, 1); if (profile_type == GFT_PROFILE_TYPE_4_TUPLE) { - SET_FIELD(ram_line_hi, GFT_RAM_LINE_DST_IP, 1); - SET_FIELD(ram_line_hi, GFT_RAM_LINE_SRC_IP, 1); - SET_FIELD(ram_line_hi, GFT_RAM_LINE_OVER_IP_PROTOCOL, 1); - SET_FIELD(ram_line_lo, GFT_RAM_LINE_ETHERTYPE, 1); - SET_FIELD(ram_line_lo, GFT_RAM_LINE_SRC_PORT, 1); - SET_FIELD(ram_line_lo, GFT_RAM_LINE_DST_PORT, 1); + SET_FIELD(ram_line.hi, GFT_RAM_LINE_DST_IP, 1); + SET_FIELD(ram_line.hi, GFT_RAM_LINE_SRC_IP, 1); + SET_FIELD(ram_line.hi, GFT_RAM_LINE_OVER_IP_PROTOCOL, 1); + SET_FIELD(ram_line.lo, GFT_RAM_LINE_ETHERTYPE, 1); + SET_FIELD(ram_line.lo, GFT_RAM_LINE_SRC_PORT, 1); + SET_FIELD(ram_line.lo, GFT_RAM_LINE_DST_PORT, 1); } else if (profile_type == GFT_PROFILE_TYPE_L4_DST_PORT) { - SET_FIELD(ram_line_hi, GFT_RAM_LINE_OVER_IP_PROTOCOL, 1); - SET_FIELD(ram_line_lo, GFT_RAM_LINE_ETHERTYPE, 1); - SET_FIELD(ram_line_lo, GFT_RAM_LINE_DST_PORT, 1); + SET_FIELD(ram_line.hi, GFT_RAM_LINE_OVER_IP_PROTOCOL, 1); + SET_FIELD(ram_line.lo, GFT_RAM_LINE_ETHERTYPE, 1); + SET_FIELD(ram_line.lo, GFT_RAM_LINE_DST_PORT, 1); } else if (profile_type == GFT_PROFILE_TYPE_IP_DST_ADDR) { - SET_FIELD(ram_line_hi, GFT_RAM_LINE_DST_IP, 1); - SET_FIELD(ram_line_lo, GFT_RAM_LINE_ETHERTYPE, 1); + SET_FIELD(ram_line.hi, GFT_RAM_LINE_DST_IP, 1); + SET_FIELD(ram_line.lo, GFT_RAM_LINE_ETHERTYPE, 1); } else if (profile_type == GFT_PROFILE_TYPE_IP_SRC_ADDR) { - SET_FIELD(ram_line_hi, GFT_RAM_LINE_SRC_IP, 1); - SET_FIELD(ram_line_lo, GFT_RAM_LINE_ETHERTYPE, 1); + SET_FIELD(ram_line.hi, GFT_RAM_LINE_SRC_IP, 1); + SET_FIELD(ram_line.lo, GFT_RAM_LINE_ETHERTYPE, 1); } else if (profile_type == GFT_PROFILE_TYPE_TUNNEL_TYPE) { - SET_FIELD(ram_line_lo, GFT_RAM_LINE_TUNNEL_ETHERTYPE, 1); + SET_FIELD(ram_line.lo, GFT_RAM_LINE_TUNNEL_ETHERTYPE, 1); /* Allow tunneled traffic without inner IP */ search_non_ip_as_gft = 1; @@ -1334,24 +1369,17 @@ void qed_gft_config(struct qed_hwfn *p_hwfn, qed_wr(p_hwfn, p_ptt, PRS_REG_SEARCH_NON_IP_AS_GFT, search_non_ip_as_gft); - qed_wr(p_hwfn, - p_ptt, - PRS_REG_GFT_PROFILE_MASK_RAM + RAM_LINE_SIZE * pf_id, - ram_line_lo); - qed_wr(p_hwfn, - p_ptt, - PRS_REG_GFT_PROFILE_MASK_RAM + RAM_LINE_SIZE * pf_id + REG_SIZE, - ram_line_hi); + qed_dmae_to_grc(p_hwfn, p_ptt, (u32 *)&ram_line, + PRS_REG_GFT_PROFILE_MASK_RAM + RAM_LINE_SIZE * pf_id, + sizeof(ram_line) / REG_SIZE); /* Set default profile so that no filter match will happen */ - qed_wr(p_hwfn, - p_ptt, - PRS_REG_GFT_PROFILE_MASK_RAM + RAM_LINE_SIZE * - PRS_GFT_CAM_LINES_NO_MATCH, 0xffffffff); - qed_wr(p_hwfn, - p_ptt, - PRS_REG_GFT_PROFILE_MASK_RAM + RAM_LINE_SIZE * - PRS_GFT_CAM_LINES_NO_MATCH + REG_SIZE, 0x3ff); + ram_line.lo = 0xffffffff; + ram_line.hi = 0x3ff; + qed_dmae_to_grc(p_hwfn, p_ptt, (u32 *)&ram_line, + PRS_REG_GFT_PROFILE_MASK_RAM + RAM_LINE_SIZE * + PRS_GFT_CAM_LINES_NO_MATCH, + sizeof(ram_line) / REG_SIZE); /* Enable gft search */ qed_wr(p_hwfn, p_ptt, PRS_REG_SEARCH_GFT, 1); @@ -1544,3 +1572,144 @@ void qed_set_rdma_error_level(struct qed_hwfn *p_hwfn, qed_wr(p_hwfn, p_ptt, ram_addr, assert_level[storm_id]); } } + +#define PHYS_ADDR_DWORDS DIV_ROUND_UP(sizeof(dma_addr_t), 4) +#define OVERLAY_HDR_SIZE_DWORDS (sizeof(struct fw_overlay_buf_hdr) / 4) + +static u32 qed_get_overlay_addr_ram_addr(struct qed_hwfn *p_hwfn, u8 storm_id) +{ + switch (storm_id) { + case 0: + return TSEM_REG_FAST_MEMORY + SEM_FAST_REG_INT_RAM + + 
TSTORM_OVERLAY_BUF_ADDR_OFFSET; + case 1: + return MSEM_REG_FAST_MEMORY + SEM_FAST_REG_INT_RAM + + MSTORM_OVERLAY_BUF_ADDR_OFFSET; + case 2: + return USEM_REG_FAST_MEMORY + SEM_FAST_REG_INT_RAM + + USTORM_OVERLAY_BUF_ADDR_OFFSET; + case 3: + return XSEM_REG_FAST_MEMORY + SEM_FAST_REG_INT_RAM + + XSTORM_OVERLAY_BUF_ADDR_OFFSET; + case 4: + return YSEM_REG_FAST_MEMORY + SEM_FAST_REG_INT_RAM + + YSTORM_OVERLAY_BUF_ADDR_OFFSET; + case 5: + return PSEM_REG_FAST_MEMORY + SEM_FAST_REG_INT_RAM + + PSTORM_OVERLAY_BUF_ADDR_OFFSET; + + default: + return 0; + } +} + +struct phys_mem_desc *qed_fw_overlay_mem_alloc(struct qed_hwfn *p_hwfn, + const u32 * const + fw_overlay_in_buf, + u32 buf_size_in_bytes) +{ + u32 buf_size = buf_size_in_bytes / sizeof(u32), buf_offset = 0; + struct phys_mem_desc *allocated_mem; + + if (!buf_size) + return NULL; + + allocated_mem = kcalloc(NUM_STORMS, sizeof(struct phys_mem_desc), + GFP_KERNEL); + if (!allocated_mem) + return NULL; + + memset(allocated_mem, 0, NUM_STORMS * sizeof(struct phys_mem_desc)); + + /* For each Storm, set physical address in RAM */ + while (buf_offset < buf_size) { + struct phys_mem_desc *storm_mem_desc; + struct fw_overlay_buf_hdr *hdr; + u32 storm_buf_size; + u8 storm_id; + + hdr = + (struct fw_overlay_buf_hdr *)&fw_overlay_in_buf[buf_offset]; + storm_buf_size = GET_FIELD(hdr->data, + FW_OVERLAY_BUF_HDR_BUF_SIZE); + storm_id = GET_FIELD(hdr->data, FW_OVERLAY_BUF_HDR_STORM_ID); + storm_mem_desc = allocated_mem + storm_id; + storm_mem_desc->size = storm_buf_size * sizeof(u32); + + /* Allocate physical memory for Storm's overlays buffer */ + storm_mem_desc->virt_addr = + dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + storm_mem_desc->size, + &storm_mem_desc->phys_addr, GFP_KERNEL); + if (!storm_mem_desc->virt_addr) + break; + + /* Skip overlays buffer header */ + buf_offset += OVERLAY_HDR_SIZE_DWORDS; + + /* Copy Storm's overlays buffer to allocated memory */ + memcpy(storm_mem_desc->virt_addr, + &fw_overlay_in_buf[buf_offset], storm_mem_desc->size); + + /* Advance to next Storm */ + buf_offset += storm_buf_size; + } + + /* If memory allocation has failed, free all allocated memory */ + if (buf_offset < buf_size) { + qed_fw_overlay_mem_free(p_hwfn, allocated_mem); + return NULL; + } + + return allocated_mem; +} + +void qed_fw_overlay_init_ram(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct phys_mem_desc *fw_overlay_mem) +{ + u8 storm_id; + + for (storm_id = 0; storm_id < NUM_STORMS; storm_id++) { + struct phys_mem_desc *storm_mem_desc = + (struct phys_mem_desc *)fw_overlay_mem + storm_id; + u32 ram_addr, i; + + /* Skip Storms with no FW overlays */ + if (!storm_mem_desc->virt_addr) + continue; + + /* Calculate overlay RAM GRC address of current PF */ + ram_addr = qed_get_overlay_addr_ram_addr(p_hwfn, storm_id) + + sizeof(dma_addr_t) * p_hwfn->rel_pf_id; + + /* Write Storm's overlay physical address to RAM */ + for (i = 0; i < PHYS_ADDR_DWORDS; i++, ram_addr += sizeof(u32)) + qed_wr(p_hwfn, p_ptt, ram_addr, + ((u32 *)&storm_mem_desc->phys_addr)[i]); + } +} + +void qed_fw_overlay_mem_free(struct qed_hwfn *p_hwfn, + struct phys_mem_desc *fw_overlay_mem) +{ + u8 storm_id; + + if (!fw_overlay_mem) + return; + + for (storm_id = 0; storm_id < NUM_STORMS; storm_id++) { + struct phys_mem_desc *storm_mem_desc = + (struct phys_mem_desc *)fw_overlay_mem + storm_id; + + /* Free Storm's physical memory */ + if (storm_mem_desc->virt_addr) + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + storm_mem_desc->size, + storm_mem_desc->virt_addr, + 
storm_mem_desc->phys_addr); + } + + /* Free allocated virtual memory */ + kfree(fw_overlay_mem); +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c index a868d7f88601..5a6e4ac4fef4 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c +++ b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c @@ -54,15 +54,15 @@ static u32 pxp_global_win[] = { 0x1c80, /* win 3: addr=0x1c80000, size=4096 bytes */ 0x1d00, /* win 4: addr=0x1d00000, size=4096 bytes */ 0x1d01, /* win 5: addr=0x1d01000, size=4096 bytes */ - 0x1d80, /* win 6: addr=0x1d80000, size=4096 bytes */ - 0x1d81, /* win 7: addr=0x1d81000, size=4096 bytes */ - 0x1d82, /* win 8: addr=0x1d82000, size=4096 bytes */ - 0x1e00, /* win 9: addr=0x1e00000, size=4096 bytes */ - 0x1e80, /* win 10: addr=0x1e80000, size=4096 bytes */ - 0x1f00, /* win 11: addr=0x1f00000, size=4096 bytes */ - 0, - 0, - 0, + 0x1d02, /* win 6: addr=0x1d02000, size=4096 bytes */ + 0x1d80, /* win 7: addr=0x1d80000, size=4096 bytes */ + 0x1d81, /* win 8: addr=0x1d81000, size=4096 bytes */ + 0x1d82, /* win 9: addr=0x1d82000, size=4096 bytes */ + 0x1e00, /* win 10: addr=0x1e00000, size=4096 bytes */ + 0x1e01, /* win 11: addr=0x1e01000, size=4096 bytes */ + 0x1e80, /* win 12: addr=0x1e80000, size=4096 bytes */ + 0x1f00, /* win 13: addr=0x1f00000, size=4096 bytes */ + 0x1c08, /* win 14: addr=0x1c08000, size=4096 bytes */ 0, 0, 0, @@ -74,15 +74,6 @@ void qed_init_iro_array(struct qed_dev *cdev) cdev->iro_arr = iro_arr; } -/* Runtime configuration helpers */ -void qed_init_clear_rt_data(struct qed_hwfn *p_hwfn) -{ - int i; - - for (i = 0; i < RUNTIME_ARRAY_SIZE; i++) - p_hwfn->rt_data.b_valid[i] = false; -} - void qed_init_store_rt_reg(struct qed_hwfn *p_hwfn, u32 rt_offset, u32 val) { p_hwfn->rt_data.init_val[rt_offset] = val; @@ -106,7 +97,7 @@ static int qed_init_rt(struct qed_hwfn *p_hwfn, { u32 *p_init_val = &p_hwfn->rt_data.init_val[rt_offset]; bool *p_valid = &p_hwfn->rt_data.b_valid[rt_offset]; - u16 i, segment; + u16 i, j, segment; int rc = 0; /* Since not all RT entries are initialized, go over the RT and @@ -121,6 +112,7 @@ static int qed_init_rt(struct qed_hwfn *p_hwfn, */ if (!b_must_dmae) { qed_wr(p_hwfn, p_ptt, addr + (i << 2), p_init_val[i]); + p_valid[i] = false; continue; } @@ -135,6 +127,10 @@ static int qed_init_rt(struct qed_hwfn *p_hwfn, if (rc) return rc; + /* invalidate after writing */ + for (j = i; j < i + segment; j++) + p_valid[j] = false; + /* Jump over the entire segment, including invalid entry */ i += segment; } @@ -215,7 +211,7 @@ static int qed_init_fill_dmae(struct qed_hwfn *p_hwfn, * 3. p_hwfb->temp_data, * 4. 
fill_count */ - params.flags = QED_DMAE_FLAG_RW_REPL_SRC; + SET_FIELD(params.flags, QED_DMAE_PARAMS_RW_REPL_SRC, 0x1); return qed_dmae_host2grc(p_hwfn, p_ptt, (uintptr_t)(&zero_buffer[0]), addr, fill_count, ¶ms); @@ -490,10 +486,10 @@ static u32 qed_init_cmd_phase(struct qed_hwfn *p_hwfn, int qed_init_run(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, int phase, int phase_id, int modes) { + bool b_dmae = (phase != PHASE_ENGINE); struct qed_dev *cdev = p_hwfn->cdev; u32 cmd_num, num_init_ops; union init_op *init_ops; - bool b_dmae = false; int rc = 0; num_init_ops = cdev->fw_data->init_ops_size; @@ -522,7 +518,6 @@ int qed_init_run(struct qed_hwfn *p_hwfn, case INIT_OP_IF_PHASE: cmd_num += qed_init_cmd_phase(p_hwfn, &cmd->if_phase, phase, phase_id); - b_dmae = GET_FIELD(data, INIT_IF_PHASE_OP_DMAE_ENABLE); break; case INIT_OP_DELAY: /* qed_init_run is always invoked from @@ -533,6 +528,9 @@ int qed_init_run(struct qed_hwfn *p_hwfn, case INIT_OP_CALLBACK: rc = qed_init_cmd_cb(p_hwfn, p_ptt, &cmd->callback); + if (phase == PHASE_ENGINE && + cmd->callback.callback_id == DMAE_READY_CB) + b_dmae = true; break; } @@ -587,5 +585,10 @@ int qed_init_fw_data(struct qed_dev *cdev, const u8 *data) len = buf_hdr[BIN_BUF_INIT_CMD].length; fw->init_ops_size = len / sizeof(struct init_raw_op); + offset = buf_hdr[BIN_BUF_INIT_OVERLAYS].offset; + fw->fw_overlays = (u32 *)(data + offset); + len = buf_hdr[BIN_BUF_INIT_OVERLAYS].length; + fw->fw_overlays_len = len; + return 0; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_ops.h b/drivers/net/ethernet/qlogic/qed/qed_init_ops.h index 555dd086796d..e9e8ade50ed3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_init_ops.h +++ b/drivers/net/ethernet/qlogic/qed/qed_init_ops.h @@ -81,14 +81,6 @@ int qed_init_alloc(struct qed_hwfn *p_hwfn); void qed_init_free(struct qed_hwfn *p_hwfn); /** - * @brief qed_init_clear_rt_data - Clears the runtime init array. - * - * - * @param p_hwfn - */ -void qed_init_clear_rt_data(struct qed_hwfn *p_hwfn); - -/** * @brief qed_init_store_rt_reg - Store a configuration value in the RT array. 
* * diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c index 5585c18053ec..7245a615517a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c @@ -204,17 +204,14 @@ qed_sp_iscsi_func_start(struct qed_hwfn *p_hwfn, return -EINVAL; } - SET_FIELD(p_init->hdr.flags, - ISCSI_SLOW_PATH_HDR_LAYER_CODE, ISCSI_SLOW_PATH_LAYER_CODE); - p_init->hdr.op_code = ISCSI_RAMROD_CMD_ID_INIT_FUNC; - val = p_params->half_way_close_timeout; p_init->half_way_close_timeout = cpu_to_le16(val); p_init->num_sq_pages_in_ring = p_params->num_sq_pages_in_ring; p_init->num_r2tq_pages_in_ring = p_params->num_r2tq_pages_in_ring; p_init->num_uhq_pages_in_ring = p_params->num_uhq_pages_in_ring; - p_init->ll2_rx_queue_id = p_hwfn->hw_info.resc_start[QED_LL2_QUEUE] + - p_params->ll2_ooo_queue_id; + p_init->ll2_rx_queue_id = + p_hwfn->hw_info.resc_start[QED_LL2_RAM_QUEUE] + + p_params->ll2_ooo_queue_id; p_init->func_params.log_page_size = p_params->log_page_size; val = p_params->num_tasks; @@ -331,12 +328,7 @@ static int qed_sp_iscsi_conn_offload(struct qed_hwfn *p_hwfn, p_conn->physical_q1 = cpu_to_le16(physical_q); p_ramrod->iscsi.physical_q1 = cpu_to_le16(physical_q); - p_ramrod->hdr.op_code = ISCSI_RAMROD_CMD_ID_OFFLOAD_CONN; - SET_FIELD(p_ramrod->hdr.flags, ISCSI_SLOW_PATH_HDR_LAYER_CODE, - p_conn->layer_code); - p_ramrod->conn_id = cpu_to_le16(p_conn->conn_id); - p_ramrod->fw_cid = cpu_to_le32(p_conn->icid); DMA_REGPAIR_LE(p_ramrod->iscsi.sq_pbl_addr, p_conn->sq_pbl_addr); @@ -492,12 +484,8 @@ static int qed_sp_iscsi_conn_update(struct qed_hwfn *p_hwfn, return rc; p_ramrod = &p_ent->ramrod.iscsi_conn_update; - p_ramrod->hdr.op_code = ISCSI_RAMROD_CMD_ID_UPDATE_CONN; - SET_FIELD(p_ramrod->hdr.flags, - ISCSI_SLOW_PATH_HDR_LAYER_CODE, p_conn->layer_code); p_ramrod->conn_id = cpu_to_le16(p_conn->conn_id); - p_ramrod->fw_cid = cpu_to_le32(p_conn->icid); p_ramrod->flags = p_conn->update_flag; p_ramrod->max_seq_size = cpu_to_le32(p_conn->max_seq_size); dval = p_conn->max_recv_pdu_length; @@ -537,12 +525,8 @@ qed_sp_iscsi_mac_update(struct qed_hwfn *p_hwfn, return rc; p_ramrod = &p_ent->ramrod.iscsi_conn_mac_update; - p_ramrod->hdr.op_code = ISCSI_RAMROD_CMD_ID_MAC_UPDATE; - SET_FIELD(p_ramrod->hdr.flags, - ISCSI_SLOW_PATH_HDR_LAYER_CODE, p_conn->layer_code); p_ramrod->conn_id = cpu_to_le16(p_conn->conn_id); - p_ramrod->fw_cid = cpu_to_le32(p_conn->icid); ucval = p_conn->remote_mac[1]; ((u8 *)(&p_ramrod->remote_mac_addr_hi))[0] = ucval; ucval = p_conn->remote_mac[0]; @@ -583,12 +567,8 @@ static int qed_sp_iscsi_conn_terminate(struct qed_hwfn *p_hwfn, return rc; p_ramrod = &p_ent->ramrod.iscsi_conn_terminate; - p_ramrod->hdr.op_code = ISCSI_RAMROD_CMD_ID_TERMINATION_CONN; - SET_FIELD(p_ramrod->hdr.flags, - ISCSI_SLOW_PATH_HDR_LAYER_CODE, p_conn->layer_code); p_ramrod->conn_id = cpu_to_le16(p_conn->conn_id); - p_ramrod->fw_cid = cpu_to_le32(p_conn->icid); p_ramrod->abortive = p_conn->abortive_dsconnect; DMA_REGPAIR_LE(p_ramrod->query_params_addr, @@ -603,7 +583,6 @@ static int qed_sp_iscsi_conn_clear_sq(struct qed_hwfn *p_hwfn, enum spq_mode comp_mode, struct qed_spq_comp_cb *p_comp_addr) { - struct iscsi_slow_path_hdr *p_ramrod = NULL; struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; int rc = -EINVAL; @@ -621,11 +600,6 @@ static int qed_sp_iscsi_conn_clear_sq(struct qed_hwfn *p_hwfn, if (rc) return rc; - p_ramrod = &p_ent->ramrod.iscsi_empty; - p_ramrod->op_code = ISCSI_RAMROD_CMD_ID_CLEAR_SQ; - 
SET_FIELD(p_ramrod->flags, - ISCSI_SLOW_PATH_HDR_LAYER_CODE, p_conn->layer_code); - return qed_spq_post(p_hwfn, p_ent, NULL); } @@ -633,7 +607,6 @@ static int qed_sp_iscsi_func_stop(struct qed_hwfn *p_hwfn, enum spq_mode comp_mode, struct qed_spq_comp_cb *p_comp_addr) { - struct iscsi_spe_func_dstry *p_ramrod = NULL; struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; int rc = 0; @@ -651,9 +624,6 @@ static int qed_sp_iscsi_func_stop(struct qed_hwfn *p_hwfn, if (rc) return rc; - p_ramrod = &p_ent->ramrod.iscsi_destroy; - p_ramrod->hdr.op_code = ISCSI_RAMROD_CMD_ID_DESTROY_FUNC; - rc = qed_spq_post(p_hwfn, p_ent, NULL); qed_spq_unregister_async_cb(p_hwfn, PROTOCOLID_ISCSI); diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index 65ec16a31658..d2fe61a5cf56 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -137,8 +137,8 @@ qed_iwarp_init_fw_ramrod(struct qed_hwfn *p_hwfn, struct iwarp_init_func_ramrod_data *p_ramrod) { p_ramrod->iwarp.ll2_ooo_q_index = - RESC_START(p_hwfn, QED_LL2_QUEUE) + - p_hwfn->p_rdma_info->iwarp.ll2_ooo_handle; + RESC_START(p_hwfn, QED_LL2_RAM_QUEUE) + + p_hwfn->p_rdma_info->iwarp.ll2_ooo_handle; p_ramrod->tcp.max_fin_rt = QED_IWARP_MAX_FIN_RT_DEFAULT; @@ -2651,6 +2651,8 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, memset(&data, 0, sizeof(data)); data.input.conn_type = QED_LL2_TYPE_IWARP; + /* SYN will use ctx based queues */ + data.input.rx_conn_type = QED_LL2_RX_TYPE_CTX; data.input.mtu = params->max_mtu; data.input.rx_num_desc = QED_IWARP_LL2_SYN_RX_SIZE; data.input.tx_num_desc = QED_IWARP_LL2_SYN_TX_SIZE; @@ -2683,6 +2685,8 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, /* Start OOO connection */ data.input.conn_type = QED_LL2_TYPE_OOO; + /* OOO/unaligned will use legacy ll2 queues (ram based) */ + data.input.rx_conn_type = QED_LL2_RX_TYPE_LEGACY; data.input.mtu = params->max_mtu; n_ooo_bufs = (QED_IWARP_MAX_OOO * rcv_wnd_size) / diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 19a1a58d60f8..037e5978787e 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -962,7 +962,7 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn, return rc; p_ramrod = &p_ent->ramrod.core_rx_queue_start; - + memset(p_ramrod, 0, sizeof(*p_ramrod)); p_ramrod->sb_id = cpu_to_le16(qed_int_get_sp_sb_id(p_hwfn)); p_ramrod->sb_index = p_rx->rx_sb_index; p_ramrod->complete_event_flg = 1; @@ -996,6 +996,8 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn, p_ramrod->action_on_error.error_type = action_on_error; p_ramrod->gsi_offload_flag = p_ll2_conn->input.gsi_enable; + p_ramrod->zero_prod_flg = 1; + return qed_spq_post(p_hwfn, p_ent, NULL); } @@ -1317,6 +1319,25 @@ qed_ll2_set_cbs(struct qed_ll2_info *p_ll2_info, const struct qed_ll2_cbs *cbs) return 0; } +static void _qed_ll2_calc_allowed_conns(struct qed_hwfn *p_hwfn, + struct qed_ll2_acquire_data *data, + u8 *start_idx, u8 *last_idx) +{ + /* LL2 queues handles will be split as follows: + * First will be the legacy queues, and then the ctx based. 
+ */ + if (data->input.rx_conn_type == QED_LL2_RX_TYPE_LEGACY) { + *start_idx = QED_LL2_LEGACY_CONN_BASE_PF; + *last_idx = *start_idx + + QED_MAX_NUM_OF_LEGACY_LL2_CONNS_PF; + } else { + /* QED_LL2_RX_TYPE_CTX */ + *start_idx = QED_LL2_CTX_CONN_BASE_PF; + *last_idx = *start_idx + + QED_MAX_NUM_OF_CTX_LL2_CONNS_PF; + } +} + static enum core_error_handle qed_ll2_get_error_choice(enum qed_ll2_error_handle err) { @@ -1337,14 +1358,16 @@ int qed_ll2_acquire_connection(void *cxt, struct qed_ll2_acquire_data *data) struct qed_hwfn *p_hwfn = cxt; qed_int_comp_cb_t comp_rx_cb, comp_tx_cb; struct qed_ll2_info *p_ll2_info = NULL; - u8 i, *p_tx_max; + u8 i, first_idx, last_idx, *p_tx_max; int rc; if (!data->p_connection_handle || !p_hwfn->p_ll2_info) return -EINVAL; + _qed_ll2_calc_allowed_conns(p_hwfn, data, &first_idx, &last_idx); + /* Find a free connection to be used */ - for (i = 0; (i < QED_MAX_NUM_OF_LL2_CONNECTIONS); i++) { + for (i = first_idx; i < last_idx; i++) { mutex_lock(&p_hwfn->p_ll2_info[i].mutex); if (p_hwfn->p_ll2_info[i].b_active) { mutex_unlock(&p_hwfn->p_ll2_info[i].mutex); @@ -1448,6 +1471,7 @@ static int qed_ll2_establish_connection_rx(struct qed_hwfn *p_hwfn, enum qed_ll2_error_handle error_input; enum core_error_handle error_mode; u8 action_on_error = 0; + int rc; if (!QED_LL2_RX_REGISTERED(p_ll2_conn)) return 0; @@ -1461,7 +1485,18 @@ static int qed_ll2_establish_connection_rx(struct qed_hwfn *p_hwfn, error_mode = qed_ll2_get_error_choice(error_input); SET_FIELD(action_on_error, CORE_RX_ACTION_ON_ERROR_NO_BUFF, error_mode); - return qed_sp_ll2_rx_queue_start(p_hwfn, p_ll2_conn, action_on_error); + rc = qed_sp_ll2_rx_queue_start(p_hwfn, p_ll2_conn, action_on_error); + if (rc) + return rc; + + if (p_ll2_conn->rx_queue.ctx_based) { + rc = qed_db_recovery_add(p_hwfn->cdev, + p_ll2_conn->rx_queue.set_prod_addr, + &p_ll2_conn->rx_queue.db_data, + DB_REC_WIDTH_64B, DB_REC_KERNEL); + } + + return rc; } static void @@ -1475,13 +1510,41 @@ qed_ll2_establish_connection_ooo(struct qed_hwfn *p_hwfn, qed_ooo_submit_rx_buffers(p_hwfn, p_ll2_conn); } +static inline u8 qed_ll2_handle_to_queue_id(struct qed_hwfn *p_hwfn, + u8 handle, + u8 ll2_queue_type) +{ + u8 qid; + + if (ll2_queue_type == QED_LL2_RX_TYPE_LEGACY) + return p_hwfn->hw_info.resc_start[QED_LL2_RAM_QUEUE] + handle; + + /* QED_LL2_RX_TYPE_CTX + * FW distinguishes between the legacy queues (ram based) and the + * ctx based queues by the queue_id. + * The first MAX_NUM_LL2_RX_RAM_QUEUES queues are legacy + * and the queue ids above that are ctx base. + */ + qid = p_hwfn->hw_info.resc_start[QED_LL2_CTX_QUEUE] + + MAX_NUM_LL2_RX_RAM_QUEUES; + + /* See comment on the acquire connection for how the ll2 + * queues handles are divided. 
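_qed_ll2_calc_allowed_conns() above partitions the per-PF connection handles into a legacy range and a ctx range, and qed_ll2_handle_to_queue_id() then maps a handle to an absolute hardware queue id: legacy handles index into the RAM-queue resource, while ctx handles land above MAX_NUM_LL2_RX_RAM_QUEUES in the ctx-queue resource. A standalone sketch of that arithmetic is below; the resource-start and RAM-queue-count values are made-up example numbers, not the firmware's.

#include <stdio.h>

#define MAX_CONNS_PF          4  /* QED_MAX_NUM_OF_LL2_CONNS_PF in the diff */
#define MAX_LEGACY_CONNS_PF   3  /* QED_MAX_NUM_OF_LEGACY_LL2_CONNS_PF */
#define CTX_CONN_BASE         MAX_LEGACY_CONNS_PF

#define RAM_QUEUE_RESC_START  2  /* assumed value, for illustration only */
#define CTX_QUEUE_RESC_START  0  /* assumed value, for illustration only */
#define NUM_LL2_RX_RAM_QUEUES 32 /* assumed value, for illustration only */

static unsigned int handle_to_queue_id(unsigned int handle)
{
	if (handle < CTX_CONN_BASE) /* legacy handle -> RAM based queue */
		return RAM_QUEUE_RESC_START + handle;

	/* ctx handle: queue ids above the RAM queues are ctx based */
	return CTX_QUEUE_RESC_START + NUM_LL2_RX_RAM_QUEUES +
	       (handle - MAX_LEGACY_CONNS_PF);
}

int main(void)
{
	unsigned int handle;

	for (handle = 0; handle < MAX_CONNS_PF; handle++)
		printf("handle %u -> abs queue id %u (%s)\n", handle,
		       handle_to_queue_id(handle),
		       handle < CTX_CONN_BASE ? "legacy" : "ctx");
	return 0;
}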
+ */ + qid += (handle - QED_MAX_NUM_OF_LEGACY_LL2_CONNS_PF); + + return qid; +} + int qed_ll2_establish_connection(void *cxt, u8 connection_handle) { - struct qed_hwfn *p_hwfn = cxt; - struct qed_ll2_info *p_ll2_conn; + struct e4_core_conn_context *p_cxt; struct qed_ll2_tx_packet *p_pkt; + struct qed_ll2_info *p_ll2_conn; + struct qed_hwfn *p_hwfn = cxt; struct qed_ll2_rx_queue *p_rx; struct qed_ll2_tx_queue *p_tx; + struct qed_cxt_info cxt_info; struct qed_ptt *p_ptt; int rc = -EINVAL; u32 i, capacity; @@ -1539,13 +1602,46 @@ int qed_ll2_establish_connection(void *cxt, u8 connection_handle) rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_CORE, &p_ll2_conn->cid); if (rc) goto out; + cxt_info.iid = p_ll2_conn->cid; + rc = qed_cxt_get_cid_info(p_hwfn, &cxt_info); + if (rc) { + DP_NOTICE(p_hwfn, "Cannot find context info for cid=%d\n", + p_ll2_conn->cid); + goto out; + } + + p_cxt = cxt_info.p_cxt; + + memset(p_cxt, 0, sizeof(*p_cxt)); - qid = p_hwfn->hw_info.resc_start[QED_LL2_QUEUE] + connection_handle; + qid = qed_ll2_handle_to_queue_id(p_hwfn, connection_handle, + p_ll2_conn->input.rx_conn_type); p_ll2_conn->queue_id = qid; p_ll2_conn->tx_stats_id = qid; - p_rx->set_prod_addr = (u8 __iomem *)p_hwfn->regview + - GTT_BAR0_MAP_REG_TSDM_RAM + - TSTORM_LL2_RX_PRODS_OFFSET(qid); + + DP_VERBOSE(p_hwfn, QED_MSG_LL2, + "Establishing ll2 queue. PF %d ctx_based=%d abs qid=%d\n", + p_hwfn->rel_pf_id, p_ll2_conn->input.rx_conn_type, qid); + + if (p_ll2_conn->input.rx_conn_type == QED_LL2_RX_TYPE_LEGACY) { + p_rx->set_prod_addr = p_hwfn->regview + + GTT_BAR0_MAP_REG_TSDM_RAM + TSTORM_LL2_RX_PRODS_OFFSET(qid); + } else { + /* QED_LL2_RX_TYPE_CTX - using doorbell */ + p_rx->ctx_based = 1; + + p_rx->set_prod_addr = p_hwfn->doorbells + + p_hwfn->dpi_start_offset + + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_LL2_PROD_UPDATE); + + /* prepare db data */ + p_rx->db_data.icid = cpu_to_le16((u16)p_ll2_conn->cid); + SET_FIELD(p_rx->db_data.params, + CORE_PWM_PROD_UPDATE_DATA_AGG_CMD, DB_AGG_CMD_SET); + SET_FIELD(p_rx->db_data.params, + CORE_PWM_PROD_UPDATE_DATA_RESERVED1, 0); + } + p_tx->doorbell_addr = (u8 __iomem *)p_hwfn->doorbells + qed_db_addr(p_ll2_conn->cid, DQ_DEMS_LEGACY); @@ -1556,7 +1652,6 @@ int qed_ll2_establish_connection(void *cxt, u8 connection_handle) DQ_XCM_CORE_TX_BD_PROD_CMD); p_tx->db_msg.agg_flags = DQ_XCM_CORE_DQ_CF_CMD; - rc = qed_ll2_establish_connection_rx(p_hwfn, p_ll2_conn); if (rc) goto out; @@ -1590,7 +1685,7 @@ static void qed_ll2_post_rx_buffer_notify_fw(struct qed_hwfn *p_hwfn, struct qed_ll2_rx_packet *p_curp) { struct qed_ll2_rx_packet *p_posting_packet = NULL; - struct core_ll2_rx_prod rx_prod = { 0, 0, 0 }; + struct core_ll2_rx_prod rx_prod = { 0, 0 }; bool b_notify_fw = false; u16 bd_prod, cq_prod; @@ -1615,13 +1710,27 @@ static void qed_ll2_post_rx_buffer_notify_fw(struct qed_hwfn *p_hwfn, bd_prod = qed_chain_get_prod_idx(&p_rx->rxq_chain); cq_prod = qed_chain_get_prod_idx(&p_rx->rcq_chain); - rx_prod.bd_prod = cpu_to_le16(bd_prod); - rx_prod.cqe_prod = cpu_to_le16(cq_prod); + if (p_rx->ctx_based) { + /* update producer by giving a doorbell */ + p_rx->db_data.prod.bd_prod = cpu_to_le16(bd_prod); + p_rx->db_data.prod.cqe_prod = cpu_to_le16(cq_prod); + /* Make sure chain element is updated before ringing the + * doorbell + */ + dma_wmb(); + DIRECT_REG_WR64(p_rx->set_prod_addr, + *((u64 *)&p_rx->db_data)); + } else { + rx_prod.bd_prod = cpu_to_le16(bd_prod); + rx_prod.cqe_prod = cpu_to_le16(cq_prod); - /* Make sure chain element is updated before ringing the doorbell */ - dma_wmb(); + /* 
Make sure chain element is updated before ringing the + * doorbell + */ + dma_wmb(); - DIRECT_REG_WR(p_rx->set_prod_addr, *((u32 *)&rx_prod)); + DIRECT_REG_WR(p_rx->set_prod_addr, *((u32 *)&rx_prod)); + } } int qed_ll2_post_rx_buffer(void *cxt, @@ -1965,6 +2074,12 @@ int qed_ll2_terminate_connection(void *cxt, u8 connection_handle) if (QED_LL2_RX_REGISTERED(p_ll2_conn)) { p_ll2_conn->rx_queue.b_cb_registered = false; smp_wmb(); /* Make sure this is seen by ll2_lb_rxq_completion */ + + if (p_ll2_conn->rx_queue.ctx_based) + qed_db_recovery_del(p_hwfn->cdev, + p_ll2_conn->rx_queue.set_prod_addr, + &p_ll2_conn->rx_queue.db_data); + rc = qed_sp_ll2_rx_queue_stop(p_hwfn, p_ll2_conn); if (rc) goto out; diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h index 5f01fbd3c073..288642d526b7 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h @@ -46,6 +46,18 @@ #include "qed_sp.h" #define QED_MAX_NUM_OF_LL2_CONNECTIONS (4) +/* LL2 queues handles will be split as follows: + * first will be legacy queues, and then the ctx based queues. + */ +#define QED_MAX_NUM_OF_LL2_CONNS_PF (4) +#define QED_MAX_NUM_OF_LEGACY_LL2_CONNS_PF (3) + +#define QED_MAX_NUM_OF_CTX_LL2_CONNS_PF \ + (QED_MAX_NUM_OF_LL2_CONNS_PF - QED_MAX_NUM_OF_LEGACY_LL2_CONNS_PF) + +#define QED_LL2_LEGACY_CONN_BASE_PF 0 +#define QED_LL2_CTX_CONN_BASE_PF QED_MAX_NUM_OF_LEGACY_LL2_CONNS_PF + struct qed_ll2_rx_packet { struct list_head list_entry; @@ -79,6 +91,7 @@ struct qed_ll2_rx_queue { struct qed_chain rxq_chain; struct qed_chain rcq_chain; u8 rx_sb_index; + u8 ctx_based; bool b_cb_registered; __le16 *p_fw_cons; struct list_head active_descq; @@ -86,6 +99,7 @@ struct qed_ll2_rx_queue { struct list_head posting_descq; struct qed_ll2_rx_packet *descq_array; void __iomem *set_prod_addr; + struct core_pwm_prod_update_data db_data; }; struct qed_ll2_tx_queue { diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 38f7f40b3a4d..2c189c637cca 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -2637,7 +2637,7 @@ static int qed_set_grc_config(struct qed_dev *cdev, u32 cfg_id, u32 val) if (!ptt) return -EAGAIN; - rc = qed_dbg_grc_config(hwfn, ptt, cfg_id, val); + rc = qed_dbg_grc_config(hwfn, cfg_id, val); qed_ptt_release(hwfn, ptt); diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 36ddb89856a8..280527cc0578 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -48,6 +48,8 @@ #include "qed_reg_addr.h" #include "qed_sriov.h" +#define GRCBASE_MCP 0xe00000 + #define QED_MCP_RESP_ITER_US 10 #define QED_DRV_MB_MAX_RETRIES (500 * 1000) /* Account for 5 sec */ @@ -3165,6 +3167,9 @@ qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn, case QED_NVM_IMAGE_FCOE_CFG: type = NVM_TYPE_FCOE_CFG; break; + case QED_NVM_IMAGE_MDUMP: + type = NVM_TYPE_MDUMP; + break; case QED_NVM_IMAGE_NVM_CFG1: type = NVM_TYPE_NVM_CFG1; break; @@ -3261,9 +3266,12 @@ static enum resource_id_enum qed_mcp_get_mfw_res_id(enum qed_resources res_id) case QED_ILT: mfw_res_id = RESOURCE_ILT_E; break; - case QED_LL2_QUEUE: + case QED_LL2_RAM_QUEUE: mfw_res_id = RESOURCE_LL2_QUEUE_E; break; + case QED_LL2_CTX_QUEUE: + mfw_res_id = RESOURCE_LL2_CQS_E; + break; case QED_RDMA_CNQ_RAM: case QED_CMDQS_CQS: /* CNQ/CMDQS are the same resource */ diff --git 
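For ctx-based connections, qed_ll2_post_rx_buffer_notify_fw() above no longer writes the 32-bit producer pair into TSTORM RAM; it packs the producers into the pre-initialized 64-bit PWM doorbell payload and writes it to the doorbell BAR after dma_wmb(), and that doorbell address is also registered with doorbell recovery. The sketch below only models the packing and ordering; the struct layout and the fake register are illustrative stand-ins, not the real core_pwm_prod_update_data.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct prod_update_db {
	uint16_t icid;      /* connection id the doorbell targets */
	uint16_t params;    /* aggregation command bits, set once at setup */
	uint16_t bd_prod;   /* RX buffer-descriptor producer */
	uint16_t cqe_prod;  /* RX completion-queue producer */
};

static volatile uint64_t fake_doorbell_reg; /* stand-in for the mapped doorbell */

static void ring_rx_doorbell(struct prod_update_db *db,
			     uint16_t bd_prod, uint16_t cqe_prod)
{
	uint64_t raw;

	db->bd_prod = bd_prod;
	db->cqe_prod = cqe_prod;

	/* In the kernel: dma_wmb() so the chain elements are visible to the
	 * device before the doorbell, then DIRECT_REG_WR64() of the payload.
	 */
	memcpy(&raw, db, sizeof(raw));
	fake_doorbell_reg = raw;
}

int main(void)
{
	struct prod_update_db db = { .icid = 0x12, .params = 0 };

	ring_rx_doorbell(&db, 8, 8);
	printf("doorbell payload: 0x%016llx\n",
	       (unsigned long long)fake_doorbell_reg);
	return 0;
}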
a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h index 60f850c3bdd6..3dcb6ff58e73 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h +++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h @@ -178,6 +178,8 @@ 0x008c80UL #define MCP_REG_SCRATCH \ 0xe20000UL +#define MCP_REG_SCRATCH_SIZE \ + 57344 #define CNIG_REG_NW_PORT_MODE_BB \ 0x218200UL #define MISCS_REG_CHIP_NUM \ @@ -212,6 +214,8 @@ 0x580900UL #define DBG_REG_CLIENT_ENABLE \ 0x010004UL +#define DBG_REG_TIMESTAMP_VALID_EN \ + 0x010b58UL #define DMAE_REG_INIT \ 0x00c000UL #define DORQ_REG_IFEN \ @@ -350,6 +354,10 @@ 0x24000cUL #define PSWRQ2_REG_ILT_MEMORY \ 0x260000UL +#define PSWRQ2_REG_ILT_MEMORY_SIZE_BB \ + 15200 +#define PSWRQ2_REG_ILT_MEMORY_SIZE_K2 \ + 22000 #define PSWHST_REG_DISCARD_INTERNAL_WRITES \ 0x2a0040UL #define PSWHST2_REG_DBGSYN_ALMOST_FULL_THR \ @@ -1453,6 +1461,8 @@ 0x1401404UL #define XSEM_REG_DBG_FRAME_MODE_BB_K2 \ 0x1401408UL +#define XSEM_REG_DBG_GPRE_VECT \ + 0x1401410UL #define XSEM_REG_DBG_MODE1_CFG_BB_K2 \ 0x1401420UL #define XSEM_REG_FAST_MEMORY \ @@ -1465,6 +1475,8 @@ 0x1501404UL #define YSEM_REG_DBG_FRAME_MODE_BB_K2 \ 0x1501408UL +#define YSEM_REG_DBG_GPRE_VECT \ + 0x1501410UL #define YSEM_REG_DBG_MODE1_CFG_BB_K2 \ 0x1501420UL #define YSEM_REG_FAST_MEMORY \ @@ -1479,6 +1491,8 @@ 0x1601404UL #define PSEM_REG_DBG_FRAME_MODE_BB_K2 \ 0x1601408UL +#define PSEM_REG_DBG_GPRE_VECT \ + 0x1601410UL #define PSEM_REG_DBG_MODE1_CFG_BB_K2 \ 0x1601420UL #define PSEM_REG_FAST_MEMORY \ @@ -1493,6 +1507,8 @@ 0x1701404UL #define TSEM_REG_DBG_FRAME_MODE_BB_K2 \ 0x1701408UL +#define TSEM_REG_DBG_GPRE_VECT \ + 0x1701410UL #define TSEM_REG_DBG_MODE1_CFG_BB_K2 \ 0x1701420UL #define TSEM_REG_FAST_MEMORY \ @@ -1507,12 +1523,16 @@ 0x1801404UL #define MSEM_REG_DBG_FRAME_MODE_BB_K2 \ 0x1801408UL +#define MSEM_REG_DBG_GPRE_VECT \ + 0x1801410UL #define MSEM_REG_DBG_MODE1_CFG_BB_K2 \ 0x1801420UL #define MSEM_REG_FAST_MEMORY \ 0x1840000UL #define USEM_REG_SLOW_DBG_EMPTY_BB_K2 \ 0x1901140UL +#define SEM_FAST_REG_INT_RAM_SIZE \ + 20480 #define USEM_REG_SYNC_DBG_EMPTY \ 0x1901160UL #define USEM_REG_SLOW_DBG_ACTIVE_BB_K2 \ @@ -1521,14 +1541,26 @@ 0x1901404UL #define USEM_REG_DBG_FRAME_MODE_BB_K2 \ 0x1901408UL +#define USEM_REG_DBG_GPRE_VECT \ + 0x1901410UL #define USEM_REG_DBG_MODE1_CFG_BB_K2 \ 0x1901420UL #define USEM_REG_FAST_MEMORY \ 0x1940000UL +#define SEM_FAST_REG_DBG_MODE23_SRC_DISABLE \ + 0x000748UL +#define SEM_FAST_REG_DBG_MODE4_SRC_DISABLE \ + 0x00074cUL +#define SEM_FAST_REG_DBG_MODE6_SRC_DISABLE \ + 0x000750UL +#define SEM_FAST_REG_DEBUG_ACTIVE \ + 0x000740UL #define SEM_FAST_REG_INT_RAM \ 0x020000UL #define SEM_FAST_REG_INT_RAM_SIZE_BB_K2 \ 20480 +#define SEM_FAST_REG_RECORD_FILTER_ENABLE \ + 0x000768UL #define GRC_REG_TRACE_FIFO_VALID_DATA \ 0x050064UL #define GRC_REG_NUMBER_VALID_OVERRIDE_WINDOW \ @@ -1583,14 +1615,20 @@ 0x181530UL #define DBG_REG_DBG_BLOCK_ON \ 0x010454UL +#define DBG_REG_FILTER_ENABLE \ + 0x0109d0UL #define DBG_REG_FRAMING_MODE \ 0x010058UL +#define DBG_REG_TRIGGER_ENABLE \ + 0x01054cUL #define SEM_FAST_REG_VFC_DATA_WR \ 0x000b40UL #define SEM_FAST_REG_VFC_ADDR \ 0x000b44UL #define SEM_FAST_REG_VFC_DATA_RD \ 0x000b48UL +#define SEM_FAST_REG_VFC_STATUS \ + 0x000b4cUL #define RSS_REG_RSS_RAM_DATA \ 0x238c20UL #define RSS_REG_RSS_RAM_DATA_SIZE \ diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index e49fada85410..37e70562a964 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ 
b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -900,7 +900,7 @@ int qed_roce_query_qp(struct qed_hwfn *p_hwfn, goto err_resp; out_params->rq_psn = le32_to_cpu(p_resp_ramrod_res->psn); - rq_err_state = GET_FIELD(le32_to_cpu(p_resp_ramrod_res->err_flag), + rq_err_state = GET_FIELD(le32_to_cpu(p_resp_ramrod_res->flags), ROCE_QUERY_QP_RESP_OUTPUT_PARAMS_ERROR_FLG); dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_resp_ramrod_res), diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index 96ab77ae6af5..b7b4fbbbccfe 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -120,9 +120,7 @@ union ramrod_data { struct fcoe_conn_terminate_ramrod_params fcoe_conn_terminate; struct fcoe_stat_ramrod_params fcoe_stat; - struct iscsi_slow_path_hdr iscsi_empty; struct iscsi_init_ramrod_params iscsi_init; - struct iscsi_spe_func_dstry iscsi_destroy; struct iscsi_spe_conn_offload iscsi_conn_offload; struct iscsi_conn_update_ramrod_params iscsi_conn_update; struct iscsi_spe_conn_mac_update iscsi_conn_mac_update; diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index 7e0b795230b2..900bc603e30a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -331,8 +331,8 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, u8 sb_index = p_hwfn->p_eq->eq_sb_index; struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; - int rc = -EINVAL; u8 page_cnt, i; + int rc; /* update initial eq producer */ qed_eq_prod_update(p_hwfn, @@ -447,7 +447,7 @@ int qed_sp_pf_update(struct qed_hwfn *p_hwfn) { struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; - int rc = -EINVAL; + int rc; /* Get SPQ entry */ memset(&init_data, 0, sizeof(init_data)); @@ -471,7 +471,7 @@ int qed_sp_pf_update_ufp(struct qed_hwfn *p_hwfn) { struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; - int rc = -EOPNOTSUPP; + int rc; if (p_hwfn->ufp_info.pri_type == QED_UFP_PRI_UNKNOWN) { DP_INFO(p_hwfn, "Invalid priority type %d\n", @@ -509,7 +509,7 @@ int qed_sp_pf_update_tunn_cfg(struct qed_hwfn *p_hwfn, { struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; - int rc = -EINVAL; + int rc; if (IS_VF(p_hwfn->cdev)) return qed_vf_pf_tunnel_param_update(p_hwfn, p_tunn); @@ -546,7 +546,7 @@ int qed_sp_pf_stop(struct qed_hwfn *p_hwfn) { struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; - int rc = -EINVAL; + int rc; /* Get SPQ entry */ memset(&init_data, 0, sizeof(init_data)); diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index dcb5c917f373..66876af814c4 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -352,7 +352,7 @@ static int qed_iov_post_vf_bulletin(struct qed_hwfn *p_hwfn, /* propagate bulletin board via dmae to vm memory */ memset(&params, 0, sizeof(params)); - params.flags = QED_DMAE_FLAG_VF_DST; + SET_FIELD(params.flags, QED_DMAE_PARAMS_DST_VF_VALID, 0x1); params.dst_vfid = p_vf->abs_vf_id; return qed_dmae_host2host(p_hwfn, p_ptt, p_vf->bulletin.phys, p_vf->vf_bulletin, p_vf->bulletin.size / 4, @@ -1225,8 +1225,8 @@ static void qed_iov_send_response(struct qed_hwfn *p_hwfn, eng_vf_id = p_vf->abs_vf_id; - memset(&params, 0, sizeof(struct qed_dmae_params)); - params.flags = QED_DMAE_FLAG_VF_DST; + memset(&params, 0, sizeof(params)); +
SET_FIELD(params.flags, QED_DMAE_PARAMS_DST_VF_VALID, 0x1); params.dst_vfid = eng_vf_id; qed_dmae_host2host(p_hwfn, p_ptt, mbx->reply_phys + sizeof(u64), @@ -4103,8 +4103,9 @@ static int qed_iov_copy_vf_msg(struct qed_hwfn *p_hwfn, struct qed_ptt *ptt, if (!vf_info) return -EINVAL; - memset(&params, 0, sizeof(struct qed_dmae_params)); - params.flags = QED_DMAE_FLAG_VF_SRC | QED_DMAE_FLAG_COMPLETION_DST; + memset(&params, 0, sizeof(params)); + SET_FIELD(params.flags, QED_DMAE_PARAMS_SRC_VF_VALID, 0x1); + SET_FIELD(params.flags, QED_DMAE_PARAMS_COMPLETION_DST, 0x1); params.src_vfid = vf_info->abs_vf_id; if (qed_dmae_host2host(p_hwfn, ptt, @@ -4354,9 +4355,9 @@ qed_iov_bulletin_get_forced_vlan(struct qed_hwfn *p_hwfn, u16 rel_vf_id) static int qed_iov_configure_tx_rate(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, int vfid, int val) { - struct qed_mcp_link_state *p_link; struct qed_vf_info *vf; u8 abs_vp_id = 0; + u16 rl_id; int rc; vf = qed_iov_get_vf_info(p_hwfn, (u16)vfid, true); @@ -4367,10 +4368,8 @@ static int qed_iov_configure_tx_rate(struct qed_hwfn *p_hwfn, if (rc) return rc; - p_link = &QED_LEADING_HWFN(p_hwfn->cdev)->mcp_info->link_output; - - return qed_init_vport_rl(p_hwfn, p_ptt, abs_vp_id, (u32)val, - p_link->speed); + rl_id = abs_vp_id; /* The "rl_id" is set as the "vport_id" */ + return qed_init_global_rl(p_hwfn, p_ptt, rl_id, (u32)val); } static int diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index 004c0bfec41d..c6c20776b474 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -848,13 +848,13 @@ static void qede_tpa_start(struct qede_dev *edev, qede_set_gro_params(edev, tpa_info->skb, cqe); cons_buf: /* We still need to handle bd_len_list to consume buffers */ - if (likely(cqe->ext_bd_len_list[0])) + if (likely(cqe->bw_ext_bd_len_list[0])) qede_fill_frag_skb(edev, rxq, cqe->tpa_agg_index, - le16_to_cpu(cqe->ext_bd_len_list[0])); + le16_to_cpu(cqe->bw_ext_bd_len_list[0])); - if (unlikely(cqe->ext_bd_len_list[1])) { + if (unlikely(cqe->bw_ext_bd_len_list[1])) { DP_ERR(edev, - "Unlikely - got a TPA aggregation with more than one ext_bd_len_list entry in the TPA start\n"); + "Unlikely - got a TPA aggregation with more than one bw_ext_bd_len_list entry in the TPA start\n"); tpa_info->state = QEDE_AGG_STATE_ERROR; } } diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c index f815435cf106..4c7f7a7fc151 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c @@ -247,6 +247,7 @@ static int qede_ptp_cfg_filters(struct qede_dev *edev) break; case HWTSTAMP_TX_ONESTEP_SYNC: + case HWTSTAMP_TX_ONESTEP_P2P: DP_ERR(edev, "One-step timestamping is not supported\n"); return -ERANGE; } diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 986f26578d34..0fade19e00d4 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -3602,7 +3602,7 @@ static int ql3xxx_set_mac_address(struct net_device *ndev, void *p) return 0; } -static void ql3xxx_tx_timeout(struct net_device *ndev) +static void ql3xxx_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct ql3_adapter *qdev = netdev_priv(ndev); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c index a496390b8632..07f9067affc6 100644 ---
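The qed_sriov.c hunks above stop OR-ing standalone QED_DMAE_FLAG_* constants into qed_dmae_params.flags and instead treat flags as a packed bit field updated with SET_FIELD(). The sketch below re-creates that pattern with a mask/shift macro of the same shape; the field masks, shifts, and the macro body are illustrative, not the driver's actual HSI definitions.

#include <stdint.h>
#include <stdio.h>

#define DMAE_PARAMS_SRC_VF_VALID_MASK     0x1 /* example value */
#define DMAE_PARAMS_SRC_VF_VALID_SHIFT    0   /* example value */
#define DMAE_PARAMS_COMPLETION_DST_MASK   0x1 /* example value */
#define DMAE_PARAMS_COMPLETION_DST_SHIFT  1   /* example value */

/* illustrative re-creation of a SET_FIELD-style helper */
#define SET_FIELD(value, name, field)					\
	do {								\
		(value) &= ~((uint32_t)name##_MASK << name##_SHIFT);	\
		(value) |= ((uint32_t)(field) & name##_MASK) << name##_SHIFT; \
	} while (0)

int main(void)
{
	uint32_t flags = 0;

	/* same shape as the qed_iov_copy_vf_msg() hunk above */
	SET_FIELD(flags, DMAE_PARAMS_SRC_VF_VALID, 0x1);
	SET_FIELD(flags, DMAE_PARAMS_COMPLETION_DST, 0x1);

	printf("flags = 0x%x\n", flags); /* 0x3 with these example shifts */
	return 0;
}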
a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c @@ -2043,6 +2043,7 @@ static void qlcnic_83xx_exec_template_cmd(struct qlcnic_adapter *p_dev, break; } entry += p_hdr->size; + cond_resched(); } p_dev->ahw->reset.seq_index = index; } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index c07438db30ba..9dd6cb36f366 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -56,7 +56,7 @@ static int qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent); static void qlcnic_remove(struct pci_dev *pdev); static int qlcnic_open(struct net_device *netdev); static int qlcnic_close(struct net_device *netdev); -static void qlcnic_tx_timeout(struct net_device *netdev); +static void qlcnic_tx_timeout(struct net_device *netdev, unsigned int txqueue); static void qlcnic_attach_work(struct work_struct *work); static void qlcnic_fwinit_work(struct work_struct *work); @@ -3068,7 +3068,7 @@ static void qlcnic_dump_rings(struct qlcnic_adapter *adapter) } -static void qlcnic_tx_timeout(struct net_device *netdev) +static void qlcnic_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct qlcnic_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c index afa10a163da1..f34ae8c75bc5 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c @@ -703,6 +703,7 @@ static u32 qlcnic_read_memory_test_agent(struct qlcnic_adapter *adapter, addr += 16; reg_read -= 16; ret += 16; + cond_resched(); } out: mutex_unlock(&adapter->ahw->mem_lock); @@ -1383,6 +1384,7 @@ int qlcnic_dump_fw(struct qlcnic_adapter *adapter) buf_offset += entry->hdr.cap_size; entry_offset += entry->hdr.offset; buffer = fw_dump->data + buf_offset; + cond_resched(); } fw_dump->clr = 1; diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 98f92268cbaa..18b0c7a2d6dc 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -282,25 +282,13 @@ static int emac_close(struct net_device *netdev) } /* Respond to a TX hang */ -static void emac_tx_timeout(struct net_device *netdev) +static void emac_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct emac_adapter *adpt = netdev_priv(netdev); schedule_work(&adpt->work_thread); } -/* IOCTL support for the interface */ -static int emac_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) -{ - if (!netif_running(netdev)) - return -EINVAL; - - if (!netdev->phydev) - return -ENODEV; - - return phy_mii_ioctl(netdev->phydev, ifr, cmd); -} - /** * emac_update_hw_stats - read the EMAC stat registers * @@ -387,7 +375,7 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_start_xmit = emac_start_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_change_mtu = emac_change_mtu, - .ndo_do_ioctl = emac_ioctl, + .ndo_do_ioctl = phy_do_ioctl_running, .ndo_tx_timeout = emac_tx_timeout, .ndo_get_stats64 = emac_get_stats64, .ndo_set_features = emac_set_features, diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index baac016f3ec0..5a3b65a6eb4f 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -785,7 +785,7 @@ 
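The qlcnic hunks above drop a cond_resched() into long reset-template and minidump loops so the CPU can be given up between iterations instead of being held for the whole dump. A userspace sketch of the same pattern follows, with sched_yield() standing in for cond_resched() and dummy per-entry work.

#include <sched.h>
#include <stdio.h>

#define NUM_ENTRIES 100000

static unsigned long process_entry(unsigned long i)
{
	return i * 2654435761UL; /* pretend to do real per-entry work */
}

int main(void)
{
	unsigned long i, sum = 0;

	for (i = 0; i < NUM_ENTRIES; i++) {
		sum += process_entry(i);
		sched_yield(); /* the kernel code uses cond_resched() here */
	}
	printf("done, checksum=%lu\n", sum);
	return 0;
}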
qcaspi_netdev_xmit(struct sk_buff *skb, struct net_device *dev) } static void -qcaspi_netdev_tx_timeout(struct net_device *dev) +qcaspi_netdev_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct qcaspi *qca = netdev_priv(dev); diff --git a/drivers/net/ethernet/qualcomm/qca_uart.c b/drivers/net/ethernet/qualcomm/qca_uart.c index 0981068504fa..375a844cd27c 100644 --- a/drivers/net/ethernet/qualcomm/qca_uart.c +++ b/drivers/net/ethernet/qualcomm/qca_uart.c @@ -248,7 +248,7 @@ out: return NETDEV_TX_OK; } -static void qcauart_netdev_tx_timeout(struct net_device *dev) +static void qcauart_netdev_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct qcauart *qca = netdev_priv(dev); diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c index 274e5b4bc4ac..f5ecc410ff85 100644 --- a/drivers/net/ethernet/rdc/r6040.c +++ b/drivers/net/ethernet/rdc/r6040.c @@ -410,7 +410,7 @@ static void r6040_init_mac_regs(struct net_device *dev) iowrite16(TM2TX, ioaddr + MTPR); } -static void r6040_tx_timeout(struct net_device *dev) +static void r6040_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct r6040_private *priv = netdev_priv(dev); void __iomem *ioaddr = priv->base; @@ -498,14 +498,6 @@ static int r6040_close(struct net_device *dev) return 0; } -static int r6040_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) -{ - if (!dev->phydev) - return -EINVAL; - - return phy_mii_ioctl(dev->phydev, rq, cmd); -} - static int r6040_rx(struct net_device *dev, int limit) { struct r6040_private *priv = netdev_priv(dev); @@ -957,7 +949,7 @@ static const struct net_device_ops r6040_netdev_ops = { .ndo_set_rx_mode = r6040_multicast_list, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, - .ndo_do_ioctl = r6040_ioctl, + .ndo_do_ioctl = phy_do_ioctl, .ndo_tx_timeout = r6040_tx_timeout, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = r6040_poll_controller, diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index 4f910c4f67b0..60d342f82fb3 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -1235,7 +1235,7 @@ static int cp_close (struct net_device *dev) return 0; } -static void cp_tx_timeout(struct net_device *dev) +static void cp_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct cp_private *cp = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index 55d01266e615..5caeb8368eab 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -642,7 +642,7 @@ static int mdio_read (struct net_device *dev, int phy_id, int location); static void mdio_write (struct net_device *dev, int phy_id, int location, int val); static void rtl8139_start_thread(struct rtl8139_private *tp); -static void rtl8139_tx_timeout (struct net_device *dev); +static void rtl8139_tx_timeout (struct net_device *dev, unsigned int txqueue); static void rtl8139_init_ring (struct net_device *dev); static netdev_tx_t rtl8139_start_xmit (struct sk_buff *skb, struct net_device *dev); @@ -1700,7 +1700,7 @@ static void rtl8139_tx_timeout_task (struct work_struct *work) spin_unlock_bh(&tp->rx_lock); } -static void rtl8139_tx_timeout (struct net_device *dev) +static void rtl8139_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct rtl8139_private *tp = netdev_priv(dev); diff --git a/drivers/net/ethernet/realtek/Makefile 
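The tx_timeout changes running through these drivers all follow the same ndo_tx_timeout signature update: the callback now also receives the index of the TX queue that stalled. The sketch below models that with reduced stand-ins for struct net_device and net_device_ops; it is not kernel code.

#include <stdio.h>

struct fake_netdev {
	const char *name;
};

struct fake_netdev_ops {
	/* old form: void (*ndo_tx_timeout)(struct fake_netdev *dev); */
	void (*ndo_tx_timeout)(struct fake_netdev *dev, unsigned int txqueue);
};

static void drv_tx_timeout(struct fake_netdev *dev, unsigned int txqueue)
{
	/* a real driver would dump state and schedule a reset here */
	printf("%s: TX queue %u timed out, resetting\n", dev->name, txqueue);
}

static const struct fake_netdev_ops drv_ops = {
	.ndo_tx_timeout = drv_tx_timeout,
};

int main(void)
{
	struct fake_netdev dev = { .name = "eth0" };

	/* the core watchdog would pass the queue that actually stalled */
	drv_ops.ndo_tx_timeout(&dev, 3);
	return 0;
}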
b/drivers/net/ethernet/realtek/Makefile index d5304bad2372..2e1d78b106b0 100644 --- a/drivers/net/ethernet/realtek/Makefile +++ b/drivers/net/ethernet/realtek/Makefile @@ -6,5 +6,5 @@ obj-$(CONFIG_8139CP) += 8139cp.o obj-$(CONFIG_8139TOO) += 8139too.o obj-$(CONFIG_ATP) += atp.o -r8169-objs += r8169_main.o r8169_firmware.o +r8169-objs += r8169_main.o r8169_firmware.o r8169_phy_config.o obj-$(CONFIG_R8169) += r8169.o diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c index 58e0ca9093d3..9e3b35c97e63 100644 --- a/drivers/net/ethernet/realtek/atp.c +++ b/drivers/net/ethernet/realtek/atp.c @@ -204,7 +204,7 @@ static void net_rx(struct net_device *dev); static void read_block(long ioaddr, int length, unsigned char *buffer, int data_mode); static int net_close(struct net_device *dev); static void set_rx_mode(struct net_device *dev); -static void tx_timeout(struct net_device *dev); +static void tx_timeout(struct net_device *dev, unsigned int txqueue); /* A list of all installed ATP devices, for removing the driver module. */ @@ -533,7 +533,7 @@ static void write_packet(long ioaddr, int length, unsigned char *packet, int pad outb(Ctrl_HNibWrite | Ctrl_SelData | Ctrl_IRQEN, ioaddr + PAR_CONTROL); } -static void tx_timeout(struct net_device *dev) +static void tx_timeout(struct net_device *dev, unsigned int txqueue) { long ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h new file mode 100644 index 000000000000..22a6a057b11e --- /dev/null +++ b/drivers/net/ethernet/realtek/r8169.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* r8169.h: RealTek 8169/8168/8101 ethernet driver. + * + * Copyright (c) 2002 ShuChen <shuchen@realtek.com.tw> + * Copyright (c) 2003 - 2007 Francois Romieu <romieu@fr.zoreil.com> + * Copyright (c) a lot of people too. Please respect their work. + * + * See MAINTAINERS file for support contact information. 
+ */ + +#include <linux/types.h> +#include <linux/phy.h> + +enum mac_version { + /* support for ancient RTL_GIGA_MAC_VER_01 has been removed */ + RTL_GIGA_MAC_VER_02, + RTL_GIGA_MAC_VER_03, + RTL_GIGA_MAC_VER_04, + RTL_GIGA_MAC_VER_05, + RTL_GIGA_MAC_VER_06, + RTL_GIGA_MAC_VER_07, + RTL_GIGA_MAC_VER_08, + RTL_GIGA_MAC_VER_09, + RTL_GIGA_MAC_VER_10, + RTL_GIGA_MAC_VER_11, + RTL_GIGA_MAC_VER_12, + RTL_GIGA_MAC_VER_13, + RTL_GIGA_MAC_VER_14, + RTL_GIGA_MAC_VER_15, + RTL_GIGA_MAC_VER_16, + RTL_GIGA_MAC_VER_17, + RTL_GIGA_MAC_VER_18, + RTL_GIGA_MAC_VER_19, + RTL_GIGA_MAC_VER_20, + RTL_GIGA_MAC_VER_21, + RTL_GIGA_MAC_VER_22, + RTL_GIGA_MAC_VER_23, + RTL_GIGA_MAC_VER_24, + RTL_GIGA_MAC_VER_25, + RTL_GIGA_MAC_VER_26, + RTL_GIGA_MAC_VER_27, + RTL_GIGA_MAC_VER_28, + RTL_GIGA_MAC_VER_29, + RTL_GIGA_MAC_VER_30, + RTL_GIGA_MAC_VER_31, + RTL_GIGA_MAC_VER_32, + RTL_GIGA_MAC_VER_33, + RTL_GIGA_MAC_VER_34, + RTL_GIGA_MAC_VER_35, + RTL_GIGA_MAC_VER_36, + RTL_GIGA_MAC_VER_37, + RTL_GIGA_MAC_VER_38, + RTL_GIGA_MAC_VER_39, + RTL_GIGA_MAC_VER_40, + RTL_GIGA_MAC_VER_41, + RTL_GIGA_MAC_VER_42, + RTL_GIGA_MAC_VER_43, + RTL_GIGA_MAC_VER_44, + RTL_GIGA_MAC_VER_45, + RTL_GIGA_MAC_VER_46, + RTL_GIGA_MAC_VER_47, + RTL_GIGA_MAC_VER_48, + RTL_GIGA_MAC_VER_49, + RTL_GIGA_MAC_VER_50, + RTL_GIGA_MAC_VER_51, + RTL_GIGA_MAC_VER_52, + RTL_GIGA_MAC_VER_60, + RTL_GIGA_MAC_VER_61, + RTL_GIGA_MAC_NONE +}; + +struct rtl8169_private; + +void r8169_apply_firmware(struct rtl8169_private *tp); +u16 rtl8168h_2_get_adc_bias_ioffset(struct rtl8169_private *tp); +u8 rtl8168d_efuse_read(struct rtl8169_private *tp, int reg_addr); +void r8169_hw_phy_config(struct rtl8169_private *tp, struct phy_device *phydev, + enum mac_version ver); diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 67a4d5d45e3a..aaa316be6183 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -31,6 +31,7 @@ #include <linux/ipv6.h> #include <net/ip6_checksum.h> +#include "r8169.h" #include "r8169_firmware.h" #define MODULENAME "r8169" @@ -84,65 +85,6 @@ #define RTL_R16(tp, reg) readw(tp->mmio_addr + (reg)) #define RTL_R32(tp, reg) readl(tp->mmio_addr + (reg)) -enum mac_version { - /* support for ancient RTL_GIGA_MAC_VER_01 has been removed */ - RTL_GIGA_MAC_VER_02, - RTL_GIGA_MAC_VER_03, - RTL_GIGA_MAC_VER_04, - RTL_GIGA_MAC_VER_05, - RTL_GIGA_MAC_VER_06, - RTL_GIGA_MAC_VER_07, - RTL_GIGA_MAC_VER_08, - RTL_GIGA_MAC_VER_09, - RTL_GIGA_MAC_VER_10, - RTL_GIGA_MAC_VER_11, - RTL_GIGA_MAC_VER_12, - RTL_GIGA_MAC_VER_13, - RTL_GIGA_MAC_VER_14, - RTL_GIGA_MAC_VER_15, - RTL_GIGA_MAC_VER_16, - RTL_GIGA_MAC_VER_17, - RTL_GIGA_MAC_VER_18, - RTL_GIGA_MAC_VER_19, - RTL_GIGA_MAC_VER_20, - RTL_GIGA_MAC_VER_21, - RTL_GIGA_MAC_VER_22, - RTL_GIGA_MAC_VER_23, - RTL_GIGA_MAC_VER_24, - RTL_GIGA_MAC_VER_25, - RTL_GIGA_MAC_VER_26, - RTL_GIGA_MAC_VER_27, - RTL_GIGA_MAC_VER_28, - RTL_GIGA_MAC_VER_29, - RTL_GIGA_MAC_VER_30, - RTL_GIGA_MAC_VER_31, - RTL_GIGA_MAC_VER_32, - RTL_GIGA_MAC_VER_33, - RTL_GIGA_MAC_VER_34, - RTL_GIGA_MAC_VER_35, - RTL_GIGA_MAC_VER_36, - RTL_GIGA_MAC_VER_37, - RTL_GIGA_MAC_VER_38, - RTL_GIGA_MAC_VER_39, - RTL_GIGA_MAC_VER_40, - RTL_GIGA_MAC_VER_41, - RTL_GIGA_MAC_VER_42, - RTL_GIGA_MAC_VER_43, - RTL_GIGA_MAC_VER_44, - RTL_GIGA_MAC_VER_45, - RTL_GIGA_MAC_VER_46, - RTL_GIGA_MAC_VER_47, - RTL_GIGA_MAC_VER_48, - RTL_GIGA_MAC_VER_49, - RTL_GIGA_MAC_VER_50, - RTL_GIGA_MAC_VER_51, - RTL_GIGA_MAC_VER_52, - RTL_GIGA_MAC_VER_60, - RTL_GIGA_MAC_VER_61, - RTL_GIGA_MAC_NONE -}; - 
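The new r8169.h above shares enum mac_version and a few helpers so that PHY programming can move out of r8169_main.c into the r8169_phy_config.o object added to the Makefile. One plausible shape for that split is a per-mac-version handler table behind the exported r8169_hw_phy_config() prototype; the sketch below uses dummy types and handlers and may not match how the real r8169_phy_config.c is organized.

#include <stdio.h>

enum mac_version { MAC_VER_25, MAC_VER_42, MAC_VER_61, MAC_VER_NONE };

struct rtl_priv { const char *chip; };
struct phy_dev { int addr; };

typedef void (*phy_cfg_fn)(struct rtl_priv *tp, struct phy_dev *phydev);

static void cfg_8168d(struct rtl_priv *tp, struct phy_dev *phydev)
{
	printf("%s: applying 8168d PHY tweaks to PHY %d\n", tp->chip, phydev->addr);
}

static void cfg_8168g(struct rtl_priv *tp, struct phy_dev *phydev)
{
	printf("%s: applying 8168g PHY tweaks to PHY %d\n", tp->chip, phydev->addr);
}

/* one handler per supported mac_version; unhandled entries stay NULL */
static const phy_cfg_fn phy_configs[MAC_VER_NONE] = {
	[MAC_VER_25] = cfg_8168d,
	[MAC_VER_42] = cfg_8168g,
	[MAC_VER_61] = cfg_8168g,
};

/* mirrors the r8169_hw_phy_config() prototype exported by r8169.h */
static void hw_phy_config(struct rtl_priv *tp, struct phy_dev *phydev,
			  enum mac_version ver)
{
	if (ver < MAC_VER_NONE && phy_configs[ver])
		phy_configs[ver](tp, phydev);
}

int main(void)
{
	struct rtl_priv tp = { .chip = "RTL8168g" };
	struct phy_dev phydev = { .addr = 0 };

	hw_phy_config(&tp, &phydev, MAC_VER_42);
	return 0;
}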
-#define JUMBO_1K ETH_DATA_LEN #define JUMBO_4K (4*1024 - ETH_HLEN - 2) #define JUMBO_6K (6*1024 - ETH_HLEN - 2) #define JUMBO_7K (7*1024 - ETH_HLEN - 2) @@ -492,6 +434,7 @@ enum rtl_register_content { /* CPlusCmd p.31 */ EnableBist = (1 << 15), // 8168 8101 Mac_dbgo_oe = (1 << 14), // 8168 8101 + EnAnaPLL = (1 << 14), // 8169 Normal_mode = (1 << 13), // unused Force_half_dup = (1 << 12), // 8168 8101 Force_rxflow_en = (1 << 11), // 8168 8101 @@ -1078,52 +1021,6 @@ static int rtl_readphy(struct rtl8169_private *tp, int location) } } -static void rtl_patchphy(struct rtl8169_private *tp, int reg_addr, int value) -{ - rtl_writephy(tp, reg_addr, rtl_readphy(tp, reg_addr) | value); -} - -static void rtl_w0w1_phy(struct rtl8169_private *tp, int reg_addr, int p, int m) -{ - int val; - - val = rtl_readphy(tp, reg_addr); - rtl_writephy(tp, reg_addr, (val & ~m) | p); -} - -static void r8168d_modify_extpage(struct phy_device *phydev, int extpage, - int reg, u16 mask, u16 val) -{ - int oldpage = phy_select_page(phydev, 0x0007); - - __phy_write(phydev, 0x1e, extpage); - __phy_modify(phydev, reg, mask, val); - - phy_restore_page(phydev, oldpage, 0); -} - -static void r8168d_phy_param(struct phy_device *phydev, u16 parm, - u16 mask, u16 val) -{ - int oldpage = phy_select_page(phydev, 0x0005); - - __phy_write(phydev, 0x05, parm); - __phy_modify(phydev, 0x06, mask, val); - - phy_restore_page(phydev, oldpage, 0); -} - -static void r8168g_phy_param(struct phy_device *phydev, u16 parm, - u16 mask, u16 val) -{ - int oldpage = phy_select_page(phydev, 0x0a43); - - __phy_write(phydev, 0x13, parm); - __phy_modify(phydev, 0x14, mask, val); - - phy_restore_page(phydev, oldpage, 0); -} - DECLARE_RTL_COND(rtl_ephyar_cond) { return RTL_R32(tp, EPHYAR) & EPHYAR_FLAG; @@ -1372,7 +1269,7 @@ DECLARE_RTL_COND(rtl_efusear_cond) return RTL_R32(tp, EFUSEAR) & EFUSEAR_FLAG; } -static u8 rtl8168d_efuse_read(struct rtl8169_private *tp, int reg_addr) +u8 rtl8168d_efuse_read(struct rtl8169_private *tp, int reg_addr) { RTL_W32(tp, EFUSEAR, (reg_addr & EFUSEAR_REG_MASK) << EFUSEAR_REG_SHIFT); @@ -1596,7 +1493,7 @@ static netdev_features_t rtl8169_fix_features(struct net_device *dev, if (dev->mtu > TD_MSS_MAX) features &= ~NETIF_F_ALL_TSO; - if (dev->mtu > JUMBO_1K && + if (dev->mtu > ETH_DATA_LEN && tp->mac_version > RTL_GIGA_MAC_VER_06) features &= ~(NETIF_F_CSUM_MASK | NETIF_F_ALL_TSO); @@ -2268,22 +2165,6 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp) } } -struct phy_reg { - u16 reg; - u16 val; -}; - -static void __rtl_writephy_batch(struct rtl8169_private *tp, - const struct phy_reg *regs, int len) -{ - while (len-- > 0) { - rtl_writephy(tp, regs->reg, regs->val); - regs++; - } -} - -#define rtl_writephy_batch(tp, a) __rtl_writephy_batch(tp, a, ARRAY_SIZE(a)) - static void rtl_release_firmware(struct rtl8169_private *tp) { if (tp->rtl_fw) { @@ -2293,7 +2174,7 @@ static void rtl_release_firmware(struct rtl8169_private *tp) } } -static void rtl_apply_firmware(struct rtl8169_private *tp) +void r8169_apply_firmware(struct rtl8169_private *tp) { /* TODO: release firmware if rtl_fw_write_firmware signals failure. 
*/ if (tp->rtl_fw) @@ -2315,594 +2196,6 @@ static void rtl8125_config_eee_mac(struct rtl8169_private *tp) r8168_mac_ocp_modify(tp, 0xeb62, 0, BIT(2) | BIT(1)); } -static void rtl8168f_config_eee_phy(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - - r8168d_modify_extpage(phydev, 0x0020, 0x15, 0, BIT(8)); - r8168d_phy_param(phydev, 0x8b85, 0, BIT(13)); -} - -static void rtl8168g_config_eee_phy(struct rtl8169_private *tp) -{ - phy_modify_paged(tp->phydev, 0x0a43, 0x11, 0, BIT(4)); -} - -static void rtl8168h_config_eee_phy(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - - rtl8168g_config_eee_phy(tp); - - phy_modify_paged(phydev, 0xa4a, 0x11, 0x0000, 0x0200); - phy_modify_paged(phydev, 0xa42, 0x14, 0x0000, 0x0080); -} - -static void rtl8125_config_eee_phy(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - - rtl8168h_config_eee_phy(tp); - - phy_modify_paged(phydev, 0xa6d, 0x12, 0x0001, 0x0000); - phy_modify_paged(phydev, 0xa6d, 0x14, 0x0010, 0x0000); -} - -static void rtl8169s_hw_phy_config(struct rtl8169_private *tp) -{ - static const struct phy_reg phy_reg_init[] = { - { 0x1f, 0x0001 }, - { 0x06, 0x006e }, - { 0x08, 0x0708 }, - { 0x15, 0x4000 }, - { 0x18, 0x65c7 }, - - { 0x1f, 0x0001 }, - { 0x03, 0x00a1 }, - { 0x02, 0x0008 }, - { 0x01, 0x0120 }, - { 0x00, 0x1000 }, - { 0x04, 0x0800 }, - { 0x04, 0x0000 }, - - { 0x03, 0xff41 }, - { 0x02, 0xdf60 }, - { 0x01, 0x0140 }, - { 0x00, 0x0077 }, - { 0x04, 0x7800 }, - { 0x04, 0x7000 }, - - { 0x03, 0x802f }, - { 0x02, 0x4f02 }, - { 0x01, 0x0409 }, - { 0x00, 0xf0f9 }, - { 0x04, 0x9800 }, - { 0x04, 0x9000 }, - - { 0x03, 0xdf01 }, - { 0x02, 0xdf20 }, - { 0x01, 0xff95 }, - { 0x00, 0xba00 }, - { 0x04, 0xa800 }, - { 0x04, 0xa000 }, - - { 0x03, 0xff41 }, - { 0x02, 0xdf20 }, - { 0x01, 0x0140 }, - { 0x00, 0x00bb }, - { 0x04, 0xb800 }, - { 0x04, 0xb000 }, - - { 0x03, 0xdf41 }, - { 0x02, 0xdc60 }, - { 0x01, 0x6340 }, - { 0x00, 0x007d }, - { 0x04, 0xd800 }, - { 0x04, 0xd000 }, - - { 0x03, 0xdf01 }, - { 0x02, 0xdf20 }, - { 0x01, 0x100a }, - { 0x00, 0xa0ff }, - { 0x04, 0xf800 }, - { 0x04, 0xf000 }, - - { 0x1f, 0x0000 }, - { 0x0b, 0x0000 }, - { 0x00, 0x9200 } - }; - - rtl_writephy_batch(tp, phy_reg_init); -} - -static void rtl8169sb_hw_phy_config(struct rtl8169_private *tp) -{ - phy_write_paged(tp->phydev, 0x0002, 0x01, 0x90d0); -} - -static void rtl8169scd_hw_phy_config_quirk(struct rtl8169_private *tp) -{ - struct pci_dev *pdev = tp->pci_dev; - - if ((pdev->subsystem_vendor != PCI_VENDOR_ID_GIGABYTE) || - (pdev->subsystem_device != 0xe000)) - return; - - phy_write_paged(tp->phydev, 0x0001, 0x10, 0xf01b); -} - -static void rtl8169scd_hw_phy_config(struct rtl8169_private *tp) -{ - static const struct phy_reg phy_reg_init[] = { - { 0x1f, 0x0001 }, - { 0x04, 0x0000 }, - { 0x03, 0x00a1 }, - { 0x02, 0x0008 }, - { 0x01, 0x0120 }, - { 0x00, 0x1000 }, - { 0x04, 0x0800 }, - { 0x04, 0x9000 }, - { 0x03, 0x802f }, - { 0x02, 0x4f02 }, - { 0x01, 0x0409 }, - { 0x00, 0xf099 }, - { 0x04, 0x9800 }, - { 0x04, 0xa000 }, - { 0x03, 0xdf01 }, - { 0x02, 0xdf20 }, - { 0x01, 0xff95 }, - { 0x00, 0xba00 }, - { 0x04, 0xa800 }, - { 0x04, 0xf000 }, - { 0x03, 0xdf01 }, - { 0x02, 0xdf20 }, - { 0x01, 0x101a }, - { 0x00, 0xa0ff }, - { 0x04, 0xf800 }, - { 0x04, 0x0000 }, - { 0x1f, 0x0000 }, - - { 0x1f, 0x0001 }, - { 0x10, 0xf41b }, - { 0x14, 0xfb54 }, - { 0x18, 0xf5c7 }, - { 0x1f, 0x0000 }, - - { 0x1f, 0x0001 }, - { 0x17, 0x0cc0 }, - { 0x1f, 0x0000 } - }; - - rtl_writephy_batch(tp, phy_reg_init); - - 
rtl8169scd_hw_phy_config_quirk(tp); -} - -static void rtl8169sce_hw_phy_config(struct rtl8169_private *tp) -{ - static const struct phy_reg phy_reg_init[] = { - { 0x1f, 0x0001 }, - { 0x04, 0x0000 }, - { 0x03, 0x00a1 }, - { 0x02, 0x0008 }, - { 0x01, 0x0120 }, - { 0x00, 0x1000 }, - { 0x04, 0x0800 }, - { 0x04, 0x9000 }, - { 0x03, 0x802f }, - { 0x02, 0x4f02 }, - { 0x01, 0x0409 }, - { 0x00, 0xf099 }, - { 0x04, 0x9800 }, - { 0x04, 0xa000 }, - { 0x03, 0xdf01 }, - { 0x02, 0xdf20 }, - { 0x01, 0xff95 }, - { 0x00, 0xba00 }, - { 0x04, 0xa800 }, - { 0x04, 0xf000 }, - { 0x03, 0xdf01 }, - { 0x02, 0xdf20 }, - { 0x01, 0x101a }, - { 0x00, 0xa0ff }, - { 0x04, 0xf800 }, - { 0x04, 0x0000 }, - { 0x1f, 0x0000 }, - - { 0x1f, 0x0001 }, - { 0x0b, 0x8480 }, - { 0x1f, 0x0000 }, - - { 0x1f, 0x0001 }, - { 0x18, 0x67c7 }, - { 0x04, 0x2000 }, - { 0x03, 0x002f }, - { 0x02, 0x4360 }, - { 0x01, 0x0109 }, - { 0x00, 0x3022 }, - { 0x04, 0x2800 }, - { 0x1f, 0x0000 }, - - { 0x1f, 0x0001 }, - { 0x17, 0x0cc0 }, - { 0x1f, 0x0000 } - }; - - rtl_writephy_batch(tp, phy_reg_init); -} - -static void rtl8168bb_hw_phy_config(struct rtl8169_private *tp) -{ - rtl_writephy(tp, 0x1f, 0x0001); - rtl_patchphy(tp, 0x16, 1 << 0); - rtl_writephy(tp, 0x10, 0xf41b); - rtl_writephy(tp, 0x1f, 0x0000); -} - -static void rtl8168bef_hw_phy_config(struct rtl8169_private *tp) -{ - phy_write_paged(tp->phydev, 0x0001, 0x10, 0xf41b); -} - -static void rtl8168cp_1_hw_phy_config(struct rtl8169_private *tp) -{ - phy_write(tp->phydev, 0x1d, 0x0f00); - phy_write_paged(tp->phydev, 0x0002, 0x0c, 0x1ec8); -} - -static void rtl8168cp_2_hw_phy_config(struct rtl8169_private *tp) -{ - phy_set_bits(tp->phydev, 0x14, BIT(5)); - phy_set_bits(tp->phydev, 0x0d, BIT(5)); - phy_write_paged(tp->phydev, 0x0001, 0x1d, 0x3d98); -} - -static void rtl8168c_1_hw_phy_config(struct rtl8169_private *tp) -{ - static const struct phy_reg phy_reg_init[] = { - { 0x1f, 0x0001 }, - { 0x12, 0x2300 }, - { 0x1f, 0x0002 }, - { 0x00, 0x88d4 }, - { 0x01, 0x82b1 }, - { 0x03, 0x7002 }, - { 0x08, 0x9e30 }, - { 0x09, 0x01f0 }, - { 0x0a, 0x5500 }, - { 0x0c, 0x00c8 }, - { 0x1f, 0x0003 }, - { 0x12, 0xc096 }, - { 0x16, 0x000a }, - { 0x1f, 0x0000 }, - { 0x1f, 0x0000 }, - { 0x09, 0x2000 }, - { 0x09, 0x0000 } - }; - - rtl_writephy_batch(tp, phy_reg_init); - - rtl_patchphy(tp, 0x14, 1 << 5); - rtl_patchphy(tp, 0x0d, 1 << 5); - rtl_writephy(tp, 0x1f, 0x0000); -} - -static void rtl8168c_2_hw_phy_config(struct rtl8169_private *tp) -{ - static const struct phy_reg phy_reg_init[] = { - { 0x1f, 0x0001 }, - { 0x12, 0x2300 }, - { 0x03, 0x802f }, - { 0x02, 0x4f02 }, - { 0x01, 0x0409 }, - { 0x00, 0xf099 }, - { 0x04, 0x9800 }, - { 0x04, 0x9000 }, - { 0x1d, 0x3d98 }, - { 0x1f, 0x0002 }, - { 0x0c, 0x7eb8 }, - { 0x06, 0x0761 }, - { 0x1f, 0x0003 }, - { 0x16, 0x0f0a }, - { 0x1f, 0x0000 } - }; - - rtl_writephy_batch(tp, phy_reg_init); - - rtl_patchphy(tp, 0x16, 1 << 0); - rtl_patchphy(tp, 0x14, 1 << 5); - rtl_patchphy(tp, 0x0d, 1 << 5); - rtl_writephy(tp, 0x1f, 0x0000); -} - -static void rtl8168c_3_hw_phy_config(struct rtl8169_private *tp) -{ - static const struct phy_reg phy_reg_init[] = { - { 0x1f, 0x0001 }, - { 0x12, 0x2300 }, - { 0x1d, 0x3d98 }, - { 0x1f, 0x0002 }, - { 0x0c, 0x7eb8 }, - { 0x06, 0x5461 }, - { 0x1f, 0x0003 }, - { 0x16, 0x0f0a }, - { 0x1f, 0x0000 } - }; - - rtl_writephy_batch(tp, phy_reg_init); - - rtl_patchphy(tp, 0x16, 1 << 0); - rtl_patchphy(tp, 0x14, 1 << 5); - rtl_patchphy(tp, 0x0d, 1 << 5); - rtl_writephy(tp, 0x1f, 0x0000); -} - -static const struct phy_reg rtl8168d_1_phy_reg_init_0[] = { - /* Channel 
Estimation */ - { 0x1f, 0x0001 }, - { 0x06, 0x4064 }, - { 0x07, 0x2863 }, - { 0x08, 0x059c }, - { 0x09, 0x26b4 }, - { 0x0a, 0x6a19 }, - { 0x0b, 0xdcc8 }, - { 0x10, 0xf06d }, - { 0x14, 0x7f68 }, - { 0x18, 0x7fd9 }, - { 0x1c, 0xf0ff }, - { 0x1d, 0x3d9c }, - { 0x1f, 0x0003 }, - { 0x12, 0xf49f }, - { 0x13, 0x070b }, - { 0x1a, 0x05ad }, - { 0x14, 0x94c0 }, - - /* - * Tx Error Issue - * Enhance line driver power - */ - { 0x1f, 0x0002 }, - { 0x06, 0x5561 }, - { 0x1f, 0x0005 }, - { 0x05, 0x8332 }, - { 0x06, 0x5561 }, - - /* - * Can not link to 1Gbps with bad cable - * Decrease SNR threshold form 21.07dB to 19.04dB - */ - { 0x1f, 0x0001 }, - { 0x17, 0x0cc0 }, - - { 0x1f, 0x0000 }, - { 0x0d, 0xf880 } -}; - -static const struct phy_reg rtl8168d_1_phy_reg_init_1[] = { - { 0x1f, 0x0002 }, - { 0x05, 0x669a }, - { 0x1f, 0x0005 }, - { 0x05, 0x8330 }, - { 0x06, 0x669a }, - { 0x1f, 0x0002 } -}; - -static void rtl8168d_apply_firmware_cond(struct rtl8169_private *tp, u16 val) -{ - u16 reg_val; - - rtl_writephy(tp, 0x1f, 0x0005); - rtl_writephy(tp, 0x05, 0x001b); - reg_val = rtl_readphy(tp, 0x06); - rtl_writephy(tp, 0x1f, 0x0000); - - if (reg_val != val) - netif_warn(tp, hw, tp->dev, "chipset not ready for firmware\n"); - else - rtl_apply_firmware(tp); -} - -static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp) -{ - rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_0); - - /* - * Rx Error Issue - * Fine Tune Switching regulator parameter - */ - rtl_writephy(tp, 0x1f, 0x0002); - rtl_w0w1_phy(tp, 0x0b, 0x0010, 0x00ef); - rtl_w0w1_phy(tp, 0x0c, 0xa200, 0x5d00); - - if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) { - int val; - - rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_1); - - val = rtl_readphy(tp, 0x0d); - - if ((val & 0x00ff) != 0x006c) { - static const u32 set[] = { - 0x0065, 0x0066, 0x0067, 0x0068, - 0x0069, 0x006a, 0x006b, 0x006c - }; - int i; - - rtl_writephy(tp, 0x1f, 0x0002); - - val &= 0xff00; - for (i = 0; i < ARRAY_SIZE(set); i++) - rtl_writephy(tp, 0x0d, val | set[i]); - } - } else { - phy_write_paged(tp->phydev, 0x0002, 0x05, 0x6662); - r8168d_phy_param(tp->phydev, 0x8330, 0xffff, 0x6662); - } - - /* RSET couple improve */ - rtl_writephy(tp, 0x1f, 0x0002); - rtl_patchphy(tp, 0x0d, 0x0300); - rtl_patchphy(tp, 0x0f, 0x0010); - - /* Fine tune PLL performance */ - rtl_writephy(tp, 0x1f, 0x0002); - rtl_w0w1_phy(tp, 0x02, 0x0100, 0x0600); - rtl_w0w1_phy(tp, 0x03, 0x0000, 0xe000); - rtl_writephy(tp, 0x1f, 0x0000); - - rtl8168d_apply_firmware_cond(tp, 0xbf00); -} - -static void rtl8168d_2_hw_phy_config(struct rtl8169_private *tp) -{ - rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_0); - - if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) { - int val; - - rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_1); - - val = rtl_readphy(tp, 0x0d); - if ((val & 0x00ff) != 0x006c) { - static const u32 set[] = { - 0x0065, 0x0066, 0x0067, 0x0068, - 0x0069, 0x006a, 0x006b, 0x006c - }; - int i; - - rtl_writephy(tp, 0x1f, 0x0002); - - val &= 0xff00; - for (i = 0; i < ARRAY_SIZE(set); i++) - rtl_writephy(tp, 0x0d, val | set[i]); - } - } else { - phy_write_paged(tp->phydev, 0x0002, 0x05, 0x2642); - r8168d_phy_param(tp->phydev, 0x8330, 0xffff, 0x2642); - } - - /* Fine tune PLL performance */ - rtl_writephy(tp, 0x1f, 0x0002); - rtl_w0w1_phy(tp, 0x02, 0x0100, 0x0600); - rtl_w0w1_phy(tp, 0x03, 0x0000, 0xe000); - - /* Switching regulator Slew rate */ - rtl_writephy(tp, 0x1f, 0x0002); - rtl_patchphy(tp, 0x0f, 0x0017); - rtl_writephy(tp, 0x1f, 0x0000); - - rtl8168d_apply_firmware_cond(tp, 0xb300); -} - -static void 
rtl8168d_3_hw_phy_config(struct rtl8169_private *tp) -{ - static const struct phy_reg phy_reg_init[] = { - { 0x1f, 0x0002 }, - { 0x10, 0x0008 }, - { 0x0d, 0x006c }, - - { 0x1f, 0x0000 }, - { 0x0d, 0xf880 }, - - { 0x1f, 0x0001 }, - { 0x17, 0x0cc0 }, - - { 0x1f, 0x0001 }, - { 0x0b, 0xa4d8 }, - { 0x09, 0x281c }, - { 0x07, 0x2883 }, - { 0x0a, 0x6b35 }, - { 0x1d, 0x3da4 }, - { 0x1c, 0xeffd }, - { 0x14, 0x7f52 }, - { 0x18, 0x7fc6 }, - { 0x08, 0x0601 }, - { 0x06, 0x4063 }, - { 0x10, 0xf074 }, - { 0x1f, 0x0003 }, - { 0x13, 0x0789 }, - { 0x12, 0xf4bd }, - { 0x1a, 0x04fd }, - { 0x14, 0x84b0 }, - { 0x1f, 0x0000 }, - { 0x00, 0x9200 }, - - { 0x1f, 0x0005 }, - { 0x01, 0x0340 }, - { 0x1f, 0x0001 }, - { 0x04, 0x4000 }, - { 0x03, 0x1d21 }, - { 0x02, 0x0c32 }, - { 0x01, 0x0200 }, - { 0x00, 0x5554 }, - { 0x04, 0x4800 }, - { 0x04, 0x4000 }, - { 0x04, 0xf000 }, - { 0x03, 0xdf01 }, - { 0x02, 0xdf20 }, - { 0x01, 0x101a }, - { 0x00, 0xa0ff }, - { 0x04, 0xf800 }, - { 0x04, 0xf000 }, - { 0x1f, 0x0000 }, - }; - - rtl_writephy_batch(tp, phy_reg_init); - - r8168d_modify_extpage(tp->phydev, 0x0023, 0x16, 0xffff, 0x0000); -} - -static void rtl8168d_4_hw_phy_config(struct rtl8169_private *tp) -{ - phy_write_paged(tp->phydev, 0x0001, 0x17, 0x0cc0); - r8168d_modify_extpage(tp->phydev, 0x002d, 0x18, 0xffff, 0x0040); - phy_set_bits(tp->phydev, 0x0d, BIT(5)); -} - -static void rtl8168e_1_hw_phy_config(struct rtl8169_private *tp) -{ - static const struct phy_reg phy_reg_init[] = { - /* Channel estimation fine tune */ - { 0x1f, 0x0001 }, - { 0x0b, 0x6c20 }, - { 0x07, 0x2872 }, - { 0x1c, 0xefff }, - { 0x1f, 0x0003 }, - { 0x14, 0x6420 }, - { 0x1f, 0x0000 }, - }; - struct phy_device *phydev = tp->phydev; - - rtl_apply_firmware(tp); - - /* Enable Delay cap */ - r8168d_phy_param(phydev, 0x8b80, 0xffff, 0xc896); - - rtl_writephy_batch(tp, phy_reg_init); - - /* Update PFM & 10M TX idle timer */ - r8168d_modify_extpage(phydev, 0x002f, 0x15, 0xffff, 0x1919); - - r8168d_modify_extpage(phydev, 0x00ac, 0x18, 0xffff, 0x0006); - - /* DCO enable for 10M IDLE Power */ - r8168d_modify_extpage(phydev, 0x0023, 0x17, 0x0000, 0x0006); - - /* For impedance matching */ - phy_modify_paged(phydev, 0x0002, 0x08, 0x7f00, 0x8000); - - /* PHY auto speed down */ - r8168d_modify_extpage(phydev, 0x002d, 0x18, 0x0000, 0x0050); - phy_set_bits(phydev, 0x14, BIT(15)); - - r8168d_phy_param(phydev, 0x8b86, 0x0000, 0x0001); - r8168d_phy_param(phydev, 0x8b85, 0x2000, 0x0000); - - r8168d_modify_extpage(phydev, 0x0020, 0x15, 0x1100, 0x0000); - phy_write_paged(phydev, 0x0006, 0x00, 0x5a00); - - phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV, 0x0000); -} - static void rtl_rar_exgmac_set(struct rtl8169_private *tp, u8 *addr) { const u16 w[] = { @@ -2917,698 +2210,20 @@ static void rtl_rar_exgmac_set(struct rtl8169_private *tp, u8 *addr) rtl_eri_write(tp, 0xf4, ERIAR_MASK_1111, w[1] | (w[2] << 16)); } -static void rtl8168e_2_hw_phy_config(struct rtl8169_private *tp) +u16 rtl8168h_2_get_adc_bias_ioffset(struct rtl8169_private *tp) { - struct phy_device *phydev = tp->phydev; - - rtl_apply_firmware(tp); - - /* Enable Delay cap */ - r8168d_modify_extpage(phydev, 0x00ac, 0x18, 0xffff, 0x0006); - - /* Channel estimation fine tune */ - phy_write_paged(phydev, 0x0003, 0x09, 0xa20f); - - /* Green Setting */ - r8168d_phy_param(phydev, 0x8b5b, 0xffff, 0x9222); - r8168d_phy_param(phydev, 0x8b6d, 0xffff, 0x8000); - r8168d_phy_param(phydev, 0x8b76, 0xffff, 0x8000); - - /* For 4-corner performance improve */ - rtl_writephy(tp, 0x1f, 0x0005); - rtl_writephy(tp, 0x05, 0x8b80); - 
rtl_w0w1_phy(tp, 0x17, 0x0006, 0x0000); - rtl_writephy(tp, 0x1f, 0x0000); - - /* PHY auto speed down */ - r8168d_modify_extpage(phydev, 0x002d, 0x18, 0x0000, 0x0010); - phy_set_bits(phydev, 0x14, BIT(15)); - - /* improve 10M EEE waveform */ - r8168d_phy_param(phydev, 0x8b86, 0x0000, 0x0001); - - /* Improve 2-pair detection performance */ - r8168d_phy_param(phydev, 0x8b85, 0x0000, 0x4000); - - rtl8168f_config_eee_phy(tp); - rtl_enable_eee(tp); - - /* Green feature */ - rtl_writephy(tp, 0x1f, 0x0003); - rtl_w0w1_phy(tp, 0x19, 0x0001, 0x0000); - rtl_w0w1_phy(tp, 0x10, 0x0400, 0x0000); - rtl_writephy(tp, 0x1f, 0x0000); - rtl_writephy(tp, 0x1f, 0x0005); - rtl_w0w1_phy(tp, 0x01, 0x0100, 0x0000); - rtl_writephy(tp, 0x1f, 0x0000); - - /* Broken BIOS workaround: feed GigaMAC registers with MAC address. */ - rtl_rar_exgmac_set(tp, tp->dev->dev_addr); -} - -static void rtl8168f_hw_phy_config(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - - /* For 4-corner performance improve */ - r8168d_phy_param(phydev, 0x8b80, 0x0000, 0x0006); - - /* PHY auto speed down */ - r8168d_modify_extpage(phydev, 0x002d, 0x18, 0x0000, 0x0010); - phy_set_bits(phydev, 0x14, BIT(15)); - - /* Improve 10M EEE waveform */ - r8168d_phy_param(phydev, 0x8b86, 0x0000, 0x0001); - - rtl8168f_config_eee_phy(tp); - rtl_enable_eee(tp); -} - -static void rtl8168f_1_hw_phy_config(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - - rtl_apply_firmware(tp); - - /* Channel estimation fine tune */ - phy_write_paged(phydev, 0x0003, 0x09, 0xa20f); - - /* Modify green table for giga & fnet */ - r8168d_phy_param(phydev, 0x8b55, 0xffff, 0x0000); - r8168d_phy_param(phydev, 0x8b5e, 0xffff, 0x0000); - r8168d_phy_param(phydev, 0x8b67, 0xffff, 0x0000); - r8168d_phy_param(phydev, 0x8b70, 0xffff, 0x0000); - r8168d_modify_extpage(phydev, 0x0078, 0x17, 0xffff, 0x0000); - r8168d_modify_extpage(phydev, 0x0078, 0x19, 0xffff, 0x00fb); - - /* Modify green table for 10M */ - r8168d_phy_param(phydev, 0x8b79, 0xffff, 0xaa00); - - /* Disable hiimpedance detection (RTCT) */ - phy_write_paged(phydev, 0x0003, 0x01, 0x328a); - - rtl8168f_hw_phy_config(tp); - - /* Improve 2-pair detection performance */ - r8168d_phy_param(phydev, 0x8b85, 0x0000, 0x4000); -} - -static void rtl8168f_2_hw_phy_config(struct rtl8169_private *tp) -{ - rtl_apply_firmware(tp); - - rtl8168f_hw_phy_config(tp); -} - -static void rtl8411_hw_phy_config(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - - rtl_apply_firmware(tp); - - rtl8168f_hw_phy_config(tp); - - /* Improve 2-pair detection performance */ - r8168d_phy_param(phydev, 0x8b85, 0x0000, 0x4000); - - /* Channel estimation fine tune */ - phy_write_paged(phydev, 0x0003, 0x09, 0xa20f); - - /* Modify green table for giga & fnet */ - r8168d_phy_param(phydev, 0x8b55, 0xffff, 0x0000); - r8168d_phy_param(phydev, 0x8b5e, 0xffff, 0x0000); - r8168d_phy_param(phydev, 0x8b67, 0xffff, 0x0000); - r8168d_phy_param(phydev, 0x8b70, 0xffff, 0x0000); - r8168d_modify_extpage(phydev, 0x0078, 0x17, 0xffff, 0x0000); - r8168d_modify_extpage(phydev, 0x0078, 0x19, 0xffff, 0x00aa); - - /* Modify green table for 10M */ - r8168d_phy_param(phydev, 0x8b79, 0xffff, 0xaa00); - - /* Disable hiimpedance detection (RTCT) */ - phy_write_paged(phydev, 0x0003, 0x01, 0x328a); - - /* Modify green table for giga */ - r8168d_phy_param(phydev, 0x8b54, 0x0800, 0x0000); - r8168d_phy_param(phydev, 0x8b5d, 0x0800, 0x0000); - r8168d_phy_param(phydev, 0x8a7c, 0x0100, 0x0000); - r8168d_phy_param(phydev, 0x8a7f, 
0x0000, 0x0100); - r8168d_phy_param(phydev, 0x8a82, 0x0100, 0x0000); - r8168d_phy_param(phydev, 0x8a85, 0x0100, 0x0000); - r8168d_phy_param(phydev, 0x8a88, 0x0100, 0x0000); - - /* uc same-seed solution */ - r8168d_phy_param(phydev, 0x8b85, 0x0000, 0x8000); - - /* Green feature */ - rtl_writephy(tp, 0x1f, 0x0003); - rtl_w0w1_phy(tp, 0x19, 0x0000, 0x0001); - rtl_w0w1_phy(tp, 0x10, 0x0000, 0x0400); - rtl_writephy(tp, 0x1f, 0x0000); -} - -static void rtl8168g_disable_aldps(struct rtl8169_private *tp) -{ - phy_modify_paged(tp->phydev, 0x0a43, 0x10, BIT(2), 0); -} - -static void rtl8168g_phy_adjust_10m_aldps(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - - phy_modify_paged(phydev, 0x0bcc, 0x14, BIT(8), 0); - phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(7) | BIT(6)); - r8168g_phy_param(phydev, 0x8084, 0x6000, 0x0000); - phy_modify_paged(phydev, 0x0a43, 0x10, 0x0000, 0x1003); -} - -static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp) -{ - int ret; - - rtl_apply_firmware(tp); - - ret = phy_read_paged(tp->phydev, 0x0a46, 0x10); - if (ret & BIT(8)) - phy_modify_paged(tp->phydev, 0x0bcc, 0x12, BIT(15), 0); - else - phy_modify_paged(tp->phydev, 0x0bcc, 0x12, 0, BIT(15)); - - ret = phy_read_paged(tp->phydev, 0x0a46, 0x13); - if (ret & BIT(8)) - phy_modify_paged(tp->phydev, 0x0c41, 0x15, 0, BIT(1)); - else - phy_modify_paged(tp->phydev, 0x0c41, 0x15, BIT(1), 0); - - /* Enable PHY auto speed down */ - phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(3) | BIT(2)); - - rtl8168g_phy_adjust_10m_aldps(tp); - - /* EEE auto-fallback function */ - phy_modify_paged(tp->phydev, 0x0a4b, 0x11, 0, BIT(2)); - - /* Enable UC LPF tune function */ - r8168g_phy_param(tp->phydev, 0x8012, 0x0000, 0x8000); - - phy_modify_paged(tp->phydev, 0x0c42, 0x11, BIT(13), BIT(14)); - - /* Improve SWR Efficiency */ - rtl_writephy(tp, 0x1f, 0x0bcd); - rtl_writephy(tp, 0x14, 0x5065); - rtl_writephy(tp, 0x14, 0xd065); - rtl_writephy(tp, 0x1f, 0x0bc8); - rtl_writephy(tp, 0x11, 0x5655); - rtl_writephy(tp, 0x1f, 0x0bcd); - rtl_writephy(tp, 0x14, 0x1065); - rtl_writephy(tp, 0x14, 0x9065); - rtl_writephy(tp, 0x14, 0x1065); - rtl_writephy(tp, 0x1f, 0x0000); - - rtl8168g_disable_aldps(tp); - rtl8168g_config_eee_phy(tp); - rtl_enable_eee(tp); -} - -static void rtl8168g_2_hw_phy_config(struct rtl8169_private *tp) -{ - rtl_apply_firmware(tp); - rtl8168g_config_eee_phy(tp); - rtl_enable_eee(tp); -} - -static void rtl8168h_1_hw_phy_config(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - u16 dout_tapbin; - u32 data; - - rtl_apply_firmware(tp); - - /* CHN EST parameters adjust - giga master */ - r8168g_phy_param(phydev, 0x809b, 0xf800, 0x8000); - r8168g_phy_param(phydev, 0x80a2, 0xff00, 0x8000); - r8168g_phy_param(phydev, 0x80a4, 0xff00, 0x8500); - r8168g_phy_param(phydev, 0x809c, 0xff00, 0xbd00); - - /* CHN EST parameters adjust - giga slave */ - r8168g_phy_param(phydev, 0x80ad, 0xf800, 0x7000); - r8168g_phy_param(phydev, 0x80b4, 0xff00, 0x5000); - r8168g_phy_param(phydev, 0x80ac, 0xff00, 0x4000); - - /* CHN EST parameters adjust - fnet */ - r8168g_phy_param(phydev, 0x808e, 0xff00, 0x1200); - r8168g_phy_param(phydev, 0x8090, 0xff00, 0xe500); - r8168g_phy_param(phydev, 0x8092, 0xff00, 0x9f00); - - /* enable R-tune & PGA-retune function */ - dout_tapbin = 0; - data = phy_read_paged(phydev, 0x0a46, 0x13); - data &= 3; - data <<= 2; - dout_tapbin |= data; - data = phy_read_paged(phydev, 0x0a46, 0x12); - data &= 0xc000; - data >>= 14; - dout_tapbin |= data; - dout_tapbin = 
~(dout_tapbin^0x08); - dout_tapbin <<= 12; - dout_tapbin &= 0xf000; - - r8168g_phy_param(phydev, 0x827a, 0xf000, dout_tapbin); - r8168g_phy_param(phydev, 0x827b, 0xf000, dout_tapbin); - r8168g_phy_param(phydev, 0x827c, 0xf000, dout_tapbin); - r8168g_phy_param(phydev, 0x827d, 0xf000, dout_tapbin); - r8168g_phy_param(phydev, 0x0811, 0x0000, 0x0800); - phy_modify_paged(phydev, 0x0a42, 0x16, 0x0000, 0x0002); - - /* enable GPHY 10M */ - phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(11)); - - /* SAR ADC performance */ - phy_modify_paged(tp->phydev, 0x0bca, 0x17, BIT(12) | BIT(13), BIT(14)); - - r8168g_phy_param(phydev, 0x803f, 0x3000, 0x0000); - r8168g_phy_param(phydev, 0x8047, 0x3000, 0x0000); - r8168g_phy_param(phydev, 0x804f, 0x3000, 0x0000); - r8168g_phy_param(phydev, 0x8057, 0x3000, 0x0000); - r8168g_phy_param(phydev, 0x805f, 0x3000, 0x0000); - r8168g_phy_param(phydev, 0x8067, 0x3000, 0x0000); - r8168g_phy_param(phydev, 0x806f, 0x3000, 0x0000); - - /* disable phy pfm mode */ - phy_modify_paged(tp->phydev, 0x0a44, 0x11, BIT(7), 0); - - rtl8168g_disable_aldps(tp); - rtl8168h_config_eee_phy(tp); - rtl_enable_eee(tp); -} - -static void rtl8168h_2_hw_phy_config(struct rtl8169_private *tp) -{ - u16 ioffset_p3, ioffset_p2, ioffset_p1, ioffset_p0; - struct phy_device *phydev = tp->phydev; - u16 rlen; - u32 data; - - rtl_apply_firmware(tp); - - /* CHIN EST parameter update */ - r8168g_phy_param(phydev, 0x808a, 0x003f, 0x000a); - - /* enable R-tune & PGA-retune function */ - r8168g_phy_param(phydev, 0x0811, 0x0000, 0x0800); - phy_modify_paged(phydev, 0x0a42, 0x16, 0x0000, 0x0002); - - /* enable GPHY 10M */ - phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(11)); + u16 data1, data2, ioffset; r8168_mac_ocp_write(tp, 0xdd02, 0x807d); - data = r8168_mac_ocp_read(tp, 0xdd02); - ioffset_p3 = ((data & 0x80)>>7); - ioffset_p3 <<= 3; - - data = r8168_mac_ocp_read(tp, 0xdd00); - ioffset_p3 |= ((data & (0xe000))>>13); - ioffset_p2 = ((data & (0x1e00))>>9); - ioffset_p1 = ((data & (0x01e0))>>5); - ioffset_p0 = ((data & 0x0010)>>4); - ioffset_p0 <<= 3; - ioffset_p0 |= (data & (0x07)); - data = (ioffset_p3<<12)|(ioffset_p2<<8)|(ioffset_p1<<4)|(ioffset_p0); - - if ((ioffset_p3 != 0x0f) || (ioffset_p2 != 0x0f) || - (ioffset_p1 != 0x0f) || (ioffset_p0 != 0x0f)) - phy_write_paged(phydev, 0x0bcf, 0x16, data); - - /* Modify rlen (TX LPF corner frequency) level */ - data = phy_read_paged(phydev, 0x0bcd, 0x16); - data &= 0x000f; - rlen = 0; - if (data > 3) - rlen = data - 3; - data = rlen | (rlen<<4) | (rlen<<8) | (rlen<<12); - phy_write_paged(phydev, 0x0bcd, 0x17, data); - - /* disable phy pfm mode */ - phy_modify_paged(phydev, 0x0a44, 0x11, BIT(7), 0); - - rtl8168g_disable_aldps(tp); - rtl8168g_config_eee_phy(tp); - rtl_enable_eee(tp); -} - -static void rtl8168ep_1_hw_phy_config(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - - /* Enable PHY auto speed down */ - phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(3) | BIT(2)); - - rtl8168g_phy_adjust_10m_aldps(tp); - - /* Enable EEE auto-fallback function */ - phy_modify_paged(phydev, 0x0a4b, 0x11, 0, BIT(2)); - - /* Enable UC LPF tune function */ - r8168g_phy_param(phydev, 0x8012, 0x0000, 0x8000); - - /* set rg_sel_sdm_rate */ - phy_modify_paged(phydev, 0x0c42, 0x11, BIT(13), BIT(14)); - - rtl8168g_disable_aldps(tp); - rtl8168g_config_eee_phy(tp); - rtl_enable_eee(tp); -} - -static void rtl8168ep_2_hw_phy_config(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - - rtl8168g_phy_adjust_10m_aldps(tp); - - /* Enable UC LPF 
tune function */ - r8168g_phy_param(phydev, 0x8012, 0x0000, 0x8000); - - /* Set rg_sel_sdm_rate */ - phy_modify_paged(tp->phydev, 0x0c42, 0x11, BIT(13), BIT(14)); - - /* Channel estimation parameters */ - r8168g_phy_param(phydev, 0x80f3, 0xff00, 0x8b00); - r8168g_phy_param(phydev, 0x80f0, 0xff00, 0x3a00); - r8168g_phy_param(phydev, 0x80ef, 0xff00, 0x0500); - r8168g_phy_param(phydev, 0x80f6, 0xff00, 0x6e00); - r8168g_phy_param(phydev, 0x80ec, 0xff00, 0x6800); - r8168g_phy_param(phydev, 0x80ed, 0xff00, 0x7c00); - r8168g_phy_param(phydev, 0x80f2, 0xff00, 0xf400); - r8168g_phy_param(phydev, 0x80f4, 0xff00, 0x8500); - r8168g_phy_param(phydev, 0x8110, 0xff00, 0xa800); - r8168g_phy_param(phydev, 0x810f, 0xff00, 0x1d00); - r8168g_phy_param(phydev, 0x8111, 0xff00, 0xf500); - r8168g_phy_param(phydev, 0x8113, 0xff00, 0x6100); - r8168g_phy_param(phydev, 0x8115, 0xff00, 0x9200); - r8168g_phy_param(phydev, 0x810e, 0xff00, 0x0400); - r8168g_phy_param(phydev, 0x810c, 0xff00, 0x7c00); - r8168g_phy_param(phydev, 0x810b, 0xff00, 0x5a00); - r8168g_phy_param(phydev, 0x80d1, 0xff00, 0xff00); - r8168g_phy_param(phydev, 0x80cd, 0xff00, 0x9e00); - r8168g_phy_param(phydev, 0x80d3, 0xff00, 0x0e00); - r8168g_phy_param(phydev, 0x80d5, 0xff00, 0xca00); - r8168g_phy_param(phydev, 0x80d7, 0xff00, 0x8400); - - /* Force PWM-mode */ - rtl_writephy(tp, 0x1f, 0x0bcd); - rtl_writephy(tp, 0x14, 0x5065); - rtl_writephy(tp, 0x14, 0xd065); - rtl_writephy(tp, 0x1f, 0x0bc8); - rtl_writephy(tp, 0x12, 0x00ed); - rtl_writephy(tp, 0x1f, 0x0bcd); - rtl_writephy(tp, 0x14, 0x1065); - rtl_writephy(tp, 0x14, 0x9065); - rtl_writephy(tp, 0x14, 0x1065); - rtl_writephy(tp, 0x1f, 0x0000); - - rtl8168g_disable_aldps(tp); - rtl8168g_config_eee_phy(tp); - rtl_enable_eee(tp); -} - -static void rtl8117_hw_phy_config(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - - /* CHN EST parameters adjust - fnet */ - r8168g_phy_param(phydev, 0x808e, 0xff00, 0x4800); - r8168g_phy_param(phydev, 0x8090, 0xff00, 0xcc00); - r8168g_phy_param(phydev, 0x8092, 0xff00, 0xb000); - - r8168g_phy_param(phydev, 0x8088, 0xff00, 0x6000); - r8168g_phy_param(phydev, 0x808b, 0x3f00, 0x0b00); - r8168g_phy_param(phydev, 0x808d, 0x1f00, 0x0600); - r8168g_phy_param(phydev, 0x808c, 0xff00, 0xb000); - r8168g_phy_param(phydev, 0x80a0, 0xff00, 0x2800); - r8168g_phy_param(phydev, 0x80a2, 0xff00, 0x5000); - r8168g_phy_param(phydev, 0x809b, 0xf800, 0xb000); - r8168g_phy_param(phydev, 0x809a, 0xff00, 0x4b00); - r8168g_phy_param(phydev, 0x809d, 0x3f00, 0x0800); - r8168g_phy_param(phydev, 0x80a1, 0xff00, 0x7000); - r8168g_phy_param(phydev, 0x809f, 0x1f00, 0x0300); - r8168g_phy_param(phydev, 0x809e, 0xff00, 0x8800); - r8168g_phy_param(phydev, 0x80b2, 0xff00, 0x2200); - r8168g_phy_param(phydev, 0x80ad, 0xf800, 0x9800); - r8168g_phy_param(phydev, 0x80af, 0x3f00, 0x0800); - r8168g_phy_param(phydev, 0x80b3, 0xff00, 0x6f00); - r8168g_phy_param(phydev, 0x80b1, 0x1f00, 0x0300); - r8168g_phy_param(phydev, 0x80b0, 0xff00, 0x9300); - - r8168g_phy_param(phydev, 0x8011, 0x0000, 0x0800); - - /* enable GPHY 10M */ - phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(11)); - - r8168g_phy_param(phydev, 0x8016, 0x0000, 0x0400); - - rtl8168g_disable_aldps(tp); - rtl8168h_config_eee_phy(tp); - rtl_enable_eee(tp); -} - -static void rtl8102e_hw_phy_config(struct rtl8169_private *tp) -{ - static const struct phy_reg phy_reg_init[] = { - { 0x1f, 0x0003 }, - { 0x08, 0x441d }, - { 0x01, 0x9100 }, - { 0x1f, 0x0000 } - }; - - rtl_writephy(tp, 0x1f, 0x0000); - rtl_patchphy(tp, 0x11, 1 << 12); - 
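The phy parameter helpers above all take a (mask, val) pair with the usual phylib read-modify-write rule: bits in the mask are cleared, bits in val are set, and everything else in the register is preserved. A standalone illustration of that rule in plain C, using a few of the calls from the hunks above as inputs (the starting register value is invented for the example):

```c
#include <stdio.h>
#include <stdint.h>

/* Same rule phy_modify()/__phy_modify() apply: clear 'mask' bits, set 'set' bits. */
static uint16_t rmw(uint16_t old, uint16_t mask, uint16_t set)
{
	return (old & ~mask) | set;
}

int main(void)
{
	uint16_t reg = 0x1234;	/* invented current register value */

	/* r8168g_phy_param(phydev, 0x80f3, 0xff00, 0x8b00): replace the high byte */
	printf("0x%04x -> 0x%04x\n", reg, rmw(reg, 0xff00, 0x8b00));

	/* r8168g_phy_param(phydev, 0x8011, 0x0000, 0x0800): set bit 11, keep the rest */
	printf("0x%04x -> 0x%04x\n", reg, rmw(reg, 0x0000, 0x0800));

	/* phy_modify_paged(phydev, 0x0a44, 0x11, BIT(7), 0): clear bit 7 */
	printf("0x%04x -> 0x%04x\n", reg, rmw(reg, 1 << 7, 0));

	return 0;
}
```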
rtl_patchphy(tp, 0x19, 1 << 13); - rtl_patchphy(tp, 0x10, 1 << 15); - - rtl_writephy_batch(tp, phy_reg_init); -} - -static void rtl8105e_hw_phy_config(struct rtl8169_private *tp) -{ - /* Disable ALDPS before ram code */ - phy_write(tp->phydev, 0x18, 0x0310); - msleep(100); - - rtl_apply_firmware(tp); - - phy_write_paged(tp->phydev, 0x0005, 0x1a, 0x0000); - phy_write_paged(tp->phydev, 0x0004, 0x1c, 0x0000); - phy_write_paged(tp->phydev, 0x0001, 0x15, 0x7701); -} - -static void rtl8402_hw_phy_config(struct rtl8169_private *tp) -{ - /* Disable ALDPS before setting firmware */ - phy_write(tp->phydev, 0x18, 0x0310); - msleep(20); - - rtl_apply_firmware(tp); - - /* EEE setting */ - rtl_eri_write(tp, 0x1b0, ERIAR_MASK_0011, 0x0000); - rtl_writephy(tp, 0x1f, 0x0004); - rtl_writephy(tp, 0x10, 0x401f); - rtl_writephy(tp, 0x19, 0x7030); - rtl_writephy(tp, 0x1f, 0x0000); -} - -static void rtl8106e_hw_phy_config(struct rtl8169_private *tp) -{ - static const struct phy_reg phy_reg_init[] = { - { 0x1f, 0x0004 }, - { 0x10, 0xc07f }, - { 0x19, 0x7030 }, - { 0x1f, 0x0000 } - }; - - /* Disable ALDPS before ram code */ - phy_write(tp->phydev, 0x18, 0x0310); - msleep(100); - - rtl_apply_firmware(tp); - - rtl_eri_write(tp, 0x1b0, ERIAR_MASK_0011, 0x0000); - rtl_writephy_batch(tp, phy_reg_init); - - rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x0000); -} - -static void rtl8125_1_hw_phy_config(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - - phy_modify_paged(phydev, 0xad4, 0x10, 0x03ff, 0x0084); - phy_modify_paged(phydev, 0xad4, 0x17, 0x0000, 0x0010); - phy_modify_paged(phydev, 0xad1, 0x13, 0x03ff, 0x0006); - phy_modify_paged(phydev, 0xad3, 0x11, 0x003f, 0x0006); - phy_modify_paged(phydev, 0xac0, 0x14, 0x0000, 0x1100); - phy_modify_paged(phydev, 0xac8, 0x15, 0xf000, 0x7000); - phy_modify_paged(phydev, 0xad1, 0x14, 0x0000, 0x0400); - phy_modify_paged(phydev, 0xad1, 0x15, 0x0000, 0x03ff); - phy_modify_paged(phydev, 0xad1, 0x16, 0x0000, 0x03ff); - - r8168g_phy_param(phydev, 0x80ea, 0xff00, 0xc400); - r8168g_phy_param(phydev, 0x80eb, 0x0700, 0x0300); - r8168g_phy_param(phydev, 0x80f8, 0xff00, 0x1c00); - r8168g_phy_param(phydev, 0x80f1, 0xff00, 0x3000); - r8168g_phy_param(phydev, 0x80fe, 0xff00, 0xa500); - r8168g_phy_param(phydev, 0x8102, 0xff00, 0x5000); - r8168g_phy_param(phydev, 0x8105, 0xff00, 0x3300); - r8168g_phy_param(phydev, 0x8100, 0xff00, 0x7000); - r8168g_phy_param(phydev, 0x8104, 0xff00, 0xf000); - r8168g_phy_param(phydev, 0x8106, 0xff00, 0x6500); - r8168g_phy_param(phydev, 0x80dc, 0xff00, 0xed00); - r8168g_phy_param(phydev, 0x80df, 0x0000, 0x0100); - r8168g_phy_param(phydev, 0x80e1, 0x0100, 0x0000); - - phy_modify_paged(phydev, 0xbf0, 0x13, 0x003f, 0x0038); - r8168g_phy_param(phydev, 0x819f, 0xffff, 0xd0b6); - - phy_write_paged(phydev, 0xbc3, 0x12, 0x5555); - phy_modify_paged(phydev, 0xbf0, 0x15, 0x0e00, 0x0a00); - phy_modify_paged(phydev, 0xa5c, 0x10, 0x0400, 0x0000); - phy_modify_paged(phydev, 0xa44, 0x11, 0x0000, 0x0800); - - rtl8125_config_eee_phy(tp); - rtl_enable_eee(tp); -} - -static void rtl8125_2_hw_phy_config(struct rtl8169_private *tp) -{ - struct phy_device *phydev = tp->phydev; - int i; + data1 = r8168_mac_ocp_read(tp, 0xdd02); + data2 = r8168_mac_ocp_read(tp, 0xdd00); - phy_modify_paged(phydev, 0xad4, 0x17, 0x0000, 0x0010); - phy_modify_paged(phydev, 0xad1, 0x13, 0x03ff, 0x03ff); - phy_modify_paged(phydev, 0xad3, 0x11, 0x003f, 0x0006); - phy_modify_paged(phydev, 0xac0, 0x14, 0x1100, 0x0000); - phy_modify_paged(phydev, 0xacc, 0x10, 0x0003, 0x0002); - 
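The two access styles mixed in the hunks above, explicit page-select sequences through register 0x1f versus the phy_write_paged()/phy_modify_paged() accessors, come down to the same operation: these Realtek PHYs select a register page via register 0x1f, and phylib's paged helpers save, switch and restore the page around the access. A rough sketch of the equivalence, taking the rtl8402 EEE writes above as the example (kernel-style fragment, not a standalone program):

```c
/* Open-coded form, as in rtl8402_hw_phy_config()
 * (rtl_writephy() in the removed code, phy_write() in the new file):
 */
phy_write(phydev, 0x1f, 0x0004);	/* select page 4 */
phy_write(phydev, 0x10, 0x401f);
phy_write(phydev, 0x19, 0x7030);
phy_write(phydev, 0x1f, 0x0000);	/* back to page 0 */

/* Roughly equivalent paged form (sketch; phy_write_paged() switches and
 * restores the page internally through the PHY driver's page callbacks):
 */
phy_write_paged(phydev, 0x0004, 0x10, 0x401f);
phy_write_paged(phydev, 0x0004, 0x19, 0x7030);
```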
phy_modify_paged(phydev, 0xad4, 0x10, 0x00e7, 0x0044); - phy_modify_paged(phydev, 0xac1, 0x12, 0x0080, 0x0000); - phy_modify_paged(phydev, 0xac8, 0x10, 0x0300, 0x0000); - phy_modify_paged(phydev, 0xac5, 0x17, 0x0007, 0x0002); - phy_write_paged(phydev, 0xad4, 0x16, 0x00a8); - phy_write_paged(phydev, 0xac5, 0x16, 0x01ff); - phy_modify_paged(phydev, 0xac8, 0x15, 0x00f0, 0x0030); - - phy_write(phydev, 0x1f, 0x0b87); - phy_write(phydev, 0x16, 0x80a2); - phy_write(phydev, 0x17, 0x0153); - phy_write(phydev, 0x16, 0x809c); - phy_write(phydev, 0x17, 0x0153); - phy_write(phydev, 0x1f, 0x0000); - - phy_write(phydev, 0x1f, 0x0a43); - phy_write(phydev, 0x13, 0x81B3); - phy_write(phydev, 0x14, 0x0043); - phy_write(phydev, 0x14, 0x00A7); - phy_write(phydev, 0x14, 0x00D6); - phy_write(phydev, 0x14, 0x00EC); - phy_write(phydev, 0x14, 0x00F6); - phy_write(phydev, 0x14, 0x00FB); - phy_write(phydev, 0x14, 0x00FD); - phy_write(phydev, 0x14, 0x00FF); - phy_write(phydev, 0x14, 0x00BB); - phy_write(phydev, 0x14, 0x0058); - phy_write(phydev, 0x14, 0x0029); - phy_write(phydev, 0x14, 0x0013); - phy_write(phydev, 0x14, 0x0009); - phy_write(phydev, 0x14, 0x0004); - phy_write(phydev, 0x14, 0x0002); - for (i = 0; i < 25; i++) - phy_write(phydev, 0x14, 0x0000); - phy_write(phydev, 0x1f, 0x0000); - - r8168g_phy_param(phydev, 0x8257, 0xffff, 0x020F); - r8168g_phy_param(phydev, 0x80ea, 0xffff, 0x7843); - - rtl_apply_firmware(tp); - - phy_modify_paged(phydev, 0xd06, 0x14, 0x0000, 0x2000); - - r8168g_phy_param(phydev, 0x81a2, 0x0000, 0x0100); - - phy_modify_paged(phydev, 0xb54, 0x16, 0xff00, 0xdb00); - phy_modify_paged(phydev, 0xa45, 0x12, 0x0001, 0x0000); - phy_modify_paged(phydev, 0xa5d, 0x12, 0x0000, 0x0020); - phy_modify_paged(phydev, 0xad4, 0x17, 0x0010, 0x0000); - phy_modify_paged(phydev, 0xa86, 0x15, 0x0001, 0x0000); - phy_modify_paged(phydev, 0xa44, 0x11, 0x0000, 0x0800); - - rtl8125_config_eee_phy(tp); - rtl_enable_eee(tp); -} - -static void rtl_hw_phy_config(struct net_device *dev) -{ - static const rtl_generic_fct phy_configs[] = { - /* PCI devices. */ - [RTL_GIGA_MAC_VER_02] = rtl8169s_hw_phy_config, - [RTL_GIGA_MAC_VER_03] = rtl8169s_hw_phy_config, - [RTL_GIGA_MAC_VER_04] = rtl8169sb_hw_phy_config, - [RTL_GIGA_MAC_VER_05] = rtl8169scd_hw_phy_config, - [RTL_GIGA_MAC_VER_06] = rtl8169sce_hw_phy_config, - /* PCI-E devices. 
*/ - [RTL_GIGA_MAC_VER_07] = rtl8102e_hw_phy_config, - [RTL_GIGA_MAC_VER_08] = rtl8102e_hw_phy_config, - [RTL_GIGA_MAC_VER_09] = rtl8102e_hw_phy_config, - [RTL_GIGA_MAC_VER_10] = NULL, - [RTL_GIGA_MAC_VER_11] = rtl8168bb_hw_phy_config, - [RTL_GIGA_MAC_VER_12] = rtl8168bef_hw_phy_config, - [RTL_GIGA_MAC_VER_13] = NULL, - [RTL_GIGA_MAC_VER_14] = NULL, - [RTL_GIGA_MAC_VER_15] = NULL, - [RTL_GIGA_MAC_VER_16] = NULL, - [RTL_GIGA_MAC_VER_17] = rtl8168bef_hw_phy_config, - [RTL_GIGA_MAC_VER_18] = rtl8168cp_1_hw_phy_config, - [RTL_GIGA_MAC_VER_19] = rtl8168c_1_hw_phy_config, - [RTL_GIGA_MAC_VER_20] = rtl8168c_2_hw_phy_config, - [RTL_GIGA_MAC_VER_21] = rtl8168c_3_hw_phy_config, - [RTL_GIGA_MAC_VER_22] = rtl8168c_3_hw_phy_config, - [RTL_GIGA_MAC_VER_23] = rtl8168cp_2_hw_phy_config, - [RTL_GIGA_MAC_VER_24] = rtl8168cp_2_hw_phy_config, - [RTL_GIGA_MAC_VER_25] = rtl8168d_1_hw_phy_config, - [RTL_GIGA_MAC_VER_26] = rtl8168d_2_hw_phy_config, - [RTL_GIGA_MAC_VER_27] = rtl8168d_3_hw_phy_config, - [RTL_GIGA_MAC_VER_28] = rtl8168d_4_hw_phy_config, - [RTL_GIGA_MAC_VER_29] = rtl8105e_hw_phy_config, - [RTL_GIGA_MAC_VER_30] = rtl8105e_hw_phy_config, - [RTL_GIGA_MAC_VER_31] = NULL, - [RTL_GIGA_MAC_VER_32] = rtl8168e_1_hw_phy_config, - [RTL_GIGA_MAC_VER_33] = rtl8168e_1_hw_phy_config, - [RTL_GIGA_MAC_VER_34] = rtl8168e_2_hw_phy_config, - [RTL_GIGA_MAC_VER_35] = rtl8168f_1_hw_phy_config, - [RTL_GIGA_MAC_VER_36] = rtl8168f_2_hw_phy_config, - [RTL_GIGA_MAC_VER_37] = rtl8402_hw_phy_config, - [RTL_GIGA_MAC_VER_38] = rtl8411_hw_phy_config, - [RTL_GIGA_MAC_VER_39] = rtl8106e_hw_phy_config, - [RTL_GIGA_MAC_VER_40] = rtl8168g_1_hw_phy_config, - [RTL_GIGA_MAC_VER_41] = NULL, - [RTL_GIGA_MAC_VER_42] = rtl8168g_2_hw_phy_config, - [RTL_GIGA_MAC_VER_43] = rtl8168g_2_hw_phy_config, - [RTL_GIGA_MAC_VER_44] = rtl8168g_2_hw_phy_config, - [RTL_GIGA_MAC_VER_45] = rtl8168h_1_hw_phy_config, - [RTL_GIGA_MAC_VER_46] = rtl8168h_2_hw_phy_config, - [RTL_GIGA_MAC_VER_47] = rtl8168h_1_hw_phy_config, - [RTL_GIGA_MAC_VER_48] = rtl8168h_2_hw_phy_config, - [RTL_GIGA_MAC_VER_49] = rtl8168ep_1_hw_phy_config, - [RTL_GIGA_MAC_VER_50] = rtl8168ep_2_hw_phy_config, - [RTL_GIGA_MAC_VER_51] = rtl8168ep_2_hw_phy_config, - [RTL_GIGA_MAC_VER_52] = rtl8117_hw_phy_config, - [RTL_GIGA_MAC_VER_60] = rtl8125_1_hw_phy_config, - [RTL_GIGA_MAC_VER_61] = rtl8125_2_hw_phy_config, - }; - struct rtl8169_private *tp = netdev_priv(dev); + ioffset = (data2 >> 1) & 0x7ff8; + ioffset |= data2 & 0x0007; + if (data1 & BIT(7)) + ioffset |= BIT(15); - if (phy_configs[tp->mac_version]) - phy_configs[tp->mac_version](tp); + return ioffset; } static void rtl_schedule_task(struct rtl8169_private *tp, enum rtl_flag flag) @@ -3617,21 +2232,28 @@ static void rtl_schedule_task(struct rtl8169_private *tp, enum rtl_flag flag) schedule_work(&tp->wk.work); } -static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp) +static void rtl8169_init_phy(struct rtl8169_private *tp) { - rtl_hw_phy_config(dev); + r8169_hw_phy_config(tp, tp->phydev, tp->mac_version); if (tp->mac_version <= RTL_GIGA_MAC_VER_06) { pci_write_config_byte(tp->pci_dev, PCI_LATENCY_TIMER, 0x40); pci_write_config_byte(tp->pci_dev, PCI_CACHE_LINE_SIZE, 0x08); - netif_dbg(tp, drv, dev, - "Set MAC Reg C+CR Offset 0x82h = 0x01h\n"); + /* set undocumented MAC Reg C+CR Offset 0x82h */ RTL_W8(tp, 0x82, 0x01); } + if (tp->mac_version == RTL_GIGA_MAC_VER_05 && + tp->pci_dev->subsystem_vendor == PCI_VENDOR_ID_GIGABYTE && + tp->pci_dev->subsystem_device == 0xe000) + phy_write_paged(tp->phydev, 0x0001, 0x10, 0xf01b); 
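The ADC bias offset repacking added above replaces the old nibble-by-nibble ioffset_p3..p0 assembly with two shifts and a mask; both map the same source bits (bit 7 of OCP register 0xdd02, plus bits 15..4 and 2..0 of 0xdd00) to the same result positions, so the new helper is behaviourally identical. A standalone check in plain C, with arbitrary sample register values:

```c
#include <assert.h>
#include <stdio.h>
#include <stdint.h>

static uint16_t old_pack(uint16_t d02, uint16_t d00)
{
	uint16_t p3, p2, p1, p0;

	p3 = ((d02 & 0x0080) >> 7) << 3;
	p3 |= (d00 & 0xe000) >> 13;
	p2 = (d00 & 0x1e00) >> 9;
	p1 = (d00 & 0x01e0) >> 5;
	p0 = ((d00 & 0x0010) >> 4) << 3;
	p0 |= d00 & 0x0007;

	return (p3 << 12) | (p2 << 8) | (p1 << 4) | p0;
}

static uint16_t new_pack(uint16_t d02, uint16_t d00)
{
	uint16_t ioffset;

	ioffset = (d00 >> 1) & 0x7ff8;
	ioffset |= d00 & 0x0007;
	if (d02 & 0x0080)		/* BIT(7) */
		ioffset |= 0x8000;	/* BIT(15) */

	return ioffset;
}

int main(void)
{
	/* arbitrary sample reads of OCP 0xdd02 / 0xdd00 */
	uint16_t d02 = 0x00a5, d00 = 0xbeef;

	assert(old_pack(d02, d00) == new_pack(d02, d00));
	printf("ioffset = 0x%04x\n", new_pack(d02, d00));
	return 0;
}
```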
+ /* We may have called phy_speed_down before */ phy_speed_up(tp->phydev); + if (rtl_supports_eee(tp)) + rtl_enable_eee(tp); + genphy_soft_reset(tp->phydev); } @@ -3675,16 +2297,6 @@ static int rtl_set_mac_address(struct net_device *dev, void *p) return 0; } -static int rtl8169_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -{ - struct rtl8169_private *tp = netdev_priv(dev); - - if (!netif_running(dev)) - return -ENODEV; - - return phy_mii_ioctl(tp->phydev, ifr, cmd); -} - static void rtl_wol_suspend_quirk(struct rtl8169_private *tp) { switch (tp->mac_version) { @@ -4710,9 +3322,7 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp) rtl_pcie_state_l2l3_disable(tp); - rtl_writephy(tp, 0x1f, 0x0c42); - rg_saw_cnt = (rtl_readphy(tp, 0x13) & 0x3fff); - rtl_writephy(tp, 0x1f, 0x0000); + rg_saw_cnt = phy_read_paged(tp->phydev, 0x0c42, 0x13) & 0x3fff; if (rg_saw_cnt > 0) { u16 sw_cnt_1ms_ini; @@ -4887,7 +3497,7 @@ static void rtl_hw_start_8117(struct rtl8169_private *tp) r8168_mac_ocp_write(tp, 0xc09e, 0x0000); /* firmware is for MAC only */ - rtl_apply_firmware(tp); + r8169_apply_firmware(tp); rtl_hw_aspm_clkreq_enable(tp, true); } @@ -4991,6 +3601,9 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp) rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000); rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0e00, 0xff00); + /* disable EEE */ + rtl_eri_write(tp, 0x1b0, ERIAR_MASK_0011, 0x0000); + rtl_pcie_state_l2l3_disable(tp); } @@ -5005,6 +3618,11 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp) RTL_W8(tp, MCU, RTL_R8(tp, MCU) | EN_NDP | EN_OOB_RESET); RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN); + rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x0000); + + /* disable EEE */ + rtl_eri_write(tp, 0x1b0, ERIAR_MASK_0011, 0x0000); + rtl_pcie_state_l2l3_disable(tp); rtl_hw_aspm_clkreq_enable(tp, true); } @@ -5222,11 +3840,8 @@ static void rtl_hw_start_8169(struct rtl8169_private *tp) tp->cp_cmd |= PCIMulRW; if (tp->mac_version == RTL_GIGA_MAC_VER_02 || - tp->mac_version == RTL_GIGA_MAC_VER_03) { - netif_dbg(tp, drv, tp->dev, - "Set MAC Reg C+CR Offset 0xe0. 
Bit 3 and Bit 14 MUST be 1\n"); - tp->cp_cmd |= (1 << 14); - } + tp->mac_version == RTL_GIGA_MAC_VER_03) + tp->cp_cmd |= EnAnaPLL; RTL_W16(tp, CPlusCmd, tp->cp_cmd); @@ -5435,7 +4050,7 @@ static void rtl_reset_work(struct rtl8169_private *tp) netif_wake_queue(dev); } -static void rtl8169_tx_timeout(struct net_device *dev) +static void rtl8169_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct rtl8169_private *tp = netdev_priv(dev); @@ -6240,7 +4855,7 @@ static int rtl_open(struct net_device *dev) napi_enable(&tp->napi); - rtl8169_init_phy(dev, tp); + rtl8169_init_phy(tp); rtl_pll_power_up(tp); @@ -6371,7 +4986,7 @@ static void __rtl8169_resume(struct net_device *dev) netif_device_attach(dev); rtl_pll_power_up(tp); - rtl8169_init_phy(dev, tp); + rtl8169_init_phy(tp); phy_start(tp->phydev); @@ -6542,7 +5157,7 @@ static const struct net_device_ops rtl_netdev_ops = { .ndo_fix_features = rtl8169_fix_features, .ndo_set_features = rtl8169_set_features, .ndo_set_mac_address = rtl_set_mac_address, - .ndo_do_ioctl = rtl8169_ioctl, + .ndo_do_ioctl = phy_do_ioctl_running, .ndo_set_rx_mode = rtl_set_rx_mode, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = rtl8169_netpoll, @@ -6744,7 +5359,7 @@ static int rtl_jumbo_max(struct rtl8169_private *tp) { /* Non-GBit versions don't support jumbo frames */ if (!tp->supports_gmii) - return JUMBO_1K; + return 0; switch (tp->mac_version) { /* RTL8169 */ @@ -6825,6 +5440,15 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) int chipset, region; int jumbo_max, rc; + /* Some tools for creating an initramfs don't consider softdeps, then + * r8169.ko may be in initramfs, but realtek.ko not. Then the generic + * PHY driver is used that doesn't work with most chip versions. + */ + if (!driver_find("RTL8201CP Ethernet", &mdio_bus_type)) { + dev_err(&pdev->dev, "realtek.ko not loaded, maybe it needs to be added to initramfs?\n"); + return -ENOENT; + } + dev = devm_alloc_etherdev(&pdev->dev, sizeof (*tp)); if (!dev) return -ENOMEM; @@ -6966,10 +5590,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->hw_features |= NETIF_F_RXALL; dev->hw_features |= NETIF_F_RXFCS; - /* MTU range: 60 - hw-specific max */ - dev->min_mtu = ETH_ZLEN; jumbo_max = rtl_jumbo_max(tp); - dev->max_mtu = jumbo_max; + if (jumbo_max) + dev->max_mtu = jumbo_max; rtl_set_irq_mask(tp); @@ -6999,7 +5622,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) (RTL_R32(tp, TxConfig) >> 20) & 0xfcf, pci_irq_vector(pdev, 0)); - if (jumbo_max > JUMBO_1K) + if (jumbo_max) netif_info(tp, probe, dev, "jumbo features [frames: %d bytes, tx checksumming: %s]\n", jumbo_max, tp->mac_version <= RTL_GIGA_MAC_VER_06 ? diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c new file mode 100644 index 000000000000..e367e77c773b --- /dev/null +++ b/drivers/net/ethernet/realtek/r8169_phy_config.c @@ -0,0 +1,1307 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * r8169_phy_config.c: RealTek 8169/8168/8101 ethernet driver. + * + * Copyright (c) 2002 ShuChen <shuchen@realtek.com.tw> + * Copyright (c) 2003 - 2007 Francois Romieu <romieu@fr.zoreil.com> + * Copyright (c) a lot of people too. Please respect their work. + * + * See MAINTAINERS file for support contact information. 
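Several drivers in this series drop their private MII ioctl handlers in favour of the shared phylib helpers; the removed rtl8169_ioctl() above shows what the replacement amounts to. A hedged sketch of what a helper like phy_do_ioctl_running is expected to do (based on the handlers being deleted in this patch, not on the helper's actual source; the function name below is a placeholder):

```c
/* Sketch only: mirrors the removed rtl8169_ioctl()/sh_eth_do_ioctl() logic. */
static int do_mii_ioctl_running(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	if (!netif_running(dev))
		return -ENODEV;

	if (!dev->phydev)
		return -ENODEV;

	return phy_mii_ioctl(dev->phydev, ifr, cmd);
}
```

Drivers then point .ndo_do_ioctl at the shared helper instead of carrying this boilerplate, as the r8169 and sh_eth hunks do.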
+ */ + +#include <linux/delay.h> +#include <linux/phy.h> + +#include "r8169.h" + +typedef void (*rtl_phy_cfg_fct)(struct rtl8169_private *tp, + struct phy_device *phydev); + +static void r8168d_modify_extpage(struct phy_device *phydev, int extpage, + int reg, u16 mask, u16 val) +{ + int oldpage = phy_select_page(phydev, 0x0007); + + __phy_write(phydev, 0x1e, extpage); + __phy_modify(phydev, reg, mask, val); + + phy_restore_page(phydev, oldpage, 0); +} + +static void r8168d_phy_param(struct phy_device *phydev, u16 parm, + u16 mask, u16 val) +{ + int oldpage = phy_select_page(phydev, 0x0005); + + __phy_write(phydev, 0x05, parm); + __phy_modify(phydev, 0x06, mask, val); + + phy_restore_page(phydev, oldpage, 0); +} + +static void r8168g_phy_param(struct phy_device *phydev, u16 parm, + u16 mask, u16 val) +{ + int oldpage = phy_select_page(phydev, 0x0a43); + + __phy_write(phydev, 0x13, parm); + __phy_modify(phydev, 0x14, mask, val); + + phy_restore_page(phydev, oldpage, 0); +} + +struct phy_reg { + u16 reg; + u16 val; +}; + +static void __rtl_writephy_batch(struct phy_device *phydev, + const struct phy_reg *regs, int len) +{ + phy_lock_mdio_bus(phydev); + + while (len-- > 0) { + __phy_write(phydev, regs->reg, regs->val); + regs++; + } + + phy_unlock_mdio_bus(phydev); +} + +#define rtl_writephy_batch(p, a) __rtl_writephy_batch(p, a, ARRAY_SIZE(a)) + +static void rtl8168f_config_eee_phy(struct phy_device *phydev) +{ + r8168d_modify_extpage(phydev, 0x0020, 0x15, 0, BIT(8)); + r8168d_phy_param(phydev, 0x8b85, 0, BIT(13)); +} + +static void rtl8168g_config_eee_phy(struct phy_device *phydev) +{ + phy_modify_paged(phydev, 0x0a43, 0x11, 0, BIT(4)); +} + +static void rtl8168h_config_eee_phy(struct phy_device *phydev) +{ + rtl8168g_config_eee_phy(phydev); + + phy_modify_paged(phydev, 0xa4a, 0x11, 0x0000, 0x0200); + phy_modify_paged(phydev, 0xa42, 0x14, 0x0000, 0x0080); +} + +static void rtl8125_config_eee_phy(struct phy_device *phydev) +{ + rtl8168h_config_eee_phy(phydev); + + phy_modify_paged(phydev, 0xa6d, 0x12, 0x0001, 0x0000); + phy_modify_paged(phydev, 0xa6d, 0x14, 0x0010, 0x0000); +} + +static void rtl8169s_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + static const struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0001 }, + { 0x06, 0x006e }, + { 0x08, 0x0708 }, + { 0x15, 0x4000 }, + { 0x18, 0x65c7 }, + + { 0x1f, 0x0001 }, + { 0x03, 0x00a1 }, + { 0x02, 0x0008 }, + { 0x01, 0x0120 }, + { 0x00, 0x1000 }, + { 0x04, 0x0800 }, + { 0x04, 0x0000 }, + + { 0x03, 0xff41 }, + { 0x02, 0xdf60 }, + { 0x01, 0x0140 }, + { 0x00, 0x0077 }, + { 0x04, 0x7800 }, + { 0x04, 0x7000 }, + + { 0x03, 0x802f }, + { 0x02, 0x4f02 }, + { 0x01, 0x0409 }, + { 0x00, 0xf0f9 }, + { 0x04, 0x9800 }, + { 0x04, 0x9000 }, + + { 0x03, 0xdf01 }, + { 0x02, 0xdf20 }, + { 0x01, 0xff95 }, + { 0x00, 0xba00 }, + { 0x04, 0xa800 }, + { 0x04, 0xa000 }, + + { 0x03, 0xff41 }, + { 0x02, 0xdf20 }, + { 0x01, 0x0140 }, + { 0x00, 0x00bb }, + { 0x04, 0xb800 }, + { 0x04, 0xb000 }, + + { 0x03, 0xdf41 }, + { 0x02, 0xdc60 }, + { 0x01, 0x6340 }, + { 0x00, 0x007d }, + { 0x04, 0xd800 }, + { 0x04, 0xd000 }, + + { 0x03, 0xdf01 }, + { 0x02, 0xdf20 }, + { 0x01, 0x100a }, + { 0x00, 0xa0ff }, + { 0x04, 0xf800 }, + { 0x04, 0xf000 }, + + { 0x1f, 0x0000 }, + { 0x0b, 0x0000 }, + { 0x00, 0x9200 } + }; + + rtl_writephy_batch(phydev, phy_reg_init); +} + +static void rtl8169sb_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + phy_write_paged(phydev, 0x0002, 0x01, 0x90d0); +} + +static void rtl8169scd_hw_phy_config(struct 
rtl8169_private *tp, + struct phy_device *phydev) +{ + static const struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0001 }, + { 0x04, 0x0000 }, + { 0x03, 0x00a1 }, + { 0x02, 0x0008 }, + { 0x01, 0x0120 }, + { 0x00, 0x1000 }, + { 0x04, 0x0800 }, + { 0x04, 0x9000 }, + { 0x03, 0x802f }, + { 0x02, 0x4f02 }, + { 0x01, 0x0409 }, + { 0x00, 0xf099 }, + { 0x04, 0x9800 }, + { 0x04, 0xa000 }, + { 0x03, 0xdf01 }, + { 0x02, 0xdf20 }, + { 0x01, 0xff95 }, + { 0x00, 0xba00 }, + { 0x04, 0xa800 }, + { 0x04, 0xf000 }, + { 0x03, 0xdf01 }, + { 0x02, 0xdf20 }, + { 0x01, 0x101a }, + { 0x00, 0xa0ff }, + { 0x04, 0xf800 }, + { 0x04, 0x0000 }, + { 0x1f, 0x0000 }, + + { 0x1f, 0x0001 }, + { 0x10, 0xf41b }, + { 0x14, 0xfb54 }, + { 0x18, 0xf5c7 }, + { 0x1f, 0x0000 }, + + { 0x1f, 0x0001 }, + { 0x17, 0x0cc0 }, + { 0x1f, 0x0000 } + }; + + rtl_writephy_batch(phydev, phy_reg_init); +} + +static void rtl8169sce_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + static const struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0001 }, + { 0x04, 0x0000 }, + { 0x03, 0x00a1 }, + { 0x02, 0x0008 }, + { 0x01, 0x0120 }, + { 0x00, 0x1000 }, + { 0x04, 0x0800 }, + { 0x04, 0x9000 }, + { 0x03, 0x802f }, + { 0x02, 0x4f02 }, + { 0x01, 0x0409 }, + { 0x00, 0xf099 }, + { 0x04, 0x9800 }, + { 0x04, 0xa000 }, + { 0x03, 0xdf01 }, + { 0x02, 0xdf20 }, + { 0x01, 0xff95 }, + { 0x00, 0xba00 }, + { 0x04, 0xa800 }, + { 0x04, 0xf000 }, + { 0x03, 0xdf01 }, + { 0x02, 0xdf20 }, + { 0x01, 0x101a }, + { 0x00, 0xa0ff }, + { 0x04, 0xf800 }, + { 0x04, 0x0000 }, + { 0x1f, 0x0000 }, + + { 0x1f, 0x0001 }, + { 0x0b, 0x8480 }, + { 0x1f, 0x0000 }, + + { 0x1f, 0x0001 }, + { 0x18, 0x67c7 }, + { 0x04, 0x2000 }, + { 0x03, 0x002f }, + { 0x02, 0x4360 }, + { 0x01, 0x0109 }, + { 0x00, 0x3022 }, + { 0x04, 0x2800 }, + { 0x1f, 0x0000 }, + + { 0x1f, 0x0001 }, + { 0x17, 0x0cc0 }, + { 0x1f, 0x0000 } + }; + + rtl_writephy_batch(phydev, phy_reg_init); +} + +static void rtl8168bb_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + phy_write(phydev, 0x1f, 0x0001); + phy_set_bits(phydev, 0x16, BIT(0)); + phy_write(phydev, 0x10, 0xf41b); + phy_write(phydev, 0x1f, 0x0000); +} + +static void rtl8168bef_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + phy_write_paged(phydev, 0x0001, 0x10, 0xf41b); +} + +static void rtl8168cp_1_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + phy_write(phydev, 0x1d, 0x0f00); + phy_write_paged(phydev, 0x0002, 0x0c, 0x1ec8); +} + +static void rtl8168cp_2_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + phy_set_bits(phydev, 0x14, BIT(5)); + phy_set_bits(phydev, 0x0d, BIT(5)); + phy_write_paged(phydev, 0x0001, 0x1d, 0x3d98); +} + +static void rtl8168c_1_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + static const struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0001 }, + { 0x12, 0x2300 }, + { 0x1f, 0x0002 }, + { 0x00, 0x88d4 }, + { 0x01, 0x82b1 }, + { 0x03, 0x7002 }, + { 0x08, 0x9e30 }, + { 0x09, 0x01f0 }, + { 0x0a, 0x5500 }, + { 0x0c, 0x00c8 }, + { 0x1f, 0x0003 }, + { 0x12, 0xc096 }, + { 0x16, 0x000a }, + { 0x1f, 0x0000 }, + { 0x1f, 0x0000 }, + { 0x09, 0x2000 }, + { 0x09, 0x0000 } + }; + + rtl_writephy_batch(phydev, phy_reg_init); + + phy_set_bits(phydev, 0x14, BIT(5)); + phy_set_bits(phydev, 0x0d, BIT(5)); +} + +static void rtl8168c_2_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + static const struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0001 }, + { 0x12, 0x2300 }, + { 0x03, 0x802f }, + { 
0x02, 0x4f02 }, + { 0x01, 0x0409 }, + { 0x00, 0xf099 }, + { 0x04, 0x9800 }, + { 0x04, 0x9000 }, + { 0x1d, 0x3d98 }, + { 0x1f, 0x0002 }, + { 0x0c, 0x7eb8 }, + { 0x06, 0x0761 }, + { 0x1f, 0x0003 }, + { 0x16, 0x0f0a }, + { 0x1f, 0x0000 } + }; + + rtl_writephy_batch(phydev, phy_reg_init); + + phy_set_bits(phydev, 0x16, BIT(0)); + phy_set_bits(phydev, 0x14, BIT(5)); + phy_set_bits(phydev, 0x0d, BIT(5)); +} + +static void rtl8168c_3_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + static const struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0001 }, + { 0x12, 0x2300 }, + { 0x1d, 0x3d98 }, + { 0x1f, 0x0002 }, + { 0x0c, 0x7eb8 }, + { 0x06, 0x5461 }, + { 0x1f, 0x0003 }, + { 0x16, 0x0f0a }, + { 0x1f, 0x0000 } + }; + + rtl_writephy_batch(phydev, phy_reg_init); + + phy_set_bits(phydev, 0x16, BIT(0)); + phy_set_bits(phydev, 0x14, BIT(5)); + phy_set_bits(phydev, 0x0d, BIT(5)); +} + +static const struct phy_reg rtl8168d_1_phy_reg_init_0[] = { + /* Channel Estimation */ + { 0x1f, 0x0001 }, + { 0x06, 0x4064 }, + { 0x07, 0x2863 }, + { 0x08, 0x059c }, + { 0x09, 0x26b4 }, + { 0x0a, 0x6a19 }, + { 0x0b, 0xdcc8 }, + { 0x10, 0xf06d }, + { 0x14, 0x7f68 }, + { 0x18, 0x7fd9 }, + { 0x1c, 0xf0ff }, + { 0x1d, 0x3d9c }, + { 0x1f, 0x0003 }, + { 0x12, 0xf49f }, + { 0x13, 0x070b }, + { 0x1a, 0x05ad }, + { 0x14, 0x94c0 }, + + /* + * Tx Error Issue + * Enhance line driver power + */ + { 0x1f, 0x0002 }, + { 0x06, 0x5561 }, + { 0x1f, 0x0005 }, + { 0x05, 0x8332 }, + { 0x06, 0x5561 }, + + /* + * Can not link to 1Gbps with bad cable + * Decrease SNR threshold form 21.07dB to 19.04dB + */ + { 0x1f, 0x0001 }, + { 0x17, 0x0cc0 }, + + { 0x1f, 0x0000 }, + { 0x0d, 0xf880 } +}; + +static const struct phy_reg rtl8168d_1_phy_reg_init_1[] = { + { 0x1f, 0x0002 }, + { 0x05, 0x669a }, + { 0x1f, 0x0005 }, + { 0x05, 0x8330 }, + { 0x06, 0x669a }, + { 0x1f, 0x0002 } +}; + +static void rtl8168d_apply_firmware_cond(struct rtl8169_private *tp, + struct phy_device *phydev, + u16 val) +{ + u16 reg_val; + + phy_write(phydev, 0x1f, 0x0005); + phy_write(phydev, 0x05, 0x001b); + reg_val = phy_read(phydev, 0x06); + phy_write(phydev, 0x1f, 0x0000); + + if (reg_val != val) + phydev_warn(phydev, "chipset not ready for firmware\n"); + else + r8169_apply_firmware(tp); +} + +static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + rtl_writephy_batch(phydev, rtl8168d_1_phy_reg_init_0); + + /* + * Rx Error Issue + * Fine Tune Switching regulator parameter + */ + phy_write(phydev, 0x1f, 0x0002); + phy_modify(phydev, 0x0b, 0x00ef, 0x0010); + phy_modify(phydev, 0x0c, 0x5d00, 0xa200); + + if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) { + int val; + + rtl_writephy_batch(phydev, rtl8168d_1_phy_reg_init_1); + + val = phy_read(phydev, 0x0d); + + if ((val & 0x00ff) != 0x006c) { + static const u32 set[] = { + 0x0065, 0x0066, 0x0067, 0x0068, + 0x0069, 0x006a, 0x006b, 0x006c + }; + int i; + + phy_write(phydev, 0x1f, 0x0002); + + val &= 0xff00; + for (i = 0; i < ARRAY_SIZE(set); i++) + phy_write(phydev, 0x0d, val | set[i]); + } + } else { + phy_write_paged(phydev, 0x0002, 0x05, 0x6662); + r8168d_phy_param(phydev, 0x8330, 0xffff, 0x6662); + } + + /* RSET couple improve */ + phy_write(phydev, 0x1f, 0x0002); + phy_set_bits(phydev, 0x0d, 0x0300); + phy_set_bits(phydev, 0x0f, 0x0010); + + /* Fine tune PLL performance */ + phy_write(phydev, 0x1f, 0x0002); + phy_modify(phydev, 0x02, 0x0600, 0x0100); + phy_clear_bits(phydev, 0x03, 0xe000); + phy_write(phydev, 0x1f, 0x0000); + + rtl8168d_apply_firmware_cond(tp, phydev, 
0xbf00); +} + +static void rtl8168d_2_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + rtl_writephy_batch(phydev, rtl8168d_1_phy_reg_init_0); + + if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) { + int val; + + rtl_writephy_batch(phydev, rtl8168d_1_phy_reg_init_1); + + val = phy_read(phydev, 0x0d); + if ((val & 0x00ff) != 0x006c) { + static const u32 set[] = { + 0x0065, 0x0066, 0x0067, 0x0068, + 0x0069, 0x006a, 0x006b, 0x006c + }; + int i; + + phy_write(phydev, 0x1f, 0x0002); + + val &= 0xff00; + for (i = 0; i < ARRAY_SIZE(set); i++) + phy_write(phydev, 0x0d, val | set[i]); + } + } else { + phy_write_paged(phydev, 0x0002, 0x05, 0x2642); + r8168d_phy_param(phydev, 0x8330, 0xffff, 0x2642); + } + + /* Fine tune PLL performance */ + phy_write(phydev, 0x1f, 0x0002); + phy_modify(phydev, 0x02, 0x0600, 0x0100); + phy_clear_bits(phydev, 0x03, 0xe000); + phy_write(phydev, 0x1f, 0x0000); + + /* Switching regulator Slew rate */ + phy_modify_paged(phydev, 0x0002, 0x0f, 0x0000, 0x0017); + + rtl8168d_apply_firmware_cond(tp, phydev, 0xb300); +} + +static void rtl8168d_3_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + static const struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0002 }, + { 0x10, 0x0008 }, + { 0x0d, 0x006c }, + + { 0x1f, 0x0000 }, + { 0x0d, 0xf880 }, + + { 0x1f, 0x0001 }, + { 0x17, 0x0cc0 }, + + { 0x1f, 0x0001 }, + { 0x0b, 0xa4d8 }, + { 0x09, 0x281c }, + { 0x07, 0x2883 }, + { 0x0a, 0x6b35 }, + { 0x1d, 0x3da4 }, + { 0x1c, 0xeffd }, + { 0x14, 0x7f52 }, + { 0x18, 0x7fc6 }, + { 0x08, 0x0601 }, + { 0x06, 0x4063 }, + { 0x10, 0xf074 }, + { 0x1f, 0x0003 }, + { 0x13, 0x0789 }, + { 0x12, 0xf4bd }, + { 0x1a, 0x04fd }, + { 0x14, 0x84b0 }, + { 0x1f, 0x0000 }, + { 0x00, 0x9200 }, + + { 0x1f, 0x0005 }, + { 0x01, 0x0340 }, + { 0x1f, 0x0001 }, + { 0x04, 0x4000 }, + { 0x03, 0x1d21 }, + { 0x02, 0x0c32 }, + { 0x01, 0x0200 }, + { 0x00, 0x5554 }, + { 0x04, 0x4800 }, + { 0x04, 0x4000 }, + { 0x04, 0xf000 }, + { 0x03, 0xdf01 }, + { 0x02, 0xdf20 }, + { 0x01, 0x101a }, + { 0x00, 0xa0ff }, + { 0x04, 0xf800 }, + { 0x04, 0xf000 }, + { 0x1f, 0x0000 }, + }; + + rtl_writephy_batch(phydev, phy_reg_init); + r8168d_modify_extpage(phydev, 0x0023, 0x16, 0xffff, 0x0000); +} + +static void rtl8168d_4_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + phy_write_paged(phydev, 0x0001, 0x17, 0x0cc0); + r8168d_modify_extpage(phydev, 0x002d, 0x18, 0xffff, 0x0040); + phy_set_bits(phydev, 0x0d, BIT(5)); +} + +static void rtl8168e_1_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + static const struct phy_reg phy_reg_init[] = { + /* Channel estimation fine tune */ + { 0x1f, 0x0001 }, + { 0x0b, 0x6c20 }, + { 0x07, 0x2872 }, + { 0x1c, 0xefff }, + { 0x1f, 0x0003 }, + { 0x14, 0x6420 }, + { 0x1f, 0x0000 }, + }; + + r8169_apply_firmware(tp); + + /* Enable Delay cap */ + r8168d_phy_param(phydev, 0x8b80, 0xffff, 0xc896); + + rtl_writephy_batch(phydev, phy_reg_init); + + /* Update PFM & 10M TX idle timer */ + r8168d_modify_extpage(phydev, 0x002f, 0x15, 0xffff, 0x1919); + + r8168d_modify_extpage(phydev, 0x00ac, 0x18, 0xffff, 0x0006); + + /* DCO enable for 10M IDLE Power */ + r8168d_modify_extpage(phydev, 0x0023, 0x17, 0x0000, 0x0006); + + /* For impedance matching */ + phy_modify_paged(phydev, 0x0002, 0x08, 0x7f00, 0x8000); + + /* PHY auto speed down */ + r8168d_modify_extpage(phydev, 0x002d, 0x18, 0x0000, 0x0050); + phy_set_bits(phydev, 0x14, BIT(15)); + + r8168d_phy_param(phydev, 0x8b86, 0x0000, 0x0001); + r8168d_phy_param(phydev, 0x8b85, 0x2000, 
0x0000); + + r8168d_modify_extpage(phydev, 0x0020, 0x15, 0x1100, 0x0000); + phy_write_paged(phydev, 0x0006, 0x00, 0x5a00); + + phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV, 0x0000); +} + +static void rtl8168e_2_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + r8169_apply_firmware(tp); + + /* Enable Delay cap */ + r8168d_modify_extpage(phydev, 0x00ac, 0x18, 0xffff, 0x0006); + + /* Channel estimation fine tune */ + phy_write_paged(phydev, 0x0003, 0x09, 0xa20f); + + /* Green Setting */ + r8168d_phy_param(phydev, 0x8b5b, 0xffff, 0x9222); + r8168d_phy_param(phydev, 0x8b6d, 0xffff, 0x8000); + r8168d_phy_param(phydev, 0x8b76, 0xffff, 0x8000); + + /* For 4-corner performance improve */ + phy_write(phydev, 0x1f, 0x0005); + phy_write(phydev, 0x05, 0x8b80); + phy_set_bits(phydev, 0x17, 0x0006); + phy_write(phydev, 0x1f, 0x0000); + + /* PHY auto speed down */ + r8168d_modify_extpage(phydev, 0x002d, 0x18, 0x0000, 0x0010); + phy_set_bits(phydev, 0x14, BIT(15)); + + /* improve 10M EEE waveform */ + r8168d_phy_param(phydev, 0x8b86, 0x0000, 0x0001); + + /* Improve 2-pair detection performance */ + r8168d_phy_param(phydev, 0x8b85, 0x0000, 0x4000); + + rtl8168f_config_eee_phy(phydev); + + /* Green feature */ + phy_write(phydev, 0x1f, 0x0003); + phy_set_bits(phydev, 0x19, BIT(0)); + phy_set_bits(phydev, 0x10, BIT(10)); + phy_write(phydev, 0x1f, 0x0000); + phy_modify_paged(phydev, 0x0005, 0x01, 0, BIT(8)); +} + +static void rtl8168f_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + /* For 4-corner performance improve */ + r8168d_phy_param(phydev, 0x8b80, 0x0000, 0x0006); + + /* PHY auto speed down */ + r8168d_modify_extpage(phydev, 0x002d, 0x18, 0x0000, 0x0010); + phy_set_bits(phydev, 0x14, BIT(15)); + + /* Improve 10M EEE waveform */ + r8168d_phy_param(phydev, 0x8b86, 0x0000, 0x0001); + + rtl8168f_config_eee_phy(phydev); +} + +static void rtl8168f_1_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + r8169_apply_firmware(tp); + + /* Channel estimation fine tune */ + phy_write_paged(phydev, 0x0003, 0x09, 0xa20f); + + /* Modify green table for giga & fnet */ + r8168d_phy_param(phydev, 0x8b55, 0xffff, 0x0000); + r8168d_phy_param(phydev, 0x8b5e, 0xffff, 0x0000); + r8168d_phy_param(phydev, 0x8b67, 0xffff, 0x0000); + r8168d_phy_param(phydev, 0x8b70, 0xffff, 0x0000); + r8168d_modify_extpage(phydev, 0x0078, 0x17, 0xffff, 0x0000); + r8168d_modify_extpage(phydev, 0x0078, 0x19, 0xffff, 0x00fb); + + /* Modify green table for 10M */ + r8168d_phy_param(phydev, 0x8b79, 0xffff, 0xaa00); + + /* Disable hiimpedance detection (RTCT) */ + phy_write_paged(phydev, 0x0003, 0x01, 0x328a); + + rtl8168f_hw_phy_config(tp, phydev); + + /* Improve 2-pair detection performance */ + r8168d_phy_param(phydev, 0x8b85, 0x0000, 0x4000); +} + +static void rtl8168f_2_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + r8169_apply_firmware(tp); + + rtl8168f_hw_phy_config(tp, phydev); +} + +static void rtl8411_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + r8169_apply_firmware(tp); + + rtl8168f_hw_phy_config(tp, phydev); + + /* Improve 2-pair detection performance */ + r8168d_phy_param(phydev, 0x8b85, 0x0000, 0x4000); + + /* Channel estimation fine tune */ + phy_write_paged(phydev, 0x0003, 0x09, 0xa20f); + + /* Modify green table for giga & fnet */ + r8168d_phy_param(phydev, 0x8b55, 0xffff, 0x0000); + r8168d_phy_param(phydev, 0x8b5e, 0xffff, 0x0000); + r8168d_phy_param(phydev, 0x8b67, 0xffff, 0x0000); + 
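The table-driven initialisation used throughout this new file (static struct phy_reg arrays fed to rtl_writephy_batch()) is just a loop over register/value pairs executed with the MDIO bus lock held. A standalone mock of the pattern in plain C, where the printf stands in for the locked __phy_write() calls and the sample table reuses the rtl8102e values from this patch:

```c
#include <stdio.h>
#include <stdint.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

struct phy_reg {
	uint16_t reg;
	uint16_t val;
};

static void writephy_batch(const struct phy_reg *regs, int len)
{
	/* in the driver this loop runs under phy_lock_mdio_bus() */
	while (len-- > 0) {
		printf("write reg 0x%02x = 0x%04x\n", regs->reg, regs->val);
		regs++;
	}
}

int main(void)
{
	static const struct phy_reg init[] = {
		{ 0x1f, 0x0003 },	/* page select */
		{ 0x08, 0x441d },
		{ 0x01, 0x9100 },
		{ 0x1f, 0x0000 },	/* back to page 0 */
	};

	writephy_batch(init, ARRAY_SIZE(init));
	return 0;
}
```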
r8168d_phy_param(phydev, 0x8b70, 0xffff, 0x0000); + r8168d_modify_extpage(phydev, 0x0078, 0x17, 0xffff, 0x0000); + r8168d_modify_extpage(phydev, 0x0078, 0x19, 0xffff, 0x00aa); + + /* Modify green table for 10M */ + r8168d_phy_param(phydev, 0x8b79, 0xffff, 0xaa00); + + /* Disable hiimpedance detection (RTCT) */ + phy_write_paged(phydev, 0x0003, 0x01, 0x328a); + + /* Modify green table for giga */ + r8168d_phy_param(phydev, 0x8b54, 0x0800, 0x0000); + r8168d_phy_param(phydev, 0x8b5d, 0x0800, 0x0000); + r8168d_phy_param(phydev, 0x8a7c, 0x0100, 0x0000); + r8168d_phy_param(phydev, 0x8a7f, 0x0000, 0x0100); + r8168d_phy_param(phydev, 0x8a82, 0x0100, 0x0000); + r8168d_phy_param(phydev, 0x8a85, 0x0100, 0x0000); + r8168d_phy_param(phydev, 0x8a88, 0x0100, 0x0000); + + /* uc same-seed solution */ + r8168d_phy_param(phydev, 0x8b85, 0x0000, 0x8000); + + /* Green feature */ + phy_write(phydev, 0x1f, 0x0003); + phy_clear_bits(phydev, 0x19, BIT(0)); + phy_clear_bits(phydev, 0x10, BIT(10)); + phy_write(phydev, 0x1f, 0x0000); +} + +static void rtl8168g_disable_aldps(struct phy_device *phydev) +{ + phy_modify_paged(phydev, 0x0a43, 0x10, BIT(2), 0); +} + +static void rtl8168g_phy_adjust_10m_aldps(struct phy_device *phydev) +{ + phy_modify_paged(phydev, 0x0bcc, 0x14, BIT(8), 0); + phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(7) | BIT(6)); + r8168g_phy_param(phydev, 0x8084, 0x6000, 0x0000); + phy_modify_paged(phydev, 0x0a43, 0x10, 0x0000, 0x1003); +} + +static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + int ret; + + r8169_apply_firmware(tp); + + ret = phy_read_paged(phydev, 0x0a46, 0x10); + if (ret & BIT(8)) + phy_modify_paged(phydev, 0x0bcc, 0x12, BIT(15), 0); + else + phy_modify_paged(phydev, 0x0bcc, 0x12, 0, BIT(15)); + + ret = phy_read_paged(phydev, 0x0a46, 0x13); + if (ret & BIT(8)) + phy_modify_paged(phydev, 0x0c41, 0x15, 0, BIT(1)); + else + phy_modify_paged(phydev, 0x0c41, 0x15, BIT(1), 0); + + /* Enable PHY auto speed down */ + phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(3) | BIT(2)); + + rtl8168g_phy_adjust_10m_aldps(phydev); + + /* EEE auto-fallback function */ + phy_modify_paged(phydev, 0x0a4b, 0x11, 0, BIT(2)); + + /* Enable UC LPF tune function */ + r8168g_phy_param(phydev, 0x8012, 0x0000, 0x8000); + + phy_modify_paged(phydev, 0x0c42, 0x11, BIT(13), BIT(14)); + + /* Improve SWR Efficiency */ + phy_write(phydev, 0x1f, 0x0bcd); + phy_write(phydev, 0x14, 0x5065); + phy_write(phydev, 0x14, 0xd065); + phy_write(phydev, 0x1f, 0x0bc8); + phy_write(phydev, 0x11, 0x5655); + phy_write(phydev, 0x1f, 0x0bcd); + phy_write(phydev, 0x14, 0x1065); + phy_write(phydev, 0x14, 0x9065); + phy_write(phydev, 0x14, 0x1065); + phy_write(phydev, 0x1f, 0x0000); + + rtl8168g_disable_aldps(phydev); + rtl8168g_config_eee_phy(phydev); +} + +static void rtl8168g_2_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + r8169_apply_firmware(tp); + rtl8168g_config_eee_phy(phydev); +} + +static void rtl8168h_1_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + u16 dout_tapbin; + u32 data; + + r8169_apply_firmware(tp); + + /* CHN EST parameters adjust - giga master */ + r8168g_phy_param(phydev, 0x809b, 0xf800, 0x8000); + r8168g_phy_param(phydev, 0x80a2, 0xff00, 0x8000); + r8168g_phy_param(phydev, 0x80a4, 0xff00, 0x8500); + r8168g_phy_param(phydev, 0x809c, 0xff00, 0xbd00); + + /* CHN EST parameters adjust - giga slave */ + r8168g_phy_param(phydev, 0x80ad, 0xf800, 0x7000); + r8168g_phy_param(phydev, 0x80b4, 0xff00, 0x5000); + 
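Two of the derived tuning values in the 8168h paths (shown in the removed code earlier and re-added below) are plain bit arithmetic: dout_tapbin packs the low two bits of page 0x0a46 reg 0x13 above the top two bits of reg 0x12, XORs with 0x08, complements, and keeps only the top nibble of the result; rlen clamps a 4-bit reading and replicates it into all four nibbles. A standalone walk-through with arbitrary sample register reads:

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* arbitrary sample reads of page 0x0a46, regs 0x13 and 0x12 */
	uint16_t reg13 = 0x0002, reg12 = 0x8000;
	uint16_t dout_tapbin, data, rlen;

	/* dout_tapbin, as in rtl8168h_1_hw_phy_config() */
	dout_tapbin = (reg13 & 3) << 2;		/* reg 0x13 bits 1:0 -> bits 3:2 */
	dout_tapbin |= (reg12 & 0xc000) >> 14;	/* reg 0x12 bits 15:14 -> bits 1:0 */
	dout_tapbin = ~(dout_tapbin ^ 0x08);
	dout_tapbin <<= 12;
	dout_tapbin &= 0xf000;
	printf("dout_tapbin = 0x%04x\n", dout_tapbin);

	/* rlen (TX LPF corner frequency), as in rtl8168h_2_hw_phy_config() */
	data = 0x0007;		/* sample low nibble of page 0x0bcd reg 0x16 */
	data &= 0x000f;
	rlen = data > 3 ? data - 3 : 0;
	data = rlen | (rlen << 4) | (rlen << 8) | (rlen << 12);
	printf("rlen pattern = 0x%04x\n", data);

	return 0;
}
```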
r8168g_phy_param(phydev, 0x80ac, 0xff00, 0x4000); + + /* CHN EST parameters adjust - fnet */ + r8168g_phy_param(phydev, 0x808e, 0xff00, 0x1200); + r8168g_phy_param(phydev, 0x8090, 0xff00, 0xe500); + r8168g_phy_param(phydev, 0x8092, 0xff00, 0x9f00); + + /* enable R-tune & PGA-retune function */ + dout_tapbin = 0; + data = phy_read_paged(phydev, 0x0a46, 0x13); + data &= 3; + data <<= 2; + dout_tapbin |= data; + data = phy_read_paged(phydev, 0x0a46, 0x12); + data &= 0xc000; + data >>= 14; + dout_tapbin |= data; + dout_tapbin = ~(dout_tapbin ^ 0x08); + dout_tapbin <<= 12; + dout_tapbin &= 0xf000; + + r8168g_phy_param(phydev, 0x827a, 0xf000, dout_tapbin); + r8168g_phy_param(phydev, 0x827b, 0xf000, dout_tapbin); + r8168g_phy_param(phydev, 0x827c, 0xf000, dout_tapbin); + r8168g_phy_param(phydev, 0x827d, 0xf000, dout_tapbin); + r8168g_phy_param(phydev, 0x0811, 0x0000, 0x0800); + phy_modify_paged(phydev, 0x0a42, 0x16, 0x0000, 0x0002); + + /* enable GPHY 10M */ + phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(11)); + + /* SAR ADC performance */ + phy_modify_paged(phydev, 0x0bca, 0x17, BIT(12) | BIT(13), BIT(14)); + + r8168g_phy_param(phydev, 0x803f, 0x3000, 0x0000); + r8168g_phy_param(phydev, 0x8047, 0x3000, 0x0000); + r8168g_phy_param(phydev, 0x804f, 0x3000, 0x0000); + r8168g_phy_param(phydev, 0x8057, 0x3000, 0x0000); + r8168g_phy_param(phydev, 0x805f, 0x3000, 0x0000); + r8168g_phy_param(phydev, 0x8067, 0x3000, 0x0000); + r8168g_phy_param(phydev, 0x806f, 0x3000, 0x0000); + + /* disable phy pfm mode */ + phy_modify_paged(phydev, 0x0a44, 0x11, BIT(7), 0); + + rtl8168g_disable_aldps(phydev); + rtl8168h_config_eee_phy(phydev); +} + +static void rtl8168h_2_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + u16 ioffset, rlen; + u32 data; + + r8169_apply_firmware(tp); + + /* CHIN EST parameter update */ + r8168g_phy_param(phydev, 0x808a, 0x003f, 0x000a); + + /* enable R-tune & PGA-retune function */ + r8168g_phy_param(phydev, 0x0811, 0x0000, 0x0800); + phy_modify_paged(phydev, 0x0a42, 0x16, 0x0000, 0x0002); + + /* enable GPHY 10M */ + phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(11)); + + ioffset = rtl8168h_2_get_adc_bias_ioffset(tp); + if (ioffset != 0xffff) + phy_write_paged(phydev, 0x0bcf, 0x16, ioffset); + + /* Modify rlen (TX LPF corner frequency) level */ + data = phy_read_paged(phydev, 0x0bcd, 0x16); + data &= 0x000f; + rlen = 0; + if (data > 3) + rlen = data - 3; + data = rlen | (rlen << 4) | (rlen << 8) | (rlen << 12); + phy_write_paged(phydev, 0x0bcd, 0x17, data); + + /* disable phy pfm mode */ + phy_modify_paged(phydev, 0x0a44, 0x11, BIT(7), 0); + + rtl8168g_disable_aldps(phydev); + rtl8168g_config_eee_phy(phydev); +} + +static void rtl8168ep_1_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + /* Enable PHY auto speed down */ + phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(3) | BIT(2)); + + rtl8168g_phy_adjust_10m_aldps(phydev); + + /* Enable EEE auto-fallback function */ + phy_modify_paged(phydev, 0x0a4b, 0x11, 0, BIT(2)); + + /* Enable UC LPF tune function */ + r8168g_phy_param(phydev, 0x8012, 0x0000, 0x8000); + + /* set rg_sel_sdm_rate */ + phy_modify_paged(phydev, 0x0c42, 0x11, BIT(13), BIT(14)); + + rtl8168g_disable_aldps(phydev); + rtl8168g_config_eee_phy(phydev); +} + +static void rtl8168ep_2_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + rtl8168g_phy_adjust_10m_aldps(phydev); + + /* Enable UC LPF tune function */ + r8168g_phy_param(phydev, 0x8012, 0x0000, 0x8000); + + /* Set rg_sel_sdm_rate */ + 
phy_modify_paged(phydev, 0x0c42, 0x11, BIT(13), BIT(14)); + + /* Channel estimation parameters */ + r8168g_phy_param(phydev, 0x80f3, 0xff00, 0x8b00); + r8168g_phy_param(phydev, 0x80f0, 0xff00, 0x3a00); + r8168g_phy_param(phydev, 0x80ef, 0xff00, 0x0500); + r8168g_phy_param(phydev, 0x80f6, 0xff00, 0x6e00); + r8168g_phy_param(phydev, 0x80ec, 0xff00, 0x6800); + r8168g_phy_param(phydev, 0x80ed, 0xff00, 0x7c00); + r8168g_phy_param(phydev, 0x80f2, 0xff00, 0xf400); + r8168g_phy_param(phydev, 0x80f4, 0xff00, 0x8500); + r8168g_phy_param(phydev, 0x8110, 0xff00, 0xa800); + r8168g_phy_param(phydev, 0x810f, 0xff00, 0x1d00); + r8168g_phy_param(phydev, 0x8111, 0xff00, 0xf500); + r8168g_phy_param(phydev, 0x8113, 0xff00, 0x6100); + r8168g_phy_param(phydev, 0x8115, 0xff00, 0x9200); + r8168g_phy_param(phydev, 0x810e, 0xff00, 0x0400); + r8168g_phy_param(phydev, 0x810c, 0xff00, 0x7c00); + r8168g_phy_param(phydev, 0x810b, 0xff00, 0x5a00); + r8168g_phy_param(phydev, 0x80d1, 0xff00, 0xff00); + r8168g_phy_param(phydev, 0x80cd, 0xff00, 0x9e00); + r8168g_phy_param(phydev, 0x80d3, 0xff00, 0x0e00); + r8168g_phy_param(phydev, 0x80d5, 0xff00, 0xca00); + r8168g_phy_param(phydev, 0x80d7, 0xff00, 0x8400); + + /* Force PWM-mode */ + phy_write(phydev, 0x1f, 0x0bcd); + phy_write(phydev, 0x14, 0x5065); + phy_write(phydev, 0x14, 0xd065); + phy_write(phydev, 0x1f, 0x0bc8); + phy_write(phydev, 0x12, 0x00ed); + phy_write(phydev, 0x1f, 0x0bcd); + phy_write(phydev, 0x14, 0x1065); + phy_write(phydev, 0x14, 0x9065); + phy_write(phydev, 0x14, 0x1065); + phy_write(phydev, 0x1f, 0x0000); + + rtl8168g_disable_aldps(phydev); + rtl8168g_config_eee_phy(phydev); +} + +static void rtl8117_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + /* CHN EST parameters adjust - fnet */ + r8168g_phy_param(phydev, 0x808e, 0xff00, 0x4800); + r8168g_phy_param(phydev, 0x8090, 0xff00, 0xcc00); + r8168g_phy_param(phydev, 0x8092, 0xff00, 0xb000); + + r8168g_phy_param(phydev, 0x8088, 0xff00, 0x6000); + r8168g_phy_param(phydev, 0x808b, 0x3f00, 0x0b00); + r8168g_phy_param(phydev, 0x808d, 0x1f00, 0x0600); + r8168g_phy_param(phydev, 0x808c, 0xff00, 0xb000); + r8168g_phy_param(phydev, 0x80a0, 0xff00, 0x2800); + r8168g_phy_param(phydev, 0x80a2, 0xff00, 0x5000); + r8168g_phy_param(phydev, 0x809b, 0xf800, 0xb000); + r8168g_phy_param(phydev, 0x809a, 0xff00, 0x4b00); + r8168g_phy_param(phydev, 0x809d, 0x3f00, 0x0800); + r8168g_phy_param(phydev, 0x80a1, 0xff00, 0x7000); + r8168g_phy_param(phydev, 0x809f, 0x1f00, 0x0300); + r8168g_phy_param(phydev, 0x809e, 0xff00, 0x8800); + r8168g_phy_param(phydev, 0x80b2, 0xff00, 0x2200); + r8168g_phy_param(phydev, 0x80ad, 0xf800, 0x9800); + r8168g_phy_param(phydev, 0x80af, 0x3f00, 0x0800); + r8168g_phy_param(phydev, 0x80b3, 0xff00, 0x6f00); + r8168g_phy_param(phydev, 0x80b1, 0x1f00, 0x0300); + r8168g_phy_param(phydev, 0x80b0, 0xff00, 0x9300); + + r8168g_phy_param(phydev, 0x8011, 0x0000, 0x0800); + + /* enable GPHY 10M */ + phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(11)); + + r8168g_phy_param(phydev, 0x8016, 0x0000, 0x0400); + + rtl8168g_disable_aldps(phydev); + rtl8168h_config_eee_phy(phydev); +} + +static void rtl8102e_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + static const struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0003 }, + { 0x08, 0x441d }, + { 0x01, 0x9100 }, + { 0x1f, 0x0000 } + }; + + phy_set_bits(phydev, 0x11, BIT(12)); + phy_set_bits(phydev, 0x19, BIT(13)); + phy_set_bits(phydev, 0x10, BIT(15)); + + rtl_writephy_batch(phydev, phy_reg_init); +} + +static void 
rtl8105e_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + /* Disable ALDPS before ram code */ + phy_write(phydev, 0x18, 0x0310); + msleep(100); + + r8169_apply_firmware(tp); + + phy_write_paged(phydev, 0x0005, 0x1a, 0x0000); + phy_write_paged(phydev, 0x0004, 0x1c, 0x0000); + phy_write_paged(phydev, 0x0001, 0x15, 0x7701); +} + +static void rtl8402_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + /* Disable ALDPS before setting firmware */ + phy_write(phydev, 0x18, 0x0310); + msleep(20); + + r8169_apply_firmware(tp); + + /* EEE setting */ + phy_write(phydev, 0x1f, 0x0004); + phy_write(phydev, 0x10, 0x401f); + phy_write(phydev, 0x19, 0x7030); + phy_write(phydev, 0x1f, 0x0000); +} + +static void rtl8106e_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + static const struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0004 }, + { 0x10, 0xc07f }, + { 0x19, 0x7030 }, + { 0x1f, 0x0000 } + }; + + /* Disable ALDPS before ram code */ + phy_write(phydev, 0x18, 0x0310); + msleep(100); + + r8169_apply_firmware(tp); + + rtl_writephy_batch(phydev, phy_reg_init); +} + +static void rtl8125_1_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + phy_modify_paged(phydev, 0xad4, 0x10, 0x03ff, 0x0084); + phy_modify_paged(phydev, 0xad4, 0x17, 0x0000, 0x0010); + phy_modify_paged(phydev, 0xad1, 0x13, 0x03ff, 0x0006); + phy_modify_paged(phydev, 0xad3, 0x11, 0x003f, 0x0006); + phy_modify_paged(phydev, 0xac0, 0x14, 0x0000, 0x1100); + phy_modify_paged(phydev, 0xac8, 0x15, 0xf000, 0x7000); + phy_modify_paged(phydev, 0xad1, 0x14, 0x0000, 0x0400); + phy_modify_paged(phydev, 0xad1, 0x15, 0x0000, 0x03ff); + phy_modify_paged(phydev, 0xad1, 0x16, 0x0000, 0x03ff); + + r8168g_phy_param(phydev, 0x80ea, 0xff00, 0xc400); + r8168g_phy_param(phydev, 0x80eb, 0x0700, 0x0300); + r8168g_phy_param(phydev, 0x80f8, 0xff00, 0x1c00); + r8168g_phy_param(phydev, 0x80f1, 0xff00, 0x3000); + r8168g_phy_param(phydev, 0x80fe, 0xff00, 0xa500); + r8168g_phy_param(phydev, 0x8102, 0xff00, 0x5000); + r8168g_phy_param(phydev, 0x8105, 0xff00, 0x3300); + r8168g_phy_param(phydev, 0x8100, 0xff00, 0x7000); + r8168g_phy_param(phydev, 0x8104, 0xff00, 0xf000); + r8168g_phy_param(phydev, 0x8106, 0xff00, 0x6500); + r8168g_phy_param(phydev, 0x80dc, 0xff00, 0xed00); + r8168g_phy_param(phydev, 0x80df, 0x0000, 0x0100); + r8168g_phy_param(phydev, 0x80e1, 0x0100, 0x0000); + + phy_modify_paged(phydev, 0xbf0, 0x13, 0x003f, 0x0038); + r8168g_phy_param(phydev, 0x819f, 0xffff, 0xd0b6); + + phy_write_paged(phydev, 0xbc3, 0x12, 0x5555); + phy_modify_paged(phydev, 0xbf0, 0x15, 0x0e00, 0x0a00); + phy_modify_paged(phydev, 0xa5c, 0x10, 0x0400, 0x0000); + phy_modify_paged(phydev, 0xa44, 0x11, 0x0000, 0x0800); + + rtl8125_config_eee_phy(phydev); +} + +static void rtl8125_2_hw_phy_config(struct rtl8169_private *tp, + struct phy_device *phydev) +{ + int i; + + phy_modify_paged(phydev, 0xad4, 0x17, 0x0000, 0x0010); + phy_modify_paged(phydev, 0xad1, 0x13, 0x03ff, 0x03ff); + phy_modify_paged(phydev, 0xad3, 0x11, 0x003f, 0x0006); + phy_modify_paged(phydev, 0xac0, 0x14, 0x1100, 0x0000); + phy_modify_paged(phydev, 0xacc, 0x10, 0x0003, 0x0002); + phy_modify_paged(phydev, 0xad4, 0x10, 0x00e7, 0x0044); + phy_modify_paged(phydev, 0xac1, 0x12, 0x0080, 0x0000); + phy_modify_paged(phydev, 0xac8, 0x10, 0x0300, 0x0000); + phy_modify_paged(phydev, 0xac5, 0x17, 0x0007, 0x0002); + phy_write_paged(phydev, 0xad4, 0x16, 0x00a8); + phy_write_paged(phydev, 0xac5, 0x16, 0x01ff); + phy_modify_paged(phydev, 0xac8, 
0x15, 0x00f0, 0x0030); + + phy_write(phydev, 0x1f, 0x0b87); + phy_write(phydev, 0x16, 0x80a2); + phy_write(phydev, 0x17, 0x0153); + phy_write(phydev, 0x16, 0x809c); + phy_write(phydev, 0x17, 0x0153); + phy_write(phydev, 0x1f, 0x0000); + + phy_write(phydev, 0x1f, 0x0a43); + phy_write(phydev, 0x13, 0x81B3); + phy_write(phydev, 0x14, 0x0043); + phy_write(phydev, 0x14, 0x00A7); + phy_write(phydev, 0x14, 0x00D6); + phy_write(phydev, 0x14, 0x00EC); + phy_write(phydev, 0x14, 0x00F6); + phy_write(phydev, 0x14, 0x00FB); + phy_write(phydev, 0x14, 0x00FD); + phy_write(phydev, 0x14, 0x00FF); + phy_write(phydev, 0x14, 0x00BB); + phy_write(phydev, 0x14, 0x0058); + phy_write(phydev, 0x14, 0x0029); + phy_write(phydev, 0x14, 0x0013); + phy_write(phydev, 0x14, 0x0009); + phy_write(phydev, 0x14, 0x0004); + phy_write(phydev, 0x14, 0x0002); + for (i = 0; i < 25; i++) + phy_write(phydev, 0x14, 0x0000); + phy_write(phydev, 0x1f, 0x0000); + + r8168g_phy_param(phydev, 0x8257, 0xffff, 0x020F); + r8168g_phy_param(phydev, 0x80ea, 0xffff, 0x7843); + + r8169_apply_firmware(tp); + + phy_modify_paged(phydev, 0xd06, 0x14, 0x0000, 0x2000); + + r8168g_phy_param(phydev, 0x81a2, 0x0000, 0x0100); + + phy_modify_paged(phydev, 0xb54, 0x16, 0xff00, 0xdb00); + phy_modify_paged(phydev, 0xa45, 0x12, 0x0001, 0x0000); + phy_modify_paged(phydev, 0xa5d, 0x12, 0x0000, 0x0020); + phy_modify_paged(phydev, 0xad4, 0x17, 0x0010, 0x0000); + phy_modify_paged(phydev, 0xa86, 0x15, 0x0001, 0x0000); + phy_modify_paged(phydev, 0xa44, 0x11, 0x0000, 0x0800); + + rtl8125_config_eee_phy(phydev); +} + +void r8169_hw_phy_config(struct rtl8169_private *tp, struct phy_device *phydev, + enum mac_version ver) +{ + static const rtl_phy_cfg_fct phy_configs[] = { + /* PCI devices. */ + [RTL_GIGA_MAC_VER_02] = rtl8169s_hw_phy_config, + [RTL_GIGA_MAC_VER_03] = rtl8169s_hw_phy_config, + [RTL_GIGA_MAC_VER_04] = rtl8169sb_hw_phy_config, + [RTL_GIGA_MAC_VER_05] = rtl8169scd_hw_phy_config, + [RTL_GIGA_MAC_VER_06] = rtl8169sce_hw_phy_config, + /* PCI-E devices. 
*/ + [RTL_GIGA_MAC_VER_07] = rtl8102e_hw_phy_config, + [RTL_GIGA_MAC_VER_08] = rtl8102e_hw_phy_config, + [RTL_GIGA_MAC_VER_09] = rtl8102e_hw_phy_config, + [RTL_GIGA_MAC_VER_10] = NULL, + [RTL_GIGA_MAC_VER_11] = rtl8168bb_hw_phy_config, + [RTL_GIGA_MAC_VER_12] = rtl8168bef_hw_phy_config, + [RTL_GIGA_MAC_VER_13] = NULL, + [RTL_GIGA_MAC_VER_14] = NULL, + [RTL_GIGA_MAC_VER_15] = NULL, + [RTL_GIGA_MAC_VER_16] = NULL, + [RTL_GIGA_MAC_VER_17] = rtl8168bef_hw_phy_config, + [RTL_GIGA_MAC_VER_18] = rtl8168cp_1_hw_phy_config, + [RTL_GIGA_MAC_VER_19] = rtl8168c_1_hw_phy_config, + [RTL_GIGA_MAC_VER_20] = rtl8168c_2_hw_phy_config, + [RTL_GIGA_MAC_VER_21] = rtl8168c_3_hw_phy_config, + [RTL_GIGA_MAC_VER_22] = rtl8168c_3_hw_phy_config, + [RTL_GIGA_MAC_VER_23] = rtl8168cp_2_hw_phy_config, + [RTL_GIGA_MAC_VER_24] = rtl8168cp_2_hw_phy_config, + [RTL_GIGA_MAC_VER_25] = rtl8168d_1_hw_phy_config, + [RTL_GIGA_MAC_VER_26] = rtl8168d_2_hw_phy_config, + [RTL_GIGA_MAC_VER_27] = rtl8168d_3_hw_phy_config, + [RTL_GIGA_MAC_VER_28] = rtl8168d_4_hw_phy_config, + [RTL_GIGA_MAC_VER_29] = rtl8105e_hw_phy_config, + [RTL_GIGA_MAC_VER_30] = rtl8105e_hw_phy_config, + [RTL_GIGA_MAC_VER_31] = NULL, + [RTL_GIGA_MAC_VER_32] = rtl8168e_1_hw_phy_config, + [RTL_GIGA_MAC_VER_33] = rtl8168e_1_hw_phy_config, + [RTL_GIGA_MAC_VER_34] = rtl8168e_2_hw_phy_config, + [RTL_GIGA_MAC_VER_35] = rtl8168f_1_hw_phy_config, + [RTL_GIGA_MAC_VER_36] = rtl8168f_2_hw_phy_config, + [RTL_GIGA_MAC_VER_37] = rtl8402_hw_phy_config, + [RTL_GIGA_MAC_VER_38] = rtl8411_hw_phy_config, + [RTL_GIGA_MAC_VER_39] = rtl8106e_hw_phy_config, + [RTL_GIGA_MAC_VER_40] = rtl8168g_1_hw_phy_config, + [RTL_GIGA_MAC_VER_41] = NULL, + [RTL_GIGA_MAC_VER_42] = rtl8168g_2_hw_phy_config, + [RTL_GIGA_MAC_VER_43] = rtl8168g_2_hw_phy_config, + [RTL_GIGA_MAC_VER_44] = rtl8168g_2_hw_phy_config, + [RTL_GIGA_MAC_VER_45] = rtl8168h_1_hw_phy_config, + [RTL_GIGA_MAC_VER_46] = rtl8168h_2_hw_phy_config, + [RTL_GIGA_MAC_VER_47] = rtl8168h_1_hw_phy_config, + [RTL_GIGA_MAC_VER_48] = rtl8168h_2_hw_phy_config, + [RTL_GIGA_MAC_VER_49] = rtl8168ep_1_hw_phy_config, + [RTL_GIGA_MAC_VER_50] = rtl8168ep_2_hw_phy_config, + [RTL_GIGA_MAC_VER_51] = rtl8168ep_2_hw_phy_config, + [RTL_GIGA_MAC_VER_52] = rtl8117_hw_phy_config, + [RTL_GIGA_MAC_VER_60] = rtl8125_1_hw_phy_config, + [RTL_GIGA_MAC_VER_61] = rtl8125_2_hw_phy_config, + }; + + if (phy_configs[ver]) + phy_configs[ver](tp, phydev); +} diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 4b13a184bfc7..067ad25553b9 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1425,7 +1425,7 @@ out_napi_off: } /* Timeout function for Ethernet AVB */ -static void ravb_tx_timeout(struct net_device *ndev) +static void ravb_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct ravb_private *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 3591285250e1..58ca126518a2 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2482,7 +2482,7 @@ out_napi_off: } /* Timeout function */ -static void sh_eth_tx_timeout(struct net_device *ndev) +static void sh_eth_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct sh_eth_private *mdp = netdev_priv(ndev); struct sh_eth_rxdesc *rxdesc; @@ -2647,20 +2647,6 @@ static int sh_eth_close(struct net_device *ndev) return 0; } -/* ioctl to device function */ -static int sh_eth_do_ioctl(struct net_device 
*ndev, struct ifreq *rq, int cmd) -{ - struct phy_device *phydev = ndev->phydev; - - if (!netif_running(ndev)) - return -EINVAL; - - if (!phydev) - return -ENODEV; - - return phy_mii_ioctl(phydev, rq, cmd); -} - static int sh_eth_change_mtu(struct net_device *ndev, int new_mtu) { if (netif_running(ndev)) @@ -3159,7 +3145,7 @@ static const struct net_device_ops sh_eth_netdev_ops = { .ndo_get_stats = sh_eth_get_stats, .ndo_set_rx_mode = sh_eth_set_rx_mode, .ndo_tx_timeout = sh_eth_tx_timeout, - .ndo_do_ioctl = sh_eth_do_ioctl, + .ndo_do_ioctl = phy_do_ioctl_running, .ndo_change_mtu = sh_eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, @@ -3175,7 +3161,7 @@ static const struct net_device_ops sh_eth_netdev_ops_tsu = { .ndo_vlan_rx_add_vid = sh_eth_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = sh_eth_vlan_rx_kill_vid, .ndo_tx_timeout = sh_eth_tx_timeout, - .ndo_do_ioctl = sh_eth_do_ioctl, + .ndo_do_ioctl = phy_do_ioctl_running, .ndo_change_mtu = sh_eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index bc4f951315da..7585cd2270ba 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2159,7 +2159,7 @@ static void rocker_router_fib_event_work(struct work_struct *work) /* Protect internal structures from changes */ rtnl_lock(); switch (fib_work->event) { - case FIB_EVENT_ENTRY_ADD: + case FIB_EVENT_ENTRY_REPLACE: err = rocker_world_fib4_add(rocker, &fib_work->fen_info); if (err) rocker_world_fib4_abort(rocker); @@ -2201,7 +2201,7 @@ static int rocker_router_fib_event(struct notifier_block *nb, fib_work->event = event; switch (event) { - case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ case FIB_EVENT_ENTRY_DEL: if (info->family == AF_INET) { struct fib_entry_notifier_info *fen_info = ptr; diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index 52ed111d98f4..c705743d69f7 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -1572,7 +1572,7 @@ static int sxgbe_poll(struct napi_struct *napi, int budget) * netdev structure and arrange for the device to be reset to a sane state * in order to transmit a new packet. 
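The one-line signature changes running through this part of the series (ravb and sh_eth above, sxgbe just below, then the seeq drivers) convert .ndo_tx_timeout to the new two-argument prototype, in which the networking core also reports which TX queue stalled. Single-queue drivers such as these can simply log or ignore the extra argument. A minimal sketch of a converted handler follows; the driver name and private structure are hypothetical and only illustrate the calling convention:

#include <linux/netdevice.h>
#include <linux/workqueue.h>

/* Hypothetical private state; only the deferred reset work matters here. */
struct demo_priv {
	struct work_struct reset_work;
};

/* New prototype: the core passes the index of the stalled TX queue. */
static void demo_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
	struct demo_priv *priv = netdev_priv(dev);

	netdev_warn(dev, "TX timeout on queue %u, scheduling reset\n", txqueue);
	schedule_work(&priv->reset_work);	/* single queue: txqueue is 0 */
}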
*/ -static void sxgbe_tx_timeout(struct net_device *dev) +static void sxgbe_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct sxgbe_priv_data *priv = netdev_priv(dev); @@ -1939,9 +1939,7 @@ static int sxgbe_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - if (!dev->phydev) - return -EINVAL; - ret = phy_mii_ioctl(dev->phydev, rq, cmd); + ret = phy_do_ioctl(dev, rq, cmd); break; default: break; diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c index 632a7c85964d..128ee7cda1ed 100644 --- a/drivers/net/ethernet/seeq/ether3.c +++ b/drivers/net/ethernet/seeq/ether3.c @@ -79,7 +79,7 @@ static netdev_tx_t ether3_sendpacket(struct sk_buff *skb, static irqreturn_t ether3_interrupt (int irq, void *dev_id); static int ether3_close (struct net_device *dev); static void ether3_setmulticastlist (struct net_device *dev); -static void ether3_timeout(struct net_device *dev); +static void ether3_timeout(struct net_device *dev, unsigned int txqueue); #define BUS_16 2 #define BUS_8 1 @@ -450,7 +450,7 @@ static void ether3_setmulticastlist(struct net_device *dev) ether3_outw(priv(dev)->regs.config1 | CFG1_LOCBUFMEM, REG_CONFIG1); } -static void ether3_timeout(struct net_device *dev) +static void ether3_timeout(struct net_device *dev, unsigned int txqueue) { unsigned long flags; diff --git a/drivers/net/ethernet/seeq/sgiseeq.c b/drivers/net/ethernet/seeq/sgiseeq.c index 276c7cae7cee..8507ff242014 100644 --- a/drivers/net/ethernet/seeq/sgiseeq.c +++ b/drivers/net/ethernet/seeq/sgiseeq.c @@ -645,7 +645,7 @@ sgiseeq_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static void timeout(struct net_device *dev) +static void timeout(struct net_device *dev, unsigned int txqueue) { printk(KERN_NOTICE "%s: transmit timed out, resetting\n", dev->name); sgiseeq_reset(dev); diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig index 5f36774bf4b8..ea5a9220196c 100644 --- a/drivers/net/ethernet/sfc/Kconfig +++ b/drivers/net/ethernet/sfc/Kconfig @@ -21,8 +21,6 @@ config SFC depends on PCI select MDIO select CRC32 - select I2C - select I2C_ALGOBIT imply PTP_1588_CLOCK ---help--- This driver supports 10/40-gigabit Ethernet cards based on diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile index c5c297e78d06..87d093da22ca 100644 --- a/drivers/net/ethernet/sfc/Makefile +++ b/drivers/net/ethernet/sfc/Makefile @@ -1,7 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 -sfc-y += efx.o nic.o farch.o siena.o ef10.o tx.o rx.o \ - selftest.o ethtool.o ptp.o tx_tso.o \ - mcdi.o mcdi_port.o mcdi_mon.o +sfc-y += efx.o efx_common.o efx_channels.o nic.o \ + farch.o siena.o ef10.o \ + tx.o tx_common.o tx_tso.o rx.o rx_common.o \ + selftest.o ethtool.o ethtool_common.o ptp.o \ + mcdi.o mcdi_port.o mcdi_port_common.o \ + mcdi_functions.o mcdi_filters.o mcdi_mon.o sfc-$(CONFIG_SFC_MTD) += mtd.o sfc-$(CONFIG_SFC_SRIOV) += sriov.o siena_sriov.o ef10_sriov.o diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 4d9bbccc6f89..52113b7529d6 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -5,11 +5,15 @@ */ #include "net_driver.h" +#include "rx_common.h" #include "ef10_regs.h" #include "io.h" #include "mcdi.h" #include "mcdi_pcol.h" +#include "mcdi_port_common.h" +#include "mcdi_functions.h" #include "nic.h" +#include "mcdi_filters.h" #include "workarounds.h" #include "selftest.h" #include 
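In the sh_eth and sxgbe hunks above, the hand-rolled ioctl paths give way to phylib helpers: sh_eth's .ndo_do_ioctl becomes phy_do_ioctl_running, and sxgbe's SIOCGMIIPHY/SIOCGMIIREG/SIOCSMIIREG handling calls phy_do_ioctl. These helpers wrap the usual phydev check (and, in the _running variant, a netif_running() check) around phy_mii_ioctl(). As a reference point, here is a self-contained restatement of the deleted sh_eth_do_ioctl(), which is essentially the boilerplate the _running helper replaces; the real phylib helpers may differ in detail, for example in the exact error codes:

#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/phy.h>

/* Mirrors the deleted sh_eth_do_ioctl(): refuse when the interface is
 * down or has no attached PHY, otherwise hand off to phylib. */
static int demo_do_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd)
{
	if (!netif_running(ndev))
		return -EINVAL;
	if (!ndev->phydev)
		return -ENODEV;

	return phy_mii_ioctl(ndev->phydev, rq, cmd);
}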
"ef10_sriov.h" @@ -25,28 +29,6 @@ enum { EFX_EF10_TEST = 1, EFX_EF10_REFILL, }; -/* The maximum size of a shared RSS context */ -/* TODO: this should really be from the mcdi protocol export */ -#define EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE 64UL - -/* The filter table(s) are managed by firmware and we have write-only - * access. When removing filters we must identify them to the - * firmware by a 64-bit handle, but this is too wide for Linux kernel - * interfaces (32-bit for RX NFC, 16-bit for RFS). Also, we need to - * be able to tell in advance whether a requested insertion will - * replace an existing filter. Therefore we maintain a software hash - * table, which should be at least as large as the hardware hash - * table. - * - * Huntington has a single 8K filter table shared between all filter - * types and both ports. - */ -#define HUNT_FILTER_TBL_ROWS 8192 - -#define EFX_EF10_FILTER_ID_INVALID 0xffff - -#define EFX_EF10_FILTER_DEV_UC_MAX 32 -#define EFX_EF10_FILTER_DEV_MC_MAX 256 /* VLAN list entry */ struct efx_ef10_vlan { @@ -54,95 +36,8 @@ struct efx_ef10_vlan { u16 vid; }; -enum efx_ef10_default_filters { - EFX_EF10_BCAST, - EFX_EF10_UCDEF, - EFX_EF10_MCDEF, - EFX_EF10_VXLAN4_UCDEF, - EFX_EF10_VXLAN4_MCDEF, - EFX_EF10_VXLAN6_UCDEF, - EFX_EF10_VXLAN6_MCDEF, - EFX_EF10_NVGRE4_UCDEF, - EFX_EF10_NVGRE4_MCDEF, - EFX_EF10_NVGRE6_UCDEF, - EFX_EF10_NVGRE6_MCDEF, - EFX_EF10_GENEVE4_UCDEF, - EFX_EF10_GENEVE4_MCDEF, - EFX_EF10_GENEVE6_UCDEF, - EFX_EF10_GENEVE6_MCDEF, - - EFX_EF10_NUM_DEFAULT_FILTERS -}; - -/* Per-VLAN filters information */ -struct efx_ef10_filter_vlan { - struct list_head list; - u16 vid; - u16 uc[EFX_EF10_FILTER_DEV_UC_MAX]; - u16 mc[EFX_EF10_FILTER_DEV_MC_MAX]; - u16 default_filters[EFX_EF10_NUM_DEFAULT_FILTERS]; -}; - -struct efx_ef10_dev_addr { - u8 addr[ETH_ALEN]; -}; - -struct efx_ef10_filter_table { -/* The MCDI match masks supported by this fw & hw, in order of priority */ - u32 rx_match_mcdi_flags[ - MC_CMD_GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES_MAXNUM * 2]; - unsigned int rx_match_count; - - struct rw_semaphore lock; /* Protects entries */ - struct { - unsigned long spec; /* pointer to spec plus flag bits */ -/* AUTO_OLD is used to mark and sweep MAC filters for the device address lists. 
*/ -/* unused flag 1UL */ -#define EFX_EF10_FILTER_FLAG_AUTO_OLD 2UL -#define EFX_EF10_FILTER_FLAGS 3UL - u64 handle; /* firmware handle */ - } *entry; -/* Shadow of net_device address lists, guarded by mac_lock */ - struct efx_ef10_dev_addr dev_uc_list[EFX_EF10_FILTER_DEV_UC_MAX]; - struct efx_ef10_dev_addr dev_mc_list[EFX_EF10_FILTER_DEV_MC_MAX]; - int dev_uc_count; - int dev_mc_count; - bool uc_promisc; - bool mc_promisc; -/* Whether in multicast promiscuous mode when last changed */ - bool mc_promisc_last; - bool mc_overflow; /* Too many MC addrs; should always imply mc_promisc */ - bool vlan_filter; - struct list_head vlan_list; -}; - -/* An arbitrary search limit for the software hash table */ -#define EFX_EF10_FILTER_SEARCH_LIMIT 200 - -static void efx_ef10_rx_free_indir_table(struct efx_nic *efx); -static void efx_ef10_filter_table_remove(struct efx_nic *efx); -static int efx_ef10_filter_add_vlan(struct efx_nic *efx, u16 vid); -static void efx_ef10_filter_del_vlan_internal(struct efx_nic *efx, - struct efx_ef10_filter_vlan *vlan); -static void efx_ef10_filter_del_vlan(struct efx_nic *efx, u16 vid); static int efx_ef10_set_udp_tnl_ports(struct efx_nic *efx, bool unloading); -static u32 efx_ef10_filter_get_unsafe_id(u32 filter_id) -{ - WARN_ON_ONCE(filter_id == EFX_EF10_FILTER_ID_INVALID); - return filter_id & (HUNT_FILTER_TBL_ROWS - 1); -} - -static unsigned int efx_ef10_filter_get_unsafe_pri(u32 filter_id) -{ - return filter_id / (HUNT_FILTER_TBL_ROWS * 2); -} - -static u32 efx_ef10_make_filter_id(unsigned int pri, u16 idx) -{ - return pri * HUNT_FILTER_TBL_ROWS * 2 + idx; -} - static int efx_ef10_get_warm_boot_count(struct efx_nic *efx) { efx_dword_t reg; @@ -185,24 +80,6 @@ static bool efx_ef10_is_vf(struct efx_nic *efx) return efx->type->is_vf; } -static int efx_ef10_get_pf_index(struct efx_nic *efx) -{ - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_FUNCTION_INFO_OUT_LEN); - struct efx_ef10_nic_data *nic_data = efx->nic_data; - size_t outlen; - int rc; - - rc = efx_mcdi_rpc(efx, MC_CMD_GET_FUNCTION_INFO, NULL, 0, outbuf, - sizeof(outbuf), &outlen); - if (rc) - return rc; - if (outlen < sizeof(outbuf)) - return -EIO; - - nic_data->pf_index = MCDI_DWORD(outbuf, GET_FUNCTION_INFO_OUT_PF); - return 0; -} - #ifdef CONFIG_SFC_SRIOV static int efx_ef10_get_vf_index(struct efx_nic *efx) { @@ -273,24 +150,9 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx) u8 vi_window_mode = MCDI_BYTE(outbuf, GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE); - switch (vi_window_mode) { - case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_8K: - efx->vi_stride = 8192; - break; - case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_16K: - efx->vi_stride = 16384; - break; - case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_64K: - efx->vi_stride = 65536; - break; - default: - netif_err(efx, probe, efx->net_dev, - "Unrecognised VI window mode %d\n", - vi_window_mode); - return -EIO; - } - netif_dbg(efx, probe, efx->net_dev, "vi_stride = %u\n", - efx->vi_stride); + rc = efx_mcdi_window_mode_to_stride(efx, vi_window_mode); + if (rc) + return rc; } else { /* keep default VI stride */ netif_dbg(efx, probe, efx->net_dev, @@ -576,7 +438,7 @@ static int efx_ef10_add_vlan(struct efx_nic *efx, u16 vid) if (efx->filter_state) { mutex_lock(&efx->mac_lock); down_write(&efx->filter_sem); - rc = efx_ef10_filter_add_vlan(efx, vlan->vid); + rc = efx_mcdi_filter_add_vlan(efx, vlan->vid); up_write(&efx->filter_sem); mutex_unlock(&efx->mac_lock); if (rc) @@ -605,7 +467,7 @@ static void efx_ef10_del_vlan_internal(struct efx_nic *efx, 
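In efx_ef10_init_datapath_caps() the open-coded switch on the firmware's reported VI window mode becomes a single call to efx_mcdi_window_mode_to_stride(). Its name and arguments are visible from the new call site; the body below is only a reconstruction from the code deleted here, so the real helper (presumably in one of the new mcdi_* files) may differ in detail:

#include "net_driver.h"	/* struct efx_nic, netif_* logging */
#include "mcdi_pcol.h"	/* MC_CMD_GET_CAPABILITIES_V3_OUT_* values */

/* Map the MCDI VI window mode onto the per-VI register stride. */
int efx_mcdi_window_mode_to_stride(struct efx_nic *efx, u8 vi_window_mode)
{
	switch (vi_window_mode) {
	case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_8K:
		efx->vi_stride = 8192;
		break;
	case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_16K:
		efx->vi_stride = 16384;
		break;
	case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_64K:
		efx->vi_stride = 65536;
		break;
	default:
		netif_err(efx, probe, efx->net_dev,
			  "Unrecognised VI window mode %d\n", vi_window_mode);
		return -EIO;
	}
	netif_dbg(efx, probe, efx->net_dev, "vi_stride = %u\n", efx->vi_stride);
	return 0;
}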
if (efx->filter_state) { down_write(&efx->filter_sem); - efx_ef10_filter_del_vlan(efx, vlan->vid); + efx_mcdi_filter_del_vlan(efx, vlan->vid); up_write(&efx->filter_sem); } @@ -689,7 +551,7 @@ static int efx_ef10_probe(struct efx_nic *efx) } nic_data->warm_boot_count = rc; - efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID; + efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID; nic_data->vport_id = EVB_PORT_ID_ASSIGNED; @@ -725,7 +587,7 @@ static int efx_ef10_probe(struct efx_nic *efx) if (rc) goto fail4; - rc = efx_ef10_get_pf_index(efx); + rc = efx_get_pf_index(efx, &nic_data->pf_index); if (rc) goto fail5; @@ -831,22 +693,6 @@ fail1: return rc; } -static int efx_ef10_free_vis(struct efx_nic *efx) -{ - MCDI_DECLARE_BUF_ERR(outbuf); - size_t outlen; - int rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FREE_VIS, NULL, 0, - outbuf, sizeof(outbuf), &outlen); - - /* -EALREADY means nothing to free, so ignore */ - if (rc == -EALREADY) - rc = 0; - if (rc) - efx_mcdi_display_error(efx, MC_CMD_FREE_VIS, 0, outbuf, outlen, - rc); - return rc; -} - #ifdef EFX_USE_PIO static void efx_ef10_free_piobufs(struct efx_nic *efx) @@ -1084,12 +930,12 @@ static void efx_ef10_remove(struct efx_nic *efx) efx_mcdi_mon_remove(efx); - efx_ef10_rx_free_indir_table(efx); + efx_mcdi_rx_free_indir_table(efx); if (nic_data->wc_membase) iounmap(nic_data->wc_membase); - rc = efx_ef10_free_vis(efx); + rc = efx_mcdi_free_vis(efx); WARN_ON(rc != 0); if (!nic_data->must_restore_piobufs) @@ -1260,28 +1106,10 @@ static int efx_ef10_probe_vf(struct efx_nic *efx __attribute__ ((unused))) static int efx_ef10_alloc_vis(struct efx_nic *efx, unsigned int min_vis, unsigned int max_vis) { - MCDI_DECLARE_BUF(inbuf, MC_CMD_ALLOC_VIS_IN_LEN); - MCDI_DECLARE_BUF(outbuf, MC_CMD_ALLOC_VIS_OUT_LEN); struct efx_ef10_nic_data *nic_data = efx->nic_data; - size_t outlen; - int rc; - - MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MIN_VI_COUNT, min_vis); - MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MAX_VI_COUNT, max_vis); - rc = efx_mcdi_rpc(efx, MC_CMD_ALLOC_VIS, inbuf, sizeof(inbuf), - outbuf, sizeof(outbuf), &outlen); - if (rc != 0) - return rc; - - if (outlen < MC_CMD_ALLOC_VIS_OUT_LEN) - return -EIO; - netif_dbg(efx, drv, efx->net_dev, "base VI is A0x%03x\n", - MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE)); - - nic_data->vi_base = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE); - nic_data->n_allocated_vis = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_COUNT); - return 0; + return efx_mcdi_alloc_vis(efx, min_vis, max_vis, &nic_data->vi_base, + &nic_data->n_allocated_vis); } /* Note that the failure path of this function does not free @@ -1363,7 +1191,7 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx) } /* In case the last attached driver failed to free VIs, do it now */ - rc = efx_ef10_free_vis(efx); + rc = efx_mcdi_free_vis(efx); if (rc != 0) return rc; @@ -1384,7 +1212,7 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx) efx->max_tx_channels = nic_data->n_allocated_vis / EFX_TXQ_TYPES; - efx_ef10_free_vis(efx); + efx_mcdi_free_vis(efx); return -EAGAIN; } @@ -1401,7 +1229,7 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx) } /* Shrink the original UC mapping of the memory BAR */ - membase = ioremap_nocache(efx->membase_phys, uc_mem_map_size); + membase = ioremap(efx->membase_phys, uc_mem_map_size); if (!membase) { netif_err(efx, probe, efx->net_dev, "could not shrink memory BAR to %x\n", @@ -1490,7 +1318,7 @@ static int efx_ef10_init_nic(struct efx_nic *efx) return 0; } -static void efx_ef10_reset_mc_allocations(struct efx_nic 
*efx) +static void efx_ef10_table_reset_mc_allocations(struct efx_nic *efx) { struct efx_ef10_nic_data *nic_data = efx->nic_data; #ifdef CONFIG_SFC_SRIOV @@ -1503,7 +1331,7 @@ static void efx_ef10_reset_mc_allocations(struct efx_nic *efx) nic_data->must_restore_filters = true; nic_data->must_restore_piobufs = true; efx_ef10_forget_old_piobufs(efx); - efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID; + efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID; /* Driver-created vswitches and vports must be re-created */ nic_data->must_probe_vswitching = true; @@ -1571,7 +1399,7 @@ static int efx_ef10_reset(struct efx_nic *efx, enum reset_type reset_type) */ if ((reset_type == RESET_TYPE_ALL || reset_type == RESET_TYPE_MCDI_TIMEOUT) && !rc) - efx_ef10_reset_mc_allocations(efx); + efx_ef10_table_reset_mc_allocations(efx); return rc; } @@ -2187,7 +2015,7 @@ static void efx_ef10_mcdi_reboot_detected(struct efx_nic *efx) struct efx_ef10_nic_data *nic_data = efx->nic_data; /* All our allocations have been reset */ - efx_ef10_reset_mc_allocations(efx); + efx_ef10_table_reset_mc_allocations(efx); /* The datapath firmware might have been changed */ nic_data->must_check_datapath_caps = true; @@ -2408,20 +2236,15 @@ static u32 efx_ef10_tso_versions(struct efx_nic *efx) static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) { - MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 / - EFX_BUF_SIZE)); bool csum_offload = tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD; - size_t entries = tx_queue->txd.buf.len / EFX_BUF_SIZE; struct efx_channel *channel = tx_queue->channel; struct efx_nic *efx = tx_queue->efx; - struct efx_ef10_nic_data *nic_data = efx->nic_data; + struct efx_ef10_nic_data *nic_data; bool tso_v2 = false; - size_t inlen; - dma_addr_t dma_addr; efx_qword_t *txd; int rc; - int i; - BUILD_BUG_ON(MC_CMD_INIT_TXQ_OUT_LEN != 0); + + nic_data = efx->nic_data; /* Only attempt to enable TX timestamping if we have the license for it, * otherwise TXQ init will fail @@ -2448,51 +2271,9 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) channel->channel); } - MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_SIZE, tx_queue->ptr_mask + 1); - MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_TARGET_EVQ, channel->channel); - MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_LABEL, tx_queue->queue); - MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_INSTANCE, tx_queue->queue); - MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_OWNER_ID, 0); - MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, nic_data->vport_id); - - dma_addr = tx_queue->txd.buf.dma_addr; - - netif_dbg(efx, hw, efx->net_dev, "pushing TXQ %d. %zu entries (%llx)\n", - tx_queue->queue, entries, (u64)dma_addr); - - for (i = 0; i < entries; ++i) { - MCDI_SET_ARRAY_QWORD(inbuf, INIT_TXQ_IN_DMA_ADDR, i, dma_addr); - dma_addr += EFX_BUF_SIZE; - } - - inlen = MC_CMD_INIT_TXQ_IN_LEN(entries); - - do { - MCDI_POPULATE_DWORD_4(inbuf, INIT_TXQ_IN_FLAGS, - /* This flag was removed from mcdi_pcol.h for - * the non-_EXT version of INIT_TXQ. However, - * firmware still honours it. - */ - INIT_TXQ_EXT_IN_FLAG_TSOV2_EN, tso_v2, - INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload, - INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload, - INIT_TXQ_EXT_IN_FLAG_TIMESTAMP, - tx_queue->timestamping); - - rc = efx_mcdi_rpc_quiet(efx, MC_CMD_INIT_TXQ, inbuf, inlen, - NULL, 0, NULL); - if (rc == -ENOSPC && tso_v2) { - /* Retry without TSOv2 if we're short on contexts. */ - tso_v2 = false; - netif_warn(efx, probe, efx->net_dev, - "TSOv2 context not available to segment in hardware. 
TCP performance may be reduced.\n"); - } else if (rc) { - efx_mcdi_display_error(efx, MC_CMD_INIT_TXQ, - MC_CMD_INIT_TXQ_EXT_IN_LEN, - NULL, 0, rc); - goto fail; - } - } while (rc); + rc = efx_mcdi_tx_init(tx_queue, tso_v2); + if (rc) + goto fail; /* A previous user of this TX queue might have set us up the * bomb by writing a descriptor to the TX push collector but @@ -2530,35 +2311,6 @@ fail: tx_queue->queue); } -static void efx_ef10_tx_fini(struct efx_tx_queue *tx_queue) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_TXQ_IN_LEN); - MCDI_DECLARE_BUF_ERR(outbuf); - struct efx_nic *efx = tx_queue->efx; - size_t outlen; - int rc; - - MCDI_SET_DWORD(inbuf, FINI_TXQ_IN_INSTANCE, - tx_queue->queue); - - rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_TXQ, inbuf, sizeof(inbuf), - outbuf, sizeof(outbuf), &outlen); - - if (rc && rc != -EALREADY) - goto fail; - - return; - -fail: - efx_mcdi_display_error(efx, MC_CMD_FINI_TXQ, MC_CMD_FINI_TXQ_IN_LEN, - outbuf, outlen, rc); -} - -static void efx_ef10_tx_remove(struct efx_tx_queue *tx_queue) -{ - efx_nic_free_buffer(tx_queue->efx, &tx_queue->txd.buf); -} - /* This writes to the TX_DESC_WPTR; write pointer for TX descriptor ring */ static inline void efx_ef10_notify_tx_desc(struct efx_tx_queue *tx_queue) { @@ -2637,527 +2389,6 @@ static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue) } } -#define RSS_MODE_HASH_ADDRS (1 << RSS_MODE_HASH_SRC_ADDR_LBN |\ - 1 << RSS_MODE_HASH_DST_ADDR_LBN) -#define RSS_MODE_HASH_PORTS (1 << RSS_MODE_HASH_SRC_PORT_LBN |\ - 1 << RSS_MODE_HASH_DST_PORT_LBN) -#define RSS_CONTEXT_FLAGS_DEFAULT (1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_IPV4_EN_LBN |\ - 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_TCPV4_EN_LBN |\ - 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_IPV6_EN_LBN |\ - 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_TCPV6_EN_LBN |\ - (RSS_MODE_HASH_ADDRS | RSS_MODE_HASH_PORTS) << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TCP_IPV4_RSS_MODE_LBN |\ - RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN |\ - RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV4_RSS_MODE_LBN |\ - (RSS_MODE_HASH_ADDRS | RSS_MODE_HASH_PORTS) << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TCP_IPV6_RSS_MODE_LBN |\ - RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN |\ - RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV6_RSS_MODE_LBN) - -static int efx_ef10_get_rss_flags(struct efx_nic *efx, u32 context, u32 *flags) -{ - /* Firmware had a bug (sfc bug 61952) where it would not actually - * fill in the flags field in the response to MC_CMD_RSS_CONTEXT_GET_FLAGS. - * This meant that it would always contain whatever was previously - * in the MCDI buffer. Fortunately, all firmware versions with - * this bug have the same default flags value for a newly-allocated - * RSS context, and the only time we want to get the flags is just - * after allocating. Moreover, the response has a 32-bit hole - * where the context ID would be in the request, so we can use an - * overlength buffer in the request and pre-fill the flags field - * with what we believe the default to be. Thus if the firmware - * has the bug, it will leave our pre-filled value in the flags - * field of the response, and we will get the right answer. - * - * However, this does mean that this function should NOT be used if - * the RSS context flags might not be their defaults - it is ONLY - * reliably correct for a newly-allocated RSS context. 
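The workaround described in this comment leans on the request and the response sharing the same MCDI buffer: the request is declared with the longer response length and the flags word is seeded with the believed default, so a firmware with the bug, which leaves that part of the buffer untouched, still "returns" the right value. A schematic of the idea with a deliberately made-up message layout, helper names and flag value (the real offsets come from mcdi_pcol.h, and the real default is the RSS_CONTEXT_FLAGS_DEFAULT definition above):

#include <stdint.h>

#define DEMO_DEFAULT_FLAGS 0x0000000fu	/* illustrative value only */

/* Request and response share one buffer; the response's flags word sits
 * in what is spare room from the request's point of view (layout made
 * up for illustration). */
struct demo_get_flags_msg {
	uint32_t context_id;	/* request: RSS context to query */
	uint32_t flags;		/* response: may never be written by buggy fw */
};

/* Stand-in for the MCDI transport; with the firmware bug, msg->flags is
 * left exactly as the caller seeded it. */
static void demo_rpc_get_flags(struct demo_get_flags_msg *msg)
{
	(void)msg;
}

static uint32_t demo_get_rss_flags(uint32_t context_id)
{
	struct demo_get_flags_msg msg = {
		.context_id = context_id,
		.flags = DEMO_DEFAULT_FLAGS,	/* pre-seed the known default */
	};

	demo_rpc_get_flags(&msg);
	return msg.flags;	/* correct whether or not the firmware filled it */
}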
- */ - MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN); - MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN); - size_t outlen; - int rc; - - /* Check we have a hole for the context ID */ - BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_FLAGS_IN_LEN != MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_FLAGS_OFST); - MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_FLAGS_IN_RSS_CONTEXT_ID, context); - MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_FLAGS_OUT_FLAGS, - RSS_CONTEXT_FLAGS_DEFAULT); - rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_FLAGS, inbuf, - sizeof(inbuf), outbuf, sizeof(outbuf), &outlen); - if (rc == 0) { - if (outlen < MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN) - rc = -EIO; - else - *flags = MCDI_DWORD(outbuf, RSS_CONTEXT_GET_FLAGS_OUT_FLAGS); - } - return rc; -} - -/* Attempt to enable 4-tuple UDP hashing on the specified RSS context. - * If we fail, we just leave the RSS context at its default hash settings, - * which is safe but may slightly reduce performance. - * Defaults are 4-tuple for TCP and 2-tuple for UDP and other-IP, so we - * just need to set the UDP ports flags (for both IP versions). - */ -static void efx_ef10_set_rss_flags(struct efx_nic *efx, - struct efx_rss_context *ctx) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_SET_FLAGS_IN_LEN); - u32 flags; - - BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_SET_FLAGS_OUT_LEN != 0); - - if (efx_ef10_get_rss_flags(efx, ctx->context_id, &flags) != 0) - return; - MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_RSS_CONTEXT_ID, - ctx->context_id); - flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN; - flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN; - MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_FLAGS, flags); - if (!efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_FLAGS, inbuf, sizeof(inbuf), - NULL, 0, NULL)) - /* Succeeded, so UDP 4-tuple is now enabled */ - ctx->rx_hash_udp_4tuple = true; -} - -static int efx_ef10_alloc_rss_context(struct efx_nic *efx, bool exclusive, - struct efx_rss_context *ctx, - unsigned *context_size) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_ALLOC_IN_LEN); - MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN); - struct efx_ef10_nic_data *nic_data = efx->nic_data; - size_t outlen; - int rc; - u32 alloc_type = exclusive ? - MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_EXCLUSIVE : - MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_SHARED; - unsigned rss_spread = exclusive ? 
- efx->rss_spread : - min(rounddown_pow_of_two(efx->rss_spread), - EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE); - - if (!exclusive && rss_spread == 1) { - ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID; - if (context_size) - *context_size = 1; - return 0; - } - - if (nic_data->datapath_caps & - 1 << MC_CMD_GET_CAPABILITIES_OUT_RX_RSS_LIMITED_LBN) - return -EOPNOTSUPP; - - MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_UPSTREAM_PORT_ID, - nic_data->vport_id); - MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_TYPE, alloc_type); - MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_NUM_QUEUES, rss_spread); - - rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_ALLOC, inbuf, sizeof(inbuf), - outbuf, sizeof(outbuf), &outlen); - if (rc != 0) - return rc; - - if (outlen < MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN) - return -EIO; - - ctx->context_id = MCDI_DWORD(outbuf, RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID); - - if (context_size) - *context_size = rss_spread; - - if (nic_data->datapath_caps & - 1 << MC_CMD_GET_CAPABILITIES_OUT_ADDITIONAL_RSS_MODES_LBN) - efx_ef10_set_rss_flags(efx, ctx); - - return 0; -} - -static int efx_ef10_free_rss_context(struct efx_nic *efx, u32 context) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_FREE_IN_LEN); - - MCDI_SET_DWORD(inbuf, RSS_CONTEXT_FREE_IN_RSS_CONTEXT_ID, - context); - return efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_FREE, inbuf, sizeof(inbuf), - NULL, 0, NULL); -} - -static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context, - const u32 *rx_indir_table, const u8 *key) -{ - MCDI_DECLARE_BUF(tablebuf, MC_CMD_RSS_CONTEXT_SET_TABLE_IN_LEN); - MCDI_DECLARE_BUF(keybuf, MC_CMD_RSS_CONTEXT_SET_KEY_IN_LEN); - int i, rc; - - MCDI_SET_DWORD(tablebuf, RSS_CONTEXT_SET_TABLE_IN_RSS_CONTEXT_ID, - context); - BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_indir_table) != - MC_CMD_RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE_LEN); - - /* This iterates over the length of efx->rss_context.rx_indir_table, but - * copies bytes from rx_indir_table. That's because the latter is a - * pointer rather than an array, but should have the same length. - * The efx->rss_context.rx_hash_key loop below is similar. 
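What gets pushed here is the driver's software copy of the RSS state: the indirection table one byte per entry via RSS_CONTEXT_SET_TABLE, then the Toeplitz hash key via RSS_CONTEXT_SET_KEY. For readers unfamiliar with the table itself, the default spreading that the driver later compares against (the ethtool_rxfh_indir_default() check in the shared-context fallback further down) simply steers hash bucket i to RX queue i modulo the RSS spread. A short sketch; the function name fill_default_indir() is purely illustrative:

#include <linux/ethtool.h>
#include <linux/types.h>

/* Fill an RSS indirection table with the kernel's default round-robin
 * spreading; each entry is later truncated to a u8 when copied into the
 * MCDI request, as in the loop above. */
static void fill_default_indir(u32 *indir, unsigned int len,
			       unsigned int rss_spread)
{
	unsigned int i;

	for (i = 0; i < len; i++)
		indir[i] = ethtool_rxfh_indir_default(i, rss_spread);
}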
- */ - for (i = 0; i < ARRAY_SIZE(efx->rss_context.rx_indir_table); ++i) - MCDI_PTR(tablebuf, - RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE)[i] = - (u8) rx_indir_table[i]; - - rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_TABLE, tablebuf, - sizeof(tablebuf), NULL, 0, NULL); - if (rc != 0) - return rc; - - MCDI_SET_DWORD(keybuf, RSS_CONTEXT_SET_KEY_IN_RSS_CONTEXT_ID, - context); - BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_hash_key) != - MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN); - for (i = 0; i < ARRAY_SIZE(efx->rss_context.rx_hash_key); ++i) - MCDI_PTR(keybuf, RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY)[i] = key[i]; - - return efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_KEY, keybuf, - sizeof(keybuf), NULL, 0, NULL); -} - -static void efx_ef10_rx_free_indir_table(struct efx_nic *efx) -{ - int rc; - - if (efx->rss_context.context_id != EFX_EF10_RSS_CONTEXT_INVALID) { - rc = efx_ef10_free_rss_context(efx, efx->rss_context.context_id); - WARN_ON(rc != 0); - } - efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID; -} - -static int efx_ef10_rx_push_shared_rss_config(struct efx_nic *efx, - unsigned *context_size) -{ - struct efx_ef10_nic_data *nic_data = efx->nic_data; - int rc = efx_ef10_alloc_rss_context(efx, false, &efx->rss_context, - context_size); - - if (rc != 0) - return rc; - - nic_data->rx_rss_context_exclusive = false; - efx_set_default_rx_indir_table(efx, &efx->rss_context); - return 0; -} - -static int efx_ef10_rx_push_exclusive_rss_config(struct efx_nic *efx, - const u32 *rx_indir_table, - const u8 *key) -{ - u32 old_rx_rss_context = efx->rss_context.context_id; - struct efx_ef10_nic_data *nic_data = efx->nic_data; - int rc; - - if (efx->rss_context.context_id == EFX_EF10_RSS_CONTEXT_INVALID || - !nic_data->rx_rss_context_exclusive) { - rc = efx_ef10_alloc_rss_context(efx, true, &efx->rss_context, - NULL); - if (rc == -EOPNOTSUPP) - return rc; - else if (rc != 0) - goto fail1; - } - - rc = efx_ef10_populate_rss_table(efx, efx->rss_context.context_id, - rx_indir_table, key); - if (rc != 0) - goto fail2; - - if (efx->rss_context.context_id != old_rx_rss_context && - old_rx_rss_context != EFX_EF10_RSS_CONTEXT_INVALID) - WARN_ON(efx_ef10_free_rss_context(efx, old_rx_rss_context) != 0); - nic_data->rx_rss_context_exclusive = true; - if (rx_indir_table != efx->rss_context.rx_indir_table) - memcpy(efx->rss_context.rx_indir_table, rx_indir_table, - sizeof(efx->rss_context.rx_indir_table)); - if (key != efx->rss_context.rx_hash_key) - memcpy(efx->rss_context.rx_hash_key, key, - efx->type->rx_hash_key_size); - - return 0; - -fail2: - if (old_rx_rss_context != efx->rss_context.context_id) { - WARN_ON(efx_ef10_free_rss_context(efx, efx->rss_context.context_id) != 0); - efx->rss_context.context_id = old_rx_rss_context; - } -fail1: - netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); - return rc; -} - -static int efx_ef10_rx_push_rss_context_config(struct efx_nic *efx, - struct efx_rss_context *ctx, - const u32 *rx_indir_table, - const u8 *key) -{ - int rc; - - WARN_ON(!mutex_is_locked(&efx->rss_lock)); - - if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) { - rc = efx_ef10_alloc_rss_context(efx, true, ctx, NULL); - if (rc) - return rc; - } - - if (!rx_indir_table) /* Delete this context */ - return efx_ef10_free_rss_context(efx, ctx->context_id); - - rc = efx_ef10_populate_rss_table(efx, ctx->context_id, - rx_indir_table, key); - if (rc) - return rc; - - memcpy(ctx->rx_indir_table, rx_indir_table, - sizeof(efx->rss_context.rx_indir_table)); - memcpy(ctx->rx_hash_key, 
key, efx->type->rx_hash_key_size); - - return 0; -} - -static int efx_ef10_rx_pull_rss_context_config(struct efx_nic *efx, - struct efx_rss_context *ctx) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN); - MCDI_DECLARE_BUF(tablebuf, MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_LEN); - MCDI_DECLARE_BUF(keybuf, MC_CMD_RSS_CONTEXT_GET_KEY_OUT_LEN); - size_t outlen; - int rc, i; - - WARN_ON(!mutex_is_locked(&efx->rss_lock)); - - BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN != - MC_CMD_RSS_CONTEXT_GET_KEY_IN_LEN); - - if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) - return -ENOENT; - - MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_TABLE_IN_RSS_CONTEXT_ID, - ctx->context_id); - BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_indir_table) != - MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE_LEN); - rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_TABLE, inbuf, sizeof(inbuf), - tablebuf, sizeof(tablebuf), &outlen); - if (rc != 0) - return rc; - - if (WARN_ON(outlen != MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_LEN)) - return -EIO; - - for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++) - ctx->rx_indir_table[i] = MCDI_PTR(tablebuf, - RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE)[i]; - - MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_KEY_IN_RSS_CONTEXT_ID, - ctx->context_id); - BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_hash_key) != - MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN); - rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_KEY, inbuf, sizeof(inbuf), - keybuf, sizeof(keybuf), &outlen); - if (rc != 0) - return rc; - - if (WARN_ON(outlen != MC_CMD_RSS_CONTEXT_GET_KEY_OUT_LEN)) - return -EIO; - - for (i = 0; i < ARRAY_SIZE(ctx->rx_hash_key); ++i) - ctx->rx_hash_key[i] = MCDI_PTR( - keybuf, RSS_CONTEXT_GET_KEY_OUT_TOEPLITZ_KEY)[i]; - - return 0; -} - -static int efx_ef10_rx_pull_rss_config(struct efx_nic *efx) -{ - int rc; - - mutex_lock(&efx->rss_lock); - rc = efx_ef10_rx_pull_rss_context_config(efx, &efx->rss_context); - mutex_unlock(&efx->rss_lock); - return rc; -} - -static void efx_ef10_rx_restore_rss_contexts(struct efx_nic *efx) -{ - struct efx_ef10_nic_data *nic_data = efx->nic_data; - struct efx_rss_context *ctx; - int rc; - - WARN_ON(!mutex_is_locked(&efx->rss_lock)); - - if (!nic_data->must_restore_rss_contexts) - return; - - list_for_each_entry(ctx, &efx->rss_context.list, list) { - /* previous NIC RSS context is gone */ - ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID; - /* so try to allocate a new one */ - rc = efx_ef10_rx_push_rss_context_config(efx, ctx, - ctx->rx_indir_table, - ctx->rx_hash_key); - if (rc) - netif_warn(efx, probe, efx->net_dev, - "failed to restore RSS context %u, rc=%d" - "; RSS filters may fail to be applied\n", - ctx->user_id, rc); - } - nic_data->must_restore_rss_contexts = false; -} - -static int efx_ef10_pf_rx_push_rss_config(struct efx_nic *efx, bool user, - const u32 *rx_indir_table, - const u8 *key) -{ - int rc; - - if (efx->rss_spread == 1) - return 0; - - if (!key) - key = efx->rss_context.rx_hash_key; - - rc = efx_ef10_rx_push_exclusive_rss_config(efx, rx_indir_table, key); - - if (rc == -ENOBUFS && !user) { - unsigned context_size; - bool mismatch = false; - size_t i; - - for (i = 0; - i < ARRAY_SIZE(efx->rss_context.rx_indir_table) && !mismatch; - i++) - mismatch = rx_indir_table[i] != - ethtool_rxfh_indir_default(i, efx->rss_spread); - - rc = efx_ef10_rx_push_shared_rss_config(efx, &context_size); - if (rc == 0) { - if (context_size != efx->rss_spread) - netif_warn(efx, probe, efx->net_dev, - "Could not allocate an exclusive RSS" - " context; allocated a shared one of" - " different 
size." - " Wanted %u, got %u.\n", - efx->rss_spread, context_size); - else if (mismatch) - netif_warn(efx, probe, efx->net_dev, - "Could not allocate an exclusive RSS" - " context; allocated a shared one but" - " could not apply custom" - " indirection.\n"); - else - netif_info(efx, probe, efx->net_dev, - "Could not allocate an exclusive RSS" - " context; allocated a shared one.\n"); - } - } - return rc; -} - -static int efx_ef10_vf_rx_push_rss_config(struct efx_nic *efx, bool user, - const u32 *rx_indir_table - __attribute__ ((unused)), - const u8 *key - __attribute__ ((unused))) -{ - if (user) - return -EOPNOTSUPP; - if (efx->rss_context.context_id != EFX_EF10_RSS_CONTEXT_INVALID) - return 0; - return efx_ef10_rx_push_shared_rss_config(efx, NULL); -} - -static int efx_ef10_rx_probe(struct efx_rx_queue *rx_queue) -{ - return efx_nic_alloc_buffer(rx_queue->efx, &rx_queue->rxd.buf, - (rx_queue->ptr_mask + 1) * - sizeof(efx_qword_t), - GFP_KERNEL); -} - -static void efx_ef10_rx_init(struct efx_rx_queue *rx_queue) -{ - MCDI_DECLARE_BUF(inbuf, - MC_CMD_INIT_RXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 / - EFX_BUF_SIZE)); - struct efx_channel *channel = efx_rx_queue_channel(rx_queue); - size_t entries = rx_queue->rxd.buf.len / EFX_BUF_SIZE; - struct efx_nic *efx = rx_queue->efx; - struct efx_ef10_nic_data *nic_data = efx->nic_data; - size_t inlen; - dma_addr_t dma_addr; - int rc; - int i; - BUILD_BUG_ON(MC_CMD_INIT_RXQ_OUT_LEN != 0); - - rx_queue->scatter_n = 0; - rx_queue->scatter_len = 0; - - MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_SIZE, rx_queue->ptr_mask + 1); - MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_TARGET_EVQ, channel->channel); - MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_LABEL, efx_rx_queue_index(rx_queue)); - MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_INSTANCE, - efx_rx_queue_index(rx_queue)); - MCDI_POPULATE_DWORD_2(inbuf, INIT_RXQ_IN_FLAGS, - INIT_RXQ_IN_FLAG_PREFIX, 1, - INIT_RXQ_IN_FLAG_TIMESTAMP, 1); - MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_OWNER_ID, 0); - MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_PORT_ID, nic_data->vport_id); - - dma_addr = rx_queue->rxd.buf.dma_addr; - - netif_dbg(efx, hw, efx->net_dev, "pushing RXQ %d. 
%zu entries (%llx)\n", - efx_rx_queue_index(rx_queue), entries, (u64)dma_addr); - - for (i = 0; i < entries; ++i) { - MCDI_SET_ARRAY_QWORD(inbuf, INIT_RXQ_IN_DMA_ADDR, i, dma_addr); - dma_addr += EFX_BUF_SIZE; - } - - inlen = MC_CMD_INIT_RXQ_IN_LEN(entries); - - rc = efx_mcdi_rpc(efx, MC_CMD_INIT_RXQ, inbuf, inlen, - NULL, 0, NULL); - if (rc) - netdev_WARN(efx->net_dev, "failed to initialise RXQ %d\n", - efx_rx_queue_index(rx_queue)); -} - -static void efx_ef10_rx_fini(struct efx_rx_queue *rx_queue) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_RXQ_IN_LEN); - MCDI_DECLARE_BUF_ERR(outbuf); - struct efx_nic *efx = rx_queue->efx; - size_t outlen; - int rc; - - MCDI_SET_DWORD(inbuf, FINI_RXQ_IN_INSTANCE, - efx_rx_queue_index(rx_queue)); - - rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_RXQ, inbuf, sizeof(inbuf), - outbuf, sizeof(outbuf), &outlen); - - if (rc && rc != -EALREADY) - goto fail; - - return; - -fail: - efx_mcdi_display_error(efx, MC_CMD_FINI_RXQ, MC_CMD_FINI_RXQ_IN_LEN, - outbuf, outlen, rc); -} - -static void efx_ef10_rx_remove(struct efx_rx_queue *rx_queue) -{ - efx_nic_free_buffer(rx_queue->efx, &rx_queue->rxd.buf); -} - /* This creates an entry in the RX descriptor queue */ static inline void efx_ef10_build_rx_desc(struct efx_rx_queue *rx_queue, unsigned int index) @@ -3229,106 +2460,20 @@ efx_ef10_rx_defer_refill_complete(struct efx_nic *efx, unsigned long cookie, /* nothing to do */ } -static int efx_ef10_ev_probe(struct efx_channel *channel) -{ - return efx_nic_alloc_buffer(channel->efx, &channel->eventq.buf, - (channel->eventq_mask + 1) * - sizeof(efx_qword_t), - GFP_KERNEL); -} - -static void efx_ef10_ev_fini(struct efx_channel *channel) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_EVQ_IN_LEN); - MCDI_DECLARE_BUF_ERR(outbuf); - struct efx_nic *efx = channel->efx; - size_t outlen; - int rc; - - MCDI_SET_DWORD(inbuf, FINI_EVQ_IN_INSTANCE, channel->channel); - - rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_EVQ, inbuf, sizeof(inbuf), - outbuf, sizeof(outbuf), &outlen); - - if (rc && rc != -EALREADY) - goto fail; - - return; - -fail: - efx_mcdi_display_error(efx, MC_CMD_FINI_EVQ, MC_CMD_FINI_EVQ_IN_LEN, - outbuf, outlen, rc); -} - static int efx_ef10_ev_init(struct efx_channel *channel) { - MCDI_DECLARE_BUF(inbuf, - MC_CMD_INIT_EVQ_V2_IN_LEN(EFX_MAX_EVQ_SIZE * 8 / - EFX_BUF_SIZE)); - MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_EVQ_V2_OUT_LEN); - size_t entries = channel->eventq.buf.len / EFX_BUF_SIZE; struct efx_nic *efx = channel->efx; struct efx_ef10_nic_data *nic_data; - size_t inlen, outlen; unsigned int enabled, implemented; - dma_addr_t dma_addr; + bool use_v2, cut_thru; int rc; - int i; nic_data = efx->nic_data; - - /* Fill event queue with all ones (i.e. 
empty events) */ - memset(channel->eventq.buf.addr, 0xff, channel->eventq.buf.len); - - MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_SIZE, channel->eventq_mask + 1); - MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_INSTANCE, channel->channel); - /* INIT_EVQ expects index in vector table, not absolute */ - MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_IRQ_NUM, channel->channel); - MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_MODE, - MC_CMD_INIT_EVQ_IN_TMR_MODE_DIS); - MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_LOAD, 0); - MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_RELOAD, 0); - MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_MODE, - MC_CMD_INIT_EVQ_IN_COUNT_MODE_DIS); - MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_THRSHLD, 0); - - if (nic_data->datapath_caps2 & - 1 << MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_LBN) { - /* Use the new generic approach to specifying event queue - * configuration, requesting lower latency or higher throughput. - * The options that actually get used appear in the output. - */ - MCDI_POPULATE_DWORD_2(inbuf, INIT_EVQ_V2_IN_FLAGS, - INIT_EVQ_V2_IN_FLAG_INTERRUPTING, 1, - INIT_EVQ_V2_IN_FLAG_TYPE, - MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_AUTO); - } else { - bool cut_thru = !(nic_data->datapath_caps & - 1 << MC_CMD_GET_CAPABILITIES_OUT_RX_BATCHING_LBN); - - MCDI_POPULATE_DWORD_4(inbuf, INIT_EVQ_IN_FLAGS, - INIT_EVQ_IN_FLAG_INTERRUPTING, 1, - INIT_EVQ_IN_FLAG_RX_MERGE, 1, - INIT_EVQ_IN_FLAG_TX_MERGE, 1, - INIT_EVQ_IN_FLAG_CUT_THRU, cut_thru); - } - - dma_addr = channel->eventq.buf.dma_addr; - for (i = 0; i < entries; ++i) { - MCDI_SET_ARRAY_QWORD(inbuf, INIT_EVQ_IN_DMA_ADDR, i, dma_addr); - dma_addr += EFX_BUF_SIZE; - } - - inlen = MC_CMD_INIT_EVQ_IN_LEN(entries); - - rc = efx_mcdi_rpc(efx, MC_CMD_INIT_EVQ, inbuf, inlen, - outbuf, sizeof(outbuf), &outlen); - - if (outlen >= MC_CMD_INIT_EVQ_V2_OUT_LEN) - netif_dbg(efx, drv, efx->net_dev, - "Channel %d using event queue flags %08x\n", - channel->channel, - MCDI_DWORD(outbuf, INIT_EVQ_V2_OUT_FLAGS)); + use_v2 = nic_data->datapath_caps2 & + 1 << MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_LBN; + cut_thru = !(nic_data->datapath_caps & + 1 << MC_CMD_GET_CAPABILITIES_OUT_RX_BATCHING_LBN); + rc = efx_mcdi_ev_init(channel, cut_thru, use_v2); /* IRQ return is ignored */ if (channel->channel || rc) @@ -3386,15 +2531,10 @@ static int efx_ef10_ev_init(struct efx_channel *channel) return 0; fail: - efx_ef10_ev_fini(channel); + efx_mcdi_ev_fini(channel); return rc; } -static void efx_ef10_ev_remove(struct efx_channel *channel) -{ - efx_nic_free_buffer(channel->efx, &channel->eventq.buf); -} - static void efx_ef10_handle_rx_wrong_queue(struct efx_rx_queue *rx_queue, unsigned int rx_queue_label) { @@ -3976,9 +3116,9 @@ static int efx_ef10_fini_dmaq(struct efx_nic *efx) if (efx->state != STATE_RECOVERY) { efx_for_each_channel(channel, efx) { efx_for_each_channel_rx_queue(rx_queue, channel) - efx_ef10_rx_fini(rx_queue); + efx_mcdi_rx_fini(rx_queue); efx_for_each_channel_tx_queue(tx_queue, channel) - efx_ef10_tx_fini(tx_queue); + efx_mcdi_tx_fini(tx_queue); } wait_event_timeout(efx->flush_wq, @@ -4000,1538 +3140,6 @@ static void efx_ef10_prepare_flr(struct efx_nic *efx) atomic_set(&efx->active_queues, 0); } -/* Decide whether a filter should be exclusive or else should allow - * delivery to additional recipients. Currently we decide that - * filters for specific local unicast MAC and IP addresses are - * exclusive. 
- */ -static bool efx_ef10_filter_is_exclusive(const struct efx_filter_spec *spec) -{ - if (spec->match_flags & EFX_FILTER_MATCH_LOC_MAC && - !is_multicast_ether_addr(spec->loc_mac)) - return true; - - if ((spec->match_flags & - (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) == - (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) { - if (spec->ether_type == htons(ETH_P_IP) && - !ipv4_is_multicast(spec->loc_host[0])) - return true; - if (spec->ether_type == htons(ETH_P_IPV6) && - ((const u8 *)spec->loc_host)[0] != 0xff) - return true; - } - - return false; -} - -static struct efx_filter_spec * -efx_ef10_filter_entry_spec(const struct efx_ef10_filter_table *table, - unsigned int filter_idx) -{ - return (struct efx_filter_spec *)(table->entry[filter_idx].spec & - ~EFX_EF10_FILTER_FLAGS); -} - -static unsigned int -efx_ef10_filter_entry_flags(const struct efx_ef10_filter_table *table, - unsigned int filter_idx) -{ - return table->entry[filter_idx].spec & EFX_EF10_FILTER_FLAGS; -} - -static void -efx_ef10_filter_set_entry(struct efx_ef10_filter_table *table, - unsigned int filter_idx, - const struct efx_filter_spec *spec, - unsigned int flags) -{ - table->entry[filter_idx].spec = (unsigned long)spec | flags; -} - -static void -efx_ef10_filter_push_prep_set_match_fields(struct efx_nic *efx, - const struct efx_filter_spec *spec, - efx_dword_t *inbuf) -{ - enum efx_encap_type encap_type = efx_filter_get_encap_type(spec); - u32 match_fields = 0, uc_match, mc_match; - - MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, - efx_ef10_filter_is_exclusive(spec) ? - MC_CMD_FILTER_OP_IN_OP_INSERT : - MC_CMD_FILTER_OP_IN_OP_SUBSCRIBE); - - /* Convert match flags and values. Unlike almost - * everything else in MCDI, these fields are in - * network byte order. - */ -#define COPY_VALUE(value, mcdi_field) \ - do { \ - match_fields |= \ - 1 << MC_CMD_FILTER_OP_IN_MATCH_ ## \ - mcdi_field ## _LBN; \ - BUILD_BUG_ON( \ - MC_CMD_FILTER_OP_IN_ ## mcdi_field ## _LEN < \ - sizeof(value)); \ - memcpy(MCDI_PTR(inbuf, FILTER_OP_IN_ ## mcdi_field), \ - &value, sizeof(value)); \ - } while (0) -#define COPY_FIELD(gen_flag, gen_field, mcdi_field) \ - if (spec->match_flags & EFX_FILTER_MATCH_ ## gen_flag) { \ - COPY_VALUE(spec->gen_field, mcdi_field); \ - } - /* Handle encap filters first. They will always be mismatch - * (unknown UC or MC) filters - */ - if (encap_type) { - /* ether_type and outer_ip_proto need to be variables - * because COPY_VALUE wants to memcpy them - */ - __be16 ether_type = - htons(encap_type & EFX_ENCAP_FLAG_IPV6 ? - ETH_P_IPV6 : ETH_P_IP); - u8 vni_type = MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_GENEVE; - u8 outer_ip_proto; - - switch (encap_type & EFX_ENCAP_TYPES_MASK) { - case EFX_ENCAP_TYPE_VXLAN: - vni_type = MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_VXLAN; - /* fallthrough */ - case EFX_ENCAP_TYPE_GENEVE: - COPY_VALUE(ether_type, ETHER_TYPE); - outer_ip_proto = IPPROTO_UDP; - COPY_VALUE(outer_ip_proto, IP_PROTO); - /* We always need to set the type field, even - * though we're not matching on the TNI. 
- */ - MCDI_POPULATE_DWORD_1(inbuf, - FILTER_OP_EXT_IN_VNI_OR_VSID, - FILTER_OP_EXT_IN_VNI_TYPE, - vni_type); - break; - case EFX_ENCAP_TYPE_NVGRE: - COPY_VALUE(ether_type, ETHER_TYPE); - outer_ip_proto = IPPROTO_GRE; - COPY_VALUE(outer_ip_proto, IP_PROTO); - break; - default: - WARN_ON(1); - } - - uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_UCAST_DST_LBN; - mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_MCAST_DST_LBN; - } else { - uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_UCAST_DST_LBN; - mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_MCAST_DST_LBN; - } - - if (spec->match_flags & EFX_FILTER_MATCH_LOC_MAC_IG) - match_fields |= - is_multicast_ether_addr(spec->loc_mac) ? - 1 << mc_match : - 1 << uc_match; - COPY_FIELD(REM_HOST, rem_host, SRC_IP); - COPY_FIELD(LOC_HOST, loc_host, DST_IP); - COPY_FIELD(REM_MAC, rem_mac, SRC_MAC); - COPY_FIELD(REM_PORT, rem_port, SRC_PORT); - COPY_FIELD(LOC_MAC, loc_mac, DST_MAC); - COPY_FIELD(LOC_PORT, loc_port, DST_PORT); - COPY_FIELD(ETHER_TYPE, ether_type, ETHER_TYPE); - COPY_FIELD(INNER_VID, inner_vid, INNER_VLAN); - COPY_FIELD(OUTER_VID, outer_vid, OUTER_VLAN); - COPY_FIELD(IP_PROTO, ip_proto, IP_PROTO); -#undef COPY_FIELD -#undef COPY_VALUE - MCDI_SET_DWORD(inbuf, FILTER_OP_IN_MATCH_FIELDS, - match_fields); -} - -static void efx_ef10_filter_push_prep(struct efx_nic *efx, - const struct efx_filter_spec *spec, - efx_dword_t *inbuf, u64 handle, - struct efx_rss_context *ctx, - bool replacing) -{ - struct efx_ef10_nic_data *nic_data = efx->nic_data; - u32 flags = spec->flags; - - memset(inbuf, 0, MC_CMD_FILTER_OP_EXT_IN_LEN); - - /* If RSS filter, caller better have given us an RSS context */ - if (flags & EFX_FILTER_FLAG_RX_RSS) { - /* We don't have the ability to return an error, so we'll just - * log a warning and disable RSS for the filter. - */ - if (WARN_ON_ONCE(!ctx)) - flags &= ~EFX_FILTER_FLAG_RX_RSS; - else if (WARN_ON_ONCE(ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID)) - flags &= ~EFX_FILTER_FLAG_RX_RSS; - } - - if (replacing) { - MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, - MC_CMD_FILTER_OP_IN_OP_REPLACE); - MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, handle); - } else { - efx_ef10_filter_push_prep_set_match_fields(efx, spec, inbuf); - } - - MCDI_SET_DWORD(inbuf, FILTER_OP_IN_PORT_ID, nic_data->vport_id); - MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_DEST, - spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP ? - MC_CMD_FILTER_OP_IN_RX_DEST_DROP : - MC_CMD_FILTER_OP_IN_RX_DEST_HOST); - MCDI_SET_DWORD(inbuf, FILTER_OP_IN_TX_DOMAIN, 0); - MCDI_SET_DWORD(inbuf, FILTER_OP_IN_TX_DEST, - MC_CMD_FILTER_OP_IN_TX_DEST_DEFAULT); - MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_QUEUE, - spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP ? - 0 : spec->dmaq_id); - MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_MODE, - (flags & EFX_FILTER_FLAG_RX_RSS) ? 
- MC_CMD_FILTER_OP_IN_RX_MODE_RSS : - MC_CMD_FILTER_OP_IN_RX_MODE_SIMPLE); - if (flags & EFX_FILTER_FLAG_RX_RSS) - MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_CONTEXT, ctx->context_id); -} - -static int efx_ef10_filter_push(struct efx_nic *efx, - const struct efx_filter_spec *spec, u64 *handle, - struct efx_rss_context *ctx, bool replacing) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN); - MCDI_DECLARE_BUF(outbuf, MC_CMD_FILTER_OP_EXT_OUT_LEN); - size_t outlen; - int rc; - - efx_ef10_filter_push_prep(efx, spec, inbuf, *handle, ctx, replacing); - rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf), - outbuf, sizeof(outbuf), &outlen); - if (rc && spec->priority != EFX_FILTER_PRI_HINT) - efx_mcdi_display_error(efx, MC_CMD_FILTER_OP, sizeof(inbuf), - outbuf, outlen, rc); - if (rc == 0) - *handle = MCDI_QWORD(outbuf, FILTER_OP_OUT_HANDLE); - if (rc == -ENOSPC) - rc = -EBUSY; /* to match efx_farch_filter_insert() */ - return rc; -} - -static u32 efx_ef10_filter_mcdi_flags_from_spec(const struct efx_filter_spec *spec) -{ - enum efx_encap_type encap_type = efx_filter_get_encap_type(spec); - unsigned int match_flags = spec->match_flags; - unsigned int uc_match, mc_match; - u32 mcdi_flags = 0; - -#define MAP_FILTER_TO_MCDI_FLAG(gen_flag, mcdi_field, encap) { \ - unsigned int old_match_flags = match_flags; \ - match_flags &= ~EFX_FILTER_MATCH_ ## gen_flag; \ - if (match_flags != old_match_flags) \ - mcdi_flags |= \ - (1 << ((encap) ? \ - MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_ ## \ - mcdi_field ## _LBN : \ - MC_CMD_FILTER_OP_EXT_IN_MATCH_ ##\ - mcdi_field ## _LBN)); \ - } - /* inner or outer based on encap type */ - MAP_FILTER_TO_MCDI_FLAG(REM_HOST, SRC_IP, encap_type); - MAP_FILTER_TO_MCDI_FLAG(LOC_HOST, DST_IP, encap_type); - MAP_FILTER_TO_MCDI_FLAG(REM_MAC, SRC_MAC, encap_type); - MAP_FILTER_TO_MCDI_FLAG(REM_PORT, SRC_PORT, encap_type); - MAP_FILTER_TO_MCDI_FLAG(LOC_MAC, DST_MAC, encap_type); - MAP_FILTER_TO_MCDI_FLAG(LOC_PORT, DST_PORT, encap_type); - MAP_FILTER_TO_MCDI_FLAG(ETHER_TYPE, ETHER_TYPE, encap_type); - MAP_FILTER_TO_MCDI_FLAG(IP_PROTO, IP_PROTO, encap_type); - /* always outer */ - MAP_FILTER_TO_MCDI_FLAG(INNER_VID, INNER_VLAN, false); - MAP_FILTER_TO_MCDI_FLAG(OUTER_VID, OUTER_VLAN, false); -#undef MAP_FILTER_TO_MCDI_FLAG - - /* special handling for encap type, and mismatch */ - if (encap_type) { - match_flags &= ~EFX_FILTER_MATCH_ENCAP_TYPE; - mcdi_flags |= - (1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_ETHER_TYPE_LBN); - mcdi_flags |= (1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_IP_PROTO_LBN); - - uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_UCAST_DST_LBN; - mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_MCAST_DST_LBN; - } else { - uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_UCAST_DST_LBN; - mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_MCAST_DST_LBN; - } - - if (match_flags & EFX_FILTER_MATCH_LOC_MAC_IG) { - match_flags &= ~EFX_FILTER_MATCH_LOC_MAC_IG; - mcdi_flags |= - is_multicast_ether_addr(spec->loc_mac) ? - 1 << mc_match : - 1 << uc_match; - } - - /* Did we map them all? 
*/ - WARN_ON_ONCE(match_flags); - - return mcdi_flags; -} - -static int efx_ef10_filter_pri(struct efx_ef10_filter_table *table, - const struct efx_filter_spec *spec) -{ - u32 mcdi_flags = efx_ef10_filter_mcdi_flags_from_spec(spec); - unsigned int match_pri; - - for (match_pri = 0; - match_pri < table->rx_match_count; - match_pri++) - if (table->rx_match_mcdi_flags[match_pri] == mcdi_flags) - return match_pri; - - return -EPROTONOSUPPORT; -} - -static s32 efx_ef10_filter_insert_locked(struct efx_nic *efx, - struct efx_filter_spec *spec, - bool replace_equal) -{ - DECLARE_BITMAP(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT); - struct efx_ef10_nic_data *nic_data = efx->nic_data; - struct efx_ef10_filter_table *table; - struct efx_filter_spec *saved_spec; - struct efx_rss_context *ctx = NULL; - unsigned int match_pri, hash; - unsigned int priv_flags; - bool rss_locked = false; - bool replacing = false; - unsigned int depth, i; - int ins_index = -1; - DEFINE_WAIT(wait); - bool is_mc_recip; - s32 rc; - - WARN_ON(!rwsem_is_locked(&efx->filter_sem)); - table = efx->filter_state; - down_write(&table->lock); - - /* For now, only support RX filters */ - if ((spec->flags & (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)) != - EFX_FILTER_FLAG_RX) { - rc = -EINVAL; - goto out_unlock; - } - - rc = efx_ef10_filter_pri(table, spec); - if (rc < 0) - goto out_unlock; - match_pri = rc; - - hash = efx_filter_spec_hash(spec); - is_mc_recip = efx_filter_is_mc_recipient(spec); - if (is_mc_recip) - bitmap_zero(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT); - - if (spec->flags & EFX_FILTER_FLAG_RX_RSS) { - mutex_lock(&efx->rss_lock); - rss_locked = true; - if (spec->rss_context) - ctx = efx_find_rss_context_entry(efx, spec->rss_context); - else - ctx = &efx->rss_context; - if (!ctx) { - rc = -ENOENT; - goto out_unlock; - } - if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) { - rc = -EOPNOTSUPP; - goto out_unlock; - } - } - - /* Find any existing filters with the same match tuple or - * else a free slot to insert at. - */ - for (depth = 1; depth < EFX_EF10_FILTER_SEARCH_LIMIT; depth++) { - i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1); - saved_spec = efx_ef10_filter_entry_spec(table, i); - - if (!saved_spec) { - if (ins_index < 0) - ins_index = i; - } else if (efx_filter_spec_equal(spec, saved_spec)) { - if (spec->priority < saved_spec->priority && - spec->priority != EFX_FILTER_PRI_AUTO) { - rc = -EPERM; - goto out_unlock; - } - if (!is_mc_recip) { - /* This is the only one */ - if (spec->priority == - saved_spec->priority && - !replace_equal) { - rc = -EEXIST; - goto out_unlock; - } - ins_index = i; - break; - } else if (spec->priority > - saved_spec->priority || - (spec->priority == - saved_spec->priority && - replace_equal)) { - if (ins_index < 0) - ins_index = i; - else - __set_bit(depth, mc_rem_map); - } - } - } - - /* Once we reach the maximum search depth, use the first suitable - * slot, or return -EBUSY if there was none - */ - if (ins_index < 0) { - rc = -EBUSY; - goto out_unlock; - } - - /* Create a software table entry if necessary. 
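The insertion path above searches its software table by open addressing: starting from the spec's hash it probes at most EFX_EF10_FILTER_SEARCH_LIMIT slots, remembering the first free row and any entries with an equal match tuple (plus, for multicast recipients, the lower-priority entries it will sweep afterwards), and fails with -EBUSY once the probe sequence is exhausted. Stripped of that bookkeeping, the slot search reduces to the following standalone sketch:

#include <stdbool.h>

#define TBL_ROWS	8192	/* HUNT_FILTER_TBL_ROWS */
#define SEARCH_LIMIT	200	/* EFX_EF10_FILTER_SEARCH_LIMIT */

/* Return the first free slot reachable from 'hash', or -1 when the
 * bounded probe is exhausted (the real code then returns -EBUSY).  The
 * real loop also compares match tuples so an identical existing filter
 * is replaced or rejected rather than duplicated. */
static int find_insertion_slot(const bool used[TBL_ROWS], unsigned int hash)
{
	unsigned int depth;

	for (depth = 1; depth < SEARCH_LIMIT; depth++) {
		unsigned int i = (hash + depth) & (TBL_ROWS - 1);

		if (!used[i])
			return (int)i;
	}
	return -1;
}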
*/ - saved_spec = efx_ef10_filter_entry_spec(table, ins_index); - if (saved_spec) { - if (spec->priority == EFX_FILTER_PRI_AUTO && - saved_spec->priority >= EFX_FILTER_PRI_AUTO) { - /* Just make sure it won't be removed */ - if (saved_spec->priority > EFX_FILTER_PRI_AUTO) - saved_spec->flags |= EFX_FILTER_FLAG_RX_OVER_AUTO; - table->entry[ins_index].spec &= - ~EFX_EF10_FILTER_FLAG_AUTO_OLD; - rc = ins_index; - goto out_unlock; - } - replacing = true; - priv_flags = efx_ef10_filter_entry_flags(table, ins_index); - } else { - saved_spec = kmalloc(sizeof(*spec), GFP_ATOMIC); - if (!saved_spec) { - rc = -ENOMEM; - goto out_unlock; - } - *saved_spec = *spec; - priv_flags = 0; - } - efx_ef10_filter_set_entry(table, ins_index, saved_spec, priv_flags); - - /* Actually insert the filter on the HW */ - rc = efx_ef10_filter_push(efx, spec, &table->entry[ins_index].handle, - ctx, replacing); - - if (rc == -EINVAL && nic_data->must_realloc_vis) - /* The MC rebooted under us, causing it to reject our filter - * insertion as pointing to an invalid VI (spec->dmaq_id). - */ - rc = -EAGAIN; - - /* Finalise the software table entry */ - if (rc == 0) { - if (replacing) { - /* Update the fields that may differ */ - if (saved_spec->priority == EFX_FILTER_PRI_AUTO) - saved_spec->flags |= - EFX_FILTER_FLAG_RX_OVER_AUTO; - saved_spec->priority = spec->priority; - saved_spec->flags &= EFX_FILTER_FLAG_RX_OVER_AUTO; - saved_spec->flags |= spec->flags; - saved_spec->rss_context = spec->rss_context; - saved_spec->dmaq_id = spec->dmaq_id; - } - } else if (!replacing) { - kfree(saved_spec); - saved_spec = NULL; - } else { - /* We failed to replace, so the old filter is still present. - * Roll back the software table to reflect this. In fact the - * efx_ef10_filter_set_entry() call below will do the right - * thing, so nothing extra is needed here. - */ - } - efx_ef10_filter_set_entry(table, ins_index, saved_spec, priv_flags); - - /* Remove and finalise entries for lower-priority multicast - * recipients - */ - if (is_mc_recip) { - MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN); - unsigned int depth, i; - - memset(inbuf, 0, sizeof(inbuf)); - - for (depth = 0; depth < EFX_EF10_FILTER_SEARCH_LIMIT; depth++) { - if (!test_bit(depth, mc_rem_map)) - continue; - - i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1); - saved_spec = efx_ef10_filter_entry_spec(table, i); - priv_flags = efx_ef10_filter_entry_flags(table, i); - - if (rc == 0) { - MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, - MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE); - MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, - table->entry[i].handle); - rc = efx_mcdi_rpc(efx, MC_CMD_FILTER_OP, - inbuf, sizeof(inbuf), - NULL, 0, NULL); - } - - if (rc == 0) { - kfree(saved_spec); - saved_spec = NULL; - priv_flags = 0; - } - efx_ef10_filter_set_entry(table, i, saved_spec, - priv_flags); - } - } - - /* If successful, return the inserted filter ID */ - if (rc == 0) - rc = efx_ef10_make_filter_id(match_pri, ins_index); - -out_unlock: - if (rss_locked) - mutex_unlock(&efx->rss_lock); - up_write(&table->lock); - return rc; -} - -static s32 efx_ef10_filter_insert(struct efx_nic *efx, - struct efx_filter_spec *spec, - bool replace_equal) -{ - s32 ret; - - down_read(&efx->filter_sem); - ret = efx_ef10_filter_insert_locked(efx, spec, replace_equal); - up_read(&efx->filter_sem); - - return ret; -} - -static void efx_ef10_filter_update_rx_scatter(struct efx_nic *efx) -{ - /* no need to do anything here on EF10 */ -} - -/* Remove a filter. 
- * If !by_index, remove by ID - * If by_index, remove by index - * Filter ID may come from userland and must be range-checked. - * Caller must hold efx->filter_sem for read, and efx->filter_state->lock - * for write. - */ -static int efx_ef10_filter_remove_internal(struct efx_nic *efx, - unsigned int priority_mask, - u32 filter_id, bool by_index) -{ - unsigned int filter_idx = efx_ef10_filter_get_unsafe_id(filter_id); - struct efx_ef10_filter_table *table = efx->filter_state; - MCDI_DECLARE_BUF(inbuf, - MC_CMD_FILTER_OP_IN_HANDLE_OFST + - MC_CMD_FILTER_OP_IN_HANDLE_LEN); - struct efx_filter_spec *spec; - DEFINE_WAIT(wait); - int rc; - - spec = efx_ef10_filter_entry_spec(table, filter_idx); - if (!spec || - (!by_index && - efx_ef10_filter_pri(table, spec) != - efx_ef10_filter_get_unsafe_pri(filter_id))) - return -ENOENT; - - if (spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO && - priority_mask == (1U << EFX_FILTER_PRI_AUTO)) { - /* Just remove flags */ - spec->flags &= ~EFX_FILTER_FLAG_RX_OVER_AUTO; - table->entry[filter_idx].spec &= ~EFX_EF10_FILTER_FLAG_AUTO_OLD; - return 0; - } - - if (!(priority_mask & (1U << spec->priority))) - return -ENOENT; - - if (spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO) { - /* Reset to an automatic filter */ - - struct efx_filter_spec new_spec = *spec; - - new_spec.priority = EFX_FILTER_PRI_AUTO; - new_spec.flags = (EFX_FILTER_FLAG_RX | - (efx_rss_active(&efx->rss_context) ? - EFX_FILTER_FLAG_RX_RSS : 0)); - new_spec.dmaq_id = 0; - new_spec.rss_context = 0; - rc = efx_ef10_filter_push(efx, &new_spec, - &table->entry[filter_idx].handle, - &efx->rss_context, - true); - - if (rc == 0) - *spec = new_spec; - } else { - /* Really remove the filter */ - - MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, - efx_ef10_filter_is_exclusive(spec) ? 
- MC_CMD_FILTER_OP_IN_OP_REMOVE : - MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE); - MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, - table->entry[filter_idx].handle); - rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP, - inbuf, sizeof(inbuf), NULL, 0, NULL); - - if ((rc == 0) || (rc == -ENOENT)) { - /* Filter removed OK or didn't actually exist */ - kfree(spec); - efx_ef10_filter_set_entry(table, filter_idx, NULL, 0); - } else { - efx_mcdi_display_error(efx, MC_CMD_FILTER_OP, - MC_CMD_FILTER_OP_EXT_IN_LEN, - NULL, 0, rc); - } - } - - return rc; -} - -static int efx_ef10_filter_remove_safe(struct efx_nic *efx, - enum efx_filter_priority priority, - u32 filter_id) -{ - struct efx_ef10_filter_table *table; - int rc; - - down_read(&efx->filter_sem); - table = efx->filter_state; - down_write(&table->lock); - rc = efx_ef10_filter_remove_internal(efx, 1U << priority, filter_id, - false); - up_write(&table->lock); - up_read(&efx->filter_sem); - return rc; -} - -/* Caller must hold efx->filter_sem for read */ -static void efx_ef10_filter_remove_unsafe(struct efx_nic *efx, - enum efx_filter_priority priority, - u32 filter_id) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - - if (filter_id == EFX_EF10_FILTER_ID_INVALID) - return; - - down_write(&table->lock); - efx_ef10_filter_remove_internal(efx, 1U << priority, filter_id, - true); - up_write(&table->lock); -} - -static int efx_ef10_filter_get_safe(struct efx_nic *efx, - enum efx_filter_priority priority, - u32 filter_id, struct efx_filter_spec *spec) -{ - unsigned int filter_idx = efx_ef10_filter_get_unsafe_id(filter_id); - const struct efx_filter_spec *saved_spec; - struct efx_ef10_filter_table *table; - int rc; - - down_read(&efx->filter_sem); - table = efx->filter_state; - down_read(&table->lock); - saved_spec = efx_ef10_filter_entry_spec(table, filter_idx); - if (saved_spec && saved_spec->priority == priority && - efx_ef10_filter_pri(table, saved_spec) == - efx_ef10_filter_get_unsafe_pri(filter_id)) { - *spec = *saved_spec; - rc = 0; - } else { - rc = -ENOENT; - } - up_read(&table->lock); - up_read(&efx->filter_sem); - return rc; -} - -static int efx_ef10_filter_clear_rx(struct efx_nic *efx, - enum efx_filter_priority priority) -{ - struct efx_ef10_filter_table *table; - unsigned int priority_mask; - unsigned int i; - int rc; - - priority_mask = (((1U << (priority + 1)) - 1) & - ~(1U << EFX_FILTER_PRI_AUTO)); - - down_read(&efx->filter_sem); - table = efx->filter_state; - down_write(&table->lock); - for (i = 0; i < HUNT_FILTER_TBL_ROWS; i++) { - rc = efx_ef10_filter_remove_internal(efx, priority_mask, - i, true); - if (rc && rc != -ENOENT) - break; - rc = 0; - } - - up_write(&table->lock); - up_read(&efx->filter_sem); - return rc; -} - -static u32 efx_ef10_filter_count_rx_used(struct efx_nic *efx, - enum efx_filter_priority priority) -{ - struct efx_ef10_filter_table *table; - unsigned int filter_idx; - s32 count = 0; - - down_read(&efx->filter_sem); - table = efx->filter_state; - down_read(&table->lock); - for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) { - if (table->entry[filter_idx].spec && - efx_ef10_filter_entry_spec(table, filter_idx)->priority == - priority) - ++count; - } - up_read(&table->lock); - up_read(&efx->filter_sem); - return count; -} - -static u32 efx_ef10_filter_get_rx_id_limit(struct efx_nic *efx) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - - return table->rx_match_count * HUNT_FILTER_TBL_ROWS * 2; -} - -static s32 efx_ef10_filter_get_rx_ids(struct efx_nic *efx, - enum 
efx_filter_priority priority, - u32 *buf, u32 size) -{ - struct efx_ef10_filter_table *table; - struct efx_filter_spec *spec; - unsigned int filter_idx; - s32 count = 0; - - down_read(&efx->filter_sem); - table = efx->filter_state; - down_read(&table->lock); - - for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) { - spec = efx_ef10_filter_entry_spec(table, filter_idx); - if (spec && spec->priority == priority) { - if (count == size) { - count = -EMSGSIZE; - break; - } - buf[count++] = - efx_ef10_make_filter_id( - efx_ef10_filter_pri(table, spec), - filter_idx); - } - } - up_read(&table->lock); - up_read(&efx->filter_sem); - return count; -} - -#ifdef CONFIG_RFS_ACCEL - -static bool efx_ef10_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id, - unsigned int filter_idx) -{ - struct efx_filter_spec *spec, saved_spec; - struct efx_ef10_filter_table *table; - struct efx_arfs_rule *rule = NULL; - bool ret = true, force = false; - u16 arfs_id; - - down_read(&efx->filter_sem); - table = efx->filter_state; - down_write(&table->lock); - spec = efx_ef10_filter_entry_spec(table, filter_idx); - - if (!spec || spec->priority != EFX_FILTER_PRI_HINT) - goto out_unlock; - - spin_lock_bh(&efx->rps_hash_lock); - if (!efx->rps_hash_table) { - /* In the absence of the table, we always return 0 to ARFS. */ - arfs_id = 0; - } else { - rule = efx_rps_hash_find(efx, spec); - if (!rule) - /* ARFS table doesn't know of this filter, so remove it */ - goto expire; - arfs_id = rule->arfs_id; - ret = efx_rps_check_rule(rule, filter_idx, &force); - if (force) - goto expire; - if (!ret) { - spin_unlock_bh(&efx->rps_hash_lock); - goto out_unlock; - } - } - if (!rps_may_expire_flow(efx->net_dev, spec->dmaq_id, flow_id, arfs_id)) - ret = false; - else if (rule) - rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING; -expire: - saved_spec = *spec; /* remove operation will kfree spec */ - spin_unlock_bh(&efx->rps_hash_lock); - /* At this point (since we dropped the lock), another thread might queue - * up a fresh insertion request (but the actual insertion will be held - * up by our possession of the filter table lock). In that case, it - * will set rule->filter_id to EFX_ARFS_FILTER_ID_PENDING, meaning that - * the rule is not removed by efx_rps_hash_del() below. - */ - if (ret) - ret = efx_ef10_filter_remove_internal(efx, 1U << spec->priority, - filter_idx, true) == 0; - /* While we can't safely dereference rule (we dropped the lock), we can - * still test it for NULL. 
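For reference, the priority mask built in efx_ef10_filter_clear_rx() above selects every priority up to and including the requested one while always excluding AUTO filters. A tiny standalone check of that bit arithmetic; the enum ordering below is an assumption for illustration only, mirroring what the HINT/AUTO/MANUAL/REQUIRED names suggest:

#include <stdio.h>

/* Assumed to mirror the driver's enum efx_filter_priority ordering */
enum pri { PRI_HINT, PRI_AUTO, PRI_MANUAL, PRI_REQUIRED };

int main(void)
{
	enum pri priority = PRI_MANUAL;

	/* everything at or below 'priority', minus the AUTO bit */
	unsigned int mask = ((1U << (priority + 1)) - 1) & ~(1U << PRI_AUTO);

	for (int p = PRI_HINT; p <= PRI_REQUIRED; p++)
		printf("priority %d %s cleared\n", p,
		       (mask & (1U << p)) ? "would be" : "would not be");
	return 0;
}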
- */ - if (ret && rule) { - /* Expiring, so remove entry from ARFS table */ - spin_lock_bh(&efx->rps_hash_lock); - efx_rps_hash_del(efx, &saved_spec); - spin_unlock_bh(&efx->rps_hash_lock); - } -out_unlock: - up_write(&table->lock); - up_read(&efx->filter_sem); - return ret; -} - -#endif /* CONFIG_RFS_ACCEL */ - -static int efx_ef10_filter_match_flags_from_mcdi(bool encap, u32 mcdi_flags) -{ - int match_flags = 0; - -#define MAP_FLAG(gen_flag, mcdi_field) do { \ - u32 old_mcdi_flags = mcdi_flags; \ - mcdi_flags &= ~(1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_ ## \ - mcdi_field ## _LBN); \ - if (mcdi_flags != old_mcdi_flags) \ - match_flags |= EFX_FILTER_MATCH_ ## gen_flag; \ - } while (0) - - if (encap) { - /* encap filters must specify encap type */ - match_flags |= EFX_FILTER_MATCH_ENCAP_TYPE; - /* and imply ethertype and ip proto */ - mcdi_flags &= - ~(1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_IP_PROTO_LBN); - mcdi_flags &= - ~(1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_ETHER_TYPE_LBN); - /* VLAN tags refer to the outer packet */ - MAP_FLAG(INNER_VID, INNER_VLAN); - MAP_FLAG(OUTER_VID, OUTER_VLAN); - /* everything else refers to the inner packet */ - MAP_FLAG(LOC_MAC_IG, IFRM_UNKNOWN_UCAST_DST); - MAP_FLAG(LOC_MAC_IG, IFRM_UNKNOWN_MCAST_DST); - MAP_FLAG(REM_HOST, IFRM_SRC_IP); - MAP_FLAG(LOC_HOST, IFRM_DST_IP); - MAP_FLAG(REM_MAC, IFRM_SRC_MAC); - MAP_FLAG(REM_PORT, IFRM_SRC_PORT); - MAP_FLAG(LOC_MAC, IFRM_DST_MAC); - MAP_FLAG(LOC_PORT, IFRM_DST_PORT); - MAP_FLAG(ETHER_TYPE, IFRM_ETHER_TYPE); - MAP_FLAG(IP_PROTO, IFRM_IP_PROTO); - } else { - MAP_FLAG(LOC_MAC_IG, UNKNOWN_UCAST_DST); - MAP_FLAG(LOC_MAC_IG, UNKNOWN_MCAST_DST); - MAP_FLAG(REM_HOST, SRC_IP); - MAP_FLAG(LOC_HOST, DST_IP); - MAP_FLAG(REM_MAC, SRC_MAC); - MAP_FLAG(REM_PORT, SRC_PORT); - MAP_FLAG(LOC_MAC, DST_MAC); - MAP_FLAG(LOC_PORT, DST_PORT); - MAP_FLAG(ETHER_TYPE, ETHER_TYPE); - MAP_FLAG(INNER_VID, INNER_VLAN); - MAP_FLAG(OUTER_VID, OUTER_VLAN); - MAP_FLAG(IP_PROTO, IP_PROTO); - } -#undef MAP_FLAG - - /* Did we map them all? */ - if (mcdi_flags) - return -EINVAL; - - return match_flags; -} - -static void efx_ef10_filter_cleanup_vlans(struct efx_nic *efx) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - struct efx_ef10_filter_vlan *vlan, *next_vlan; - - /* See comment in efx_ef10_filter_table_remove() */ - if (!efx_rwsem_assert_write_locked(&efx->filter_sem)) - return; - - if (!table) - return; - - list_for_each_entry_safe(vlan, next_vlan, &table->vlan_list, list) - efx_ef10_filter_del_vlan_internal(efx, vlan); -} - -static bool efx_ef10_filter_match_supported(struct efx_ef10_filter_table *table, - bool encap, - enum efx_filter_match_flags match_flags) -{ - unsigned int match_pri; - int mf; - - for (match_pri = 0; - match_pri < table->rx_match_count; - match_pri++) { - mf = efx_ef10_filter_match_flags_from_mcdi(encap, - table->rx_match_mcdi_flags[match_pri]); - if (mf == match_flags) - return true; - } - - return false; -} - -static int -efx_ef10_filter_table_probe_matches(struct efx_nic *efx, - struct efx_ef10_filter_table *table, - bool encap) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_GET_PARSER_DISP_INFO_IN_LEN); - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PARSER_DISP_INFO_OUT_LENMAX); - unsigned int pd_match_pri, pd_match_count; - size_t outlen; - int rc; - - /* Find out which RX filter types are supported, and their priorities */ - MCDI_SET_DWORD(inbuf, GET_PARSER_DISP_INFO_IN_OP, - encap ? 
- MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_ENCAP_RX_MATCHES : - MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_RX_MATCHES); - rc = efx_mcdi_rpc(efx, MC_CMD_GET_PARSER_DISP_INFO, - inbuf, sizeof(inbuf), outbuf, sizeof(outbuf), - &outlen); - if (rc) - return rc; - - pd_match_count = MCDI_VAR_ARRAY_LEN( - outlen, GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES); - - for (pd_match_pri = 0; pd_match_pri < pd_match_count; pd_match_pri++) { - u32 mcdi_flags = - MCDI_ARRAY_DWORD( - outbuf, - GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES, - pd_match_pri); - rc = efx_ef10_filter_match_flags_from_mcdi(encap, mcdi_flags); - if (rc < 0) { - netif_dbg(efx, probe, efx->net_dev, - "%s: fw flags %#x pri %u not supported in driver\n", - __func__, mcdi_flags, pd_match_pri); - } else { - netif_dbg(efx, probe, efx->net_dev, - "%s: fw flags %#x pri %u supported as driver flags %#x pri %u\n", - __func__, mcdi_flags, pd_match_pri, - rc, table->rx_match_count); - table->rx_match_mcdi_flags[table->rx_match_count] = mcdi_flags; - table->rx_match_count++; - } - } - - return 0; -} - -static int efx_ef10_filter_table_probe(struct efx_nic *efx) -{ - struct efx_ef10_nic_data *nic_data = efx->nic_data; - struct net_device *net_dev = efx->net_dev; - struct efx_ef10_filter_table *table; - struct efx_ef10_vlan *vlan; - int rc; - - if (!efx_rwsem_assert_write_locked(&efx->filter_sem)) - return -EINVAL; - - if (efx->filter_state) /* already probed */ - return 0; - - table = kzalloc(sizeof(*table), GFP_KERNEL); - if (!table) - return -ENOMEM; - - table->rx_match_count = 0; - rc = efx_ef10_filter_table_probe_matches(efx, table, false); - if (rc) - goto fail; - if (nic_data->datapath_caps & - (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN)) - rc = efx_ef10_filter_table_probe_matches(efx, table, true); - if (rc) - goto fail; - if ((efx_supported_features(efx) & NETIF_F_HW_VLAN_CTAG_FILTER) && - !(efx_ef10_filter_match_supported(table, false, - (EFX_FILTER_MATCH_OUTER_VID | EFX_FILTER_MATCH_LOC_MAC)) && - efx_ef10_filter_match_supported(table, false, - (EFX_FILTER_MATCH_OUTER_VID | EFX_FILTER_MATCH_LOC_MAC_IG)))) { - netif_info(efx, probe, net_dev, - "VLAN filters are not supported in this firmware variant\n"); - net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; - efx->fixed_features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; - net_dev->hw_features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; - } - - table->entry = vzalloc(array_size(HUNT_FILTER_TBL_ROWS, - sizeof(*table->entry))); - if (!table->entry) { - rc = -ENOMEM; - goto fail; - } - - table->mc_promisc_last = false; - table->vlan_filter = - !!(efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_FILTER); - INIT_LIST_HEAD(&table->vlan_list); - init_rwsem(&table->lock); - - efx->filter_state = table; - - list_for_each_entry(vlan, &nic_data->vlan_list, list) { - rc = efx_ef10_filter_add_vlan(efx, vlan->vid); - if (rc) - goto fail_add_vlan; - } - - return 0; - -fail_add_vlan: - efx_ef10_filter_cleanup_vlans(efx); - efx->filter_state = NULL; -fail: - kfree(table); - return rc; -} - -/* Caller must hold efx->filter_sem for read if race against - * efx_ef10_filter_table_remove() is possible - */ -static void efx_ef10_filter_table_restore(struct efx_nic *efx) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - struct efx_ef10_nic_data *nic_data = efx->nic_data; - unsigned int invalid_filters = 0, failed = 0; - struct efx_ef10_filter_vlan *vlan; - struct efx_filter_spec *spec; - struct efx_rss_context *ctx; - unsigned int filter_idx; - u32 mcdi_flags; - int match_pri; - int rc, i; - - 
WARN_ON(!rwsem_is_locked(&efx->filter_sem)); - - if (!nic_data->must_restore_filters) - return; - - if (!table) - return; - - down_write(&table->lock); - mutex_lock(&efx->rss_lock); - - for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) { - spec = efx_ef10_filter_entry_spec(table, filter_idx); - if (!spec) - continue; - - mcdi_flags = efx_ef10_filter_mcdi_flags_from_spec(spec); - match_pri = 0; - while (match_pri < table->rx_match_count && - table->rx_match_mcdi_flags[match_pri] != mcdi_flags) - ++match_pri; - if (match_pri >= table->rx_match_count) { - invalid_filters++; - goto not_restored; - } - if (spec->rss_context) - ctx = efx_find_rss_context_entry(efx, spec->rss_context); - else - ctx = &efx->rss_context; - if (spec->flags & EFX_FILTER_FLAG_RX_RSS) { - if (!ctx) { - netif_warn(efx, drv, efx->net_dev, - "Warning: unable to restore a filter with nonexistent RSS context %u.\n", - spec->rss_context); - invalid_filters++; - goto not_restored; - } - if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) { - netif_warn(efx, drv, efx->net_dev, - "Warning: unable to restore a filter with RSS context %u as it was not created.\n", - spec->rss_context); - invalid_filters++; - goto not_restored; - } - } - - rc = efx_ef10_filter_push(efx, spec, - &table->entry[filter_idx].handle, - ctx, false); - if (rc) - failed++; - - if (rc) { -not_restored: - list_for_each_entry(vlan, &table->vlan_list, list) - for (i = 0; i < EFX_EF10_NUM_DEFAULT_FILTERS; ++i) - if (vlan->default_filters[i] == filter_idx) - vlan->default_filters[i] = - EFX_EF10_FILTER_ID_INVALID; - - kfree(spec); - efx_ef10_filter_set_entry(table, filter_idx, NULL, 0); - } - } - - mutex_unlock(&efx->rss_lock); - up_write(&table->lock); - - /* This can happen validly if the MC's capabilities have changed, so - * is not an error. - */ - if (invalid_filters) - netif_dbg(efx, drv, efx->net_dev, - "Did not restore %u filters that are now unsupported.\n", - invalid_filters); - - if (failed) - netif_err(efx, hw, efx->net_dev, - "unable to restore %u filters\n", failed); - else - nic_data->must_restore_filters = false; -} - -static void efx_ef10_filter_table_remove(struct efx_nic *efx) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN); - struct efx_filter_spec *spec; - unsigned int filter_idx; - int rc; - - efx_ef10_filter_cleanup_vlans(efx); - efx->filter_state = NULL; - /* If we were called without locking, then it's not safe to free - * the table as others might be using it. So we just WARN, leak - * the memory, and potentially get an inconsistent filter table - * state. - * This should never actually happen. - */ - if (!efx_rwsem_assert_write_locked(&efx->filter_sem)) - return; - - if (!table) - return; - - for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) { - spec = efx_ef10_filter_entry_spec(table, filter_idx); - if (!spec) - continue; - - MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, - efx_ef10_filter_is_exclusive(spec) ? 
- MC_CMD_FILTER_OP_IN_OP_REMOVE : - MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE); - MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, - table->entry[filter_idx].handle); - rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP, inbuf, - sizeof(inbuf), NULL, 0, NULL); - if (rc) - netif_info(efx, drv, efx->net_dev, - "%s: filter %04x remove failed\n", - __func__, filter_idx); - kfree(spec); - } - - vfree(table->entry); - kfree(table); -} - -static void efx_ef10_filter_mark_one_old(struct efx_nic *efx, uint16_t *id) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - unsigned int filter_idx; - - efx_rwsem_assert_write_locked(&table->lock); - - if (*id != EFX_EF10_FILTER_ID_INVALID) { - filter_idx = efx_ef10_filter_get_unsafe_id(*id); - if (!table->entry[filter_idx].spec) - netif_dbg(efx, drv, efx->net_dev, - "marked null spec old %04x:%04x\n", *id, - filter_idx); - table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_AUTO_OLD; - *id = EFX_EF10_FILTER_ID_INVALID; - } -} - -/* Mark old per-VLAN filters that may need to be removed */ -static void _efx_ef10_filter_vlan_mark_old(struct efx_nic *efx, - struct efx_ef10_filter_vlan *vlan) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - unsigned int i; - - for (i = 0; i < table->dev_uc_count; i++) - efx_ef10_filter_mark_one_old(efx, &vlan->uc[i]); - for (i = 0; i < table->dev_mc_count; i++) - efx_ef10_filter_mark_one_old(efx, &vlan->mc[i]); - for (i = 0; i < EFX_EF10_NUM_DEFAULT_FILTERS; i++) - efx_ef10_filter_mark_one_old(efx, &vlan->default_filters[i]); -} - -/* Mark old filters that may need to be removed. - * Caller must hold efx->filter_sem for read if race against - * efx_ef10_filter_table_remove() is possible - */ -static void efx_ef10_filter_mark_old(struct efx_nic *efx) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - struct efx_ef10_filter_vlan *vlan; - - down_write(&table->lock); - list_for_each_entry(vlan, &table->vlan_list, list) - _efx_ef10_filter_vlan_mark_old(efx, vlan); - up_write(&table->lock); -} - -static void efx_ef10_filter_uc_addr_list(struct efx_nic *efx) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - struct net_device *net_dev = efx->net_dev; - struct netdev_hw_addr *uc; - unsigned int i; - - table->uc_promisc = !!(net_dev->flags & IFF_PROMISC); - ether_addr_copy(table->dev_uc_list[0].addr, net_dev->dev_addr); - i = 1; - netdev_for_each_uc_addr(uc, net_dev) { - if (i >= EFX_EF10_FILTER_DEV_UC_MAX) { - table->uc_promisc = true; - break; - } - ether_addr_copy(table->dev_uc_list[i].addr, uc->addr); - i++; - } - - table->dev_uc_count = i; -} - -static void efx_ef10_filter_mc_addr_list(struct efx_nic *efx) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - struct net_device *net_dev = efx->net_dev; - struct netdev_hw_addr *mc; - unsigned int i; - - table->mc_overflow = false; - table->mc_promisc = !!(net_dev->flags & (IFF_PROMISC | IFF_ALLMULTI)); - - i = 0; - netdev_for_each_mc_addr(mc, net_dev) { - if (i >= EFX_EF10_FILTER_DEV_MC_MAX) { - table->mc_promisc = true; - table->mc_overflow = true; - break; - } - ether_addr_copy(table->dev_mc_list[i].addr, mc->addr); - i++; - } - - table->dev_mc_count = i; -} - -static int efx_ef10_filter_insert_addr_list(struct efx_nic *efx, - struct efx_ef10_filter_vlan *vlan, - bool multicast, bool rollback) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - struct efx_ef10_dev_addr *addr_list; - enum efx_filter_flags filter_flags; - struct efx_filter_spec spec; - u8 baddr[ETH_ALEN]; - unsigned int i, j; - int addr_count; - u16 *ids; - 
int rc; - - if (multicast) { - addr_list = table->dev_mc_list; - addr_count = table->dev_mc_count; - ids = vlan->mc; - } else { - addr_list = table->dev_uc_list; - addr_count = table->dev_uc_count; - ids = vlan->uc; - } - - filter_flags = efx_rss_active(&efx->rss_context) ? EFX_FILTER_FLAG_RX_RSS : 0; - - /* Insert/renew filters */ - for (i = 0; i < addr_count; i++) { - EFX_WARN_ON_PARANOID(ids[i] != EFX_EF10_FILTER_ID_INVALID); - efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0); - efx_filter_set_eth_local(&spec, vlan->vid, addr_list[i].addr); - rc = efx_ef10_filter_insert_locked(efx, &spec, true); - if (rc < 0) { - if (rollback) { - netif_info(efx, drv, efx->net_dev, - "efx_ef10_filter_insert failed rc=%d\n", - rc); - /* Fall back to promiscuous */ - for (j = 0; j < i; j++) { - efx_ef10_filter_remove_unsafe( - efx, EFX_FILTER_PRI_AUTO, - ids[j]); - ids[j] = EFX_EF10_FILTER_ID_INVALID; - } - return rc; - } else { - /* keep invalid ID, and carry on */ - } - } else { - ids[i] = efx_ef10_filter_get_unsafe_id(rc); - } - } - - if (multicast && rollback) { - /* Also need an Ethernet broadcast filter */ - EFX_WARN_ON_PARANOID(vlan->default_filters[EFX_EF10_BCAST] != - EFX_EF10_FILTER_ID_INVALID); - efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0); - eth_broadcast_addr(baddr); - efx_filter_set_eth_local(&spec, vlan->vid, baddr); - rc = efx_ef10_filter_insert_locked(efx, &spec, true); - if (rc < 0) { - netif_warn(efx, drv, efx->net_dev, - "Broadcast filter insert failed rc=%d\n", rc); - /* Fall back to promiscuous */ - for (j = 0; j < i; j++) { - efx_ef10_filter_remove_unsafe( - efx, EFX_FILTER_PRI_AUTO, - ids[j]); - ids[j] = EFX_EF10_FILTER_ID_INVALID; - } - return rc; - } else { - vlan->default_filters[EFX_EF10_BCAST] = - efx_ef10_filter_get_unsafe_id(rc); - } - } - - return 0; -} - -static int efx_ef10_filter_insert_def(struct efx_nic *efx, - struct efx_ef10_filter_vlan *vlan, - enum efx_encap_type encap_type, - bool multicast, bool rollback) -{ - struct efx_ef10_nic_data *nic_data = efx->nic_data; - enum efx_filter_flags filter_flags; - struct efx_filter_spec spec; - u8 baddr[ETH_ALEN]; - int rc; - u16 *id; - - filter_flags = efx_rss_active(&efx->rss_context) ? EFX_FILTER_FLAG_RX_RSS : 0; - - efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0); - - if (multicast) - efx_filter_set_mc_def(&spec); - else - efx_filter_set_uc_def(&spec); - - if (encap_type) { - if (nic_data->datapath_caps & - (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN)) - efx_filter_set_encap_type(&spec, encap_type); - else - /* don't insert encap filters on non-supporting - * platforms. ID will be left as INVALID. - */ - return 0; - } - - if (vlan->vid != EFX_FILTER_VID_UNSPEC) - efx_filter_set_eth_local(&spec, vlan->vid, NULL); - - rc = efx_ef10_filter_insert_locked(efx, &spec, true); - if (rc < 0) { - const char *um = multicast ? "Multicast" : "Unicast"; - const char *encap_name = ""; - const char *encap_ipv = ""; - - if ((encap_type & EFX_ENCAP_TYPES_MASK) == - EFX_ENCAP_TYPE_VXLAN) - encap_name = "VXLAN "; - else if ((encap_type & EFX_ENCAP_TYPES_MASK) == - EFX_ENCAP_TYPE_NVGRE) - encap_name = "NVGRE "; - else if ((encap_type & EFX_ENCAP_TYPES_MASK) == - EFX_ENCAP_TYPE_GENEVE) - encap_name = "GENEVE "; - if (encap_type & EFX_ENCAP_FLAG_IPV6) - encap_ipv = "IPv6 "; - else if (encap_type) - encap_ipv = "IPv4 "; - - /* unprivileged functions can't insert mismatch filters - * for encapsulated or unicast traffic, so downgrade - * those warnings to debug. 
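efx_ef10_filter_insert_addr_list() above uses an insert-or-unwind pattern when rollback is requested: the first failed insertion removes everything inserted so far, so the caller can fall back to a promiscuous default filter. A compact sketch of that control flow, with hypothetical insert_filter()/remove_filter() stubs standing in for the real MCDI-backed calls:

#include <stdio.h>

#define ID_INVALID 0xffffu
#define N_ADDRS 4

/* Hypothetical stand-ins for the real filter insert/remove operations */
static int insert_filter(unsigned int addr_index)
{
	return (addr_index == 2) ? -1 : (int)(100 + addr_index); /* fail on #2 */
}

static void remove_filter(unsigned int id)
{
	printf("rolled back filter id %u\n", id);
}

static int insert_addr_list(unsigned int *ids, int rollback)
{
	for (unsigned int i = 0; i < N_ADDRS; i++) {
		int rc = insert_filter(i);

		if (rc < 0) {
			if (!rollback)
				continue;          /* keep ID_INVALID, carry on */
			for (unsigned int j = 0; j < i; j++) {
				remove_filter(ids[j]);
				ids[j] = ID_INVALID;
			}
			return rc;                 /* caller falls back to promisc */
		}
		ids[i] = (unsigned int)rc;
	}
	return 0;
}

int main(void)
{
	unsigned int ids[N_ADDRS] = {
		ID_INVALID, ID_INVALID, ID_INVALID, ID_INVALID
	};

	printf("rc = %d\n", insert_addr_list(ids, 1));
	return 0;
}

Without rollback the failed slot simply keeps its invalid ID, which is the behaviour the multicast path relies on when it prefers a partial list over none.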
- */ - netif_cond_dbg(efx, drv, efx->net_dev, - rc == -EPERM && (encap_type || !multicast), warn, - "%s%s%s mismatch filter insert failed rc=%d\n", - encap_name, encap_ipv, um, rc); - } else if (multicast) { - /* mapping from encap types to default filter IDs (multicast) */ - static enum efx_ef10_default_filters map[] = { - [EFX_ENCAP_TYPE_NONE] = EFX_EF10_MCDEF, - [EFX_ENCAP_TYPE_VXLAN] = EFX_EF10_VXLAN4_MCDEF, - [EFX_ENCAP_TYPE_NVGRE] = EFX_EF10_NVGRE4_MCDEF, - [EFX_ENCAP_TYPE_GENEVE] = EFX_EF10_GENEVE4_MCDEF, - [EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6] = - EFX_EF10_VXLAN6_MCDEF, - [EFX_ENCAP_TYPE_NVGRE | EFX_ENCAP_FLAG_IPV6] = - EFX_EF10_NVGRE6_MCDEF, - [EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6] = - EFX_EF10_GENEVE6_MCDEF, - }; - - /* quick bounds check (BCAST result impossible) */ - BUILD_BUG_ON(EFX_EF10_BCAST != 0); - if (encap_type >= ARRAY_SIZE(map) || map[encap_type] == 0) { - WARN_ON(1); - return -EINVAL; - } - /* then follow map */ - id = &vlan->default_filters[map[encap_type]]; - - EFX_WARN_ON_PARANOID(*id != EFX_EF10_FILTER_ID_INVALID); - *id = efx_ef10_filter_get_unsafe_id(rc); - if (!nic_data->workaround_26807 && !encap_type) { - /* Also need an Ethernet broadcast filter */ - efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, - filter_flags, 0); - eth_broadcast_addr(baddr); - efx_filter_set_eth_local(&spec, vlan->vid, baddr); - rc = efx_ef10_filter_insert_locked(efx, &spec, true); - if (rc < 0) { - netif_warn(efx, drv, efx->net_dev, - "Broadcast filter insert failed rc=%d\n", - rc); - if (rollback) { - /* Roll back the mc_def filter */ - efx_ef10_filter_remove_unsafe( - efx, EFX_FILTER_PRI_AUTO, - *id); - *id = EFX_EF10_FILTER_ID_INVALID; - return rc; - } - } else { - EFX_WARN_ON_PARANOID( - vlan->default_filters[EFX_EF10_BCAST] != - EFX_EF10_FILTER_ID_INVALID); - vlan->default_filters[EFX_EF10_BCAST] = - efx_ef10_filter_get_unsafe_id(rc); - } - } - rc = 0; - } else { - /* mapping from encap types to default filter IDs (unicast) */ - static enum efx_ef10_default_filters map[] = { - [EFX_ENCAP_TYPE_NONE] = EFX_EF10_UCDEF, - [EFX_ENCAP_TYPE_VXLAN] = EFX_EF10_VXLAN4_UCDEF, - [EFX_ENCAP_TYPE_NVGRE] = EFX_EF10_NVGRE4_UCDEF, - [EFX_ENCAP_TYPE_GENEVE] = EFX_EF10_GENEVE4_UCDEF, - [EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6] = - EFX_EF10_VXLAN6_UCDEF, - [EFX_ENCAP_TYPE_NVGRE | EFX_ENCAP_FLAG_IPV6] = - EFX_EF10_NVGRE6_UCDEF, - [EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6] = - EFX_EF10_GENEVE6_UCDEF, - }; - - /* quick bounds check (BCAST result impossible) */ - BUILD_BUG_ON(EFX_EF10_BCAST != 0); - if (encap_type >= ARRAY_SIZE(map) || map[encap_type] == 0) { - WARN_ON(1); - return -EINVAL; - } - /* then follow map */ - id = &vlan->default_filters[map[encap_type]]; - EFX_WARN_ON_PARANOID(*id != EFX_EF10_FILTER_ID_INVALID); - *id = rc; - rc = 0; - } - return rc; -} - -/* Remove filters that weren't renewed. 
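The default-filter bookkeeping above indexes a small static table by encap type (with the IPv6 flag folded into the index) and bounds-checks the result, using slot 0 (BCAST) as the "no entry" marker. A standalone sketch of that table-plus-bounds-check pattern; the enum values here are invented for the example and are not the driver's:

#include <stdio.h>

/* Made-up stand-ins for the driver's encap types and default-filter slots */
enum encap { ENCAP_NONE = 0, ENCAP_VXLAN = 1, ENCAP_NVGRE = 2, ENCAP_GENEVE = 3,
	     ENCAP_FLAG_IPV6 = 4 };
enum def_slot { SLOT_BCAST = 0, SLOT_MCDEF, SLOT_VXLAN4, SLOT_VXLAN6 };

static int default_slot_for(unsigned int encap_type)
{
	/* indexed by encap type | optional IPv6 flag; 0 (BCAST) means "no entry" */
	static const enum def_slot map[] = {
		[ENCAP_NONE]                    = SLOT_MCDEF,
		[ENCAP_VXLAN]                   = SLOT_VXLAN4,
		[ENCAP_VXLAN | ENCAP_FLAG_IPV6] = SLOT_VXLAN6,
	};

	if (encap_type >= sizeof(map) / sizeof(map[0]) || map[encap_type] == 0)
		return -1;      /* unsupported combination */
	return map[encap_type];
}

int main(void)
{
	printf("%d %d %d\n",
	       default_slot_for(ENCAP_VXLAN),
	       default_slot_for(ENCAP_VXLAN | ENCAP_FLAG_IPV6),
	       default_slot_for(ENCAP_NVGRE));   /* no entry in this sketch */
	return 0;
}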
*/ -static void efx_ef10_filter_remove_old(struct efx_nic *efx) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - int remove_failed = 0; - int remove_noent = 0; - int rc; - int i; - - down_write(&table->lock); - for (i = 0; i < HUNT_FILTER_TBL_ROWS; i++) { - if (READ_ONCE(table->entry[i].spec) & - EFX_EF10_FILTER_FLAG_AUTO_OLD) { - rc = efx_ef10_filter_remove_internal(efx, - 1U << EFX_FILTER_PRI_AUTO, i, true); - if (rc == -ENOENT) - remove_noent++; - else if (rc) - remove_failed++; - } - } - up_write(&table->lock); - - if (remove_failed) - netif_info(efx, drv, efx->net_dev, - "%s: failed to remove %d filters\n", - __func__, remove_failed); - if (remove_noent) - netif_info(efx, drv, efx->net_dev, - "%s: failed to remove %d non-existent filters\n", - __func__, remove_noent); -} - static int efx_ef10_vport_set_mac_address(struct efx_nic *efx) { struct efx_ef10_nic_data *nic_data = efx->nic_data; @@ -5545,7 +3153,7 @@ static int efx_ef10_vport_set_mac_address(struct efx_nic *efx) efx_device_detach_sync(efx); efx_net_stop(efx->net_dev); down_write(&efx->filter_sem); - efx_ef10_filter_table_remove(efx); + efx_mcdi_filter_table_remove(efx); up_write(&efx->filter_sem); rc = efx_ef10_vadaptor_free(efx, nic_data->vport_id); @@ -5577,7 +3185,7 @@ restore_vadaptor: goto reset_nic; restore_filters: down_write(&efx->filter_sem); - rc2 = efx_ef10_filter_table_probe(efx); + rc2 = efx_mcdi_filter_table_probe(efx); up_write(&efx->filter_sem); if (rc2) goto reset_nic; @@ -5598,256 +3206,6 @@ reset_nic: return rc ? rc : rc2; } -/* Caller must hold efx->filter_sem for read if race against - * efx_ef10_filter_table_remove() is possible - */ -static void efx_ef10_filter_vlan_sync_rx_mode(struct efx_nic *efx, - struct efx_ef10_filter_vlan *vlan) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - struct efx_ef10_nic_data *nic_data = efx->nic_data; - - /* Do not install unspecified VID if VLAN filtering is enabled. - * Do not install all specified VIDs if VLAN filtering is disabled. - */ - if ((vlan->vid == EFX_FILTER_VID_UNSPEC) == table->vlan_filter) - return; - - /* Insert/renew unicast filters */ - if (table->uc_promisc) { - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NONE, - false, false); - efx_ef10_filter_insert_addr_list(efx, vlan, false, false); - } else { - /* If any of the filters failed to insert, fall back to - * promiscuous mode - add in the uc_def filter. But keep - * our individual unicast filters. 
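Taken together, efx_ef10_filter_mark_old() and efx_ef10_filter_remove_old() behave like a mark-and-sweep pass: every automatic filter is flagged old, re-inserting a still-wanted filter clears the flag, and the sweep removes whatever stayed flagged. A self-contained model of that renewal cycle, with simplified flags and table in place of the driver's types:

#include <stdbool.h>
#include <stdio.h>

#define N_FILTERS 6
#define FLAG_AUTO_OLD 0x1u

struct filter {
	bool present;
	unsigned int flags;
};

static void mark_old(struct filter *tbl)
{
	for (int i = 0; i < N_FILTERS; i++)
		if (tbl[i].present)
			tbl[i].flags |= FLAG_AUTO_OLD;
}

/* Renewing a filter that is still wanted clears its "old" mark */
static void renew(struct filter *tbl, int i)
{
	tbl[i].present = true;
	tbl[i].flags &= ~FLAG_AUTO_OLD;
}

static void remove_old(struct filter *tbl)
{
	for (int i = 0; i < N_FILTERS; i++)
		if (tbl[i].present && (tbl[i].flags & FLAG_AUTO_OLD)) {
			tbl[i].present = false;
			printf("swept stale filter %d\n", i);
		}
}

int main(void)
{
	struct filter tbl[N_FILTERS] = {
		[0] = { true, 0 }, [2] = { true, 0 }, [4] = { true, 0 },
	};

	mark_old(tbl);
	renew(tbl, 0);          /* still in the address list */
	renew(tbl, 4);
	remove_old(tbl);        /* filter 2 was not renewed and gets swept */
	return 0;
}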
- */ - if (efx_ef10_filter_insert_addr_list(efx, vlan, false, false)) - efx_ef10_filter_insert_def(efx, vlan, - EFX_ENCAP_TYPE_NONE, - false, false); - } - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN, - false, false); - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN | - EFX_ENCAP_FLAG_IPV6, - false, false); - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE, - false, false); - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE | - EFX_ENCAP_FLAG_IPV6, - false, false); - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE, - false, false); - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE | - EFX_ENCAP_FLAG_IPV6, - false, false); - - /* Insert/renew multicast filters */ - /* If changing promiscuous state with cascaded multicast filters, remove - * old filters first, so that packets are dropped rather than duplicated - */ - if (nic_data->workaround_26807 && - table->mc_promisc_last != table->mc_promisc) - efx_ef10_filter_remove_old(efx); - if (table->mc_promisc) { - if (nic_data->workaround_26807) { - /* If we failed to insert promiscuous filters, rollback - * and fall back to individual multicast filters - */ - if (efx_ef10_filter_insert_def(efx, vlan, - EFX_ENCAP_TYPE_NONE, - true, true)) { - /* Changing promisc state, so remove old filters */ - efx_ef10_filter_remove_old(efx); - efx_ef10_filter_insert_addr_list(efx, vlan, - true, false); - } - } else { - /* If we failed to insert promiscuous filters, don't - * rollback. Regardless, also insert the mc_list, - * unless it's incomplete due to overflow - */ - efx_ef10_filter_insert_def(efx, vlan, - EFX_ENCAP_TYPE_NONE, - true, false); - if (!table->mc_overflow) - efx_ef10_filter_insert_addr_list(efx, vlan, - true, false); - } - } else { - /* If any filters failed to insert, rollback and fall back to - * promiscuous mode - mc_def filter and maybe broadcast. If - * that fails, roll back again and insert as many of our - * individual multicast filters as we can. 
- */ - if (efx_ef10_filter_insert_addr_list(efx, vlan, true, true)) { - /* Changing promisc state, so remove old filters */ - if (nic_data->workaround_26807) - efx_ef10_filter_remove_old(efx); - if (efx_ef10_filter_insert_def(efx, vlan, - EFX_ENCAP_TYPE_NONE, - true, true)) - efx_ef10_filter_insert_addr_list(efx, vlan, - true, false); - } - } - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN, - true, false); - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN | - EFX_ENCAP_FLAG_IPV6, - true, false); - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE, - true, false); - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE | - EFX_ENCAP_FLAG_IPV6, - true, false); - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE, - true, false); - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE | - EFX_ENCAP_FLAG_IPV6, - true, false); -} - -/* Caller must hold efx->filter_sem for read if race against - * efx_ef10_filter_table_remove() is possible - */ -static void efx_ef10_filter_sync_rx_mode(struct efx_nic *efx) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - struct net_device *net_dev = efx->net_dev; - struct efx_ef10_filter_vlan *vlan; - bool vlan_filter; - - if (!efx_dev_registered(efx)) - return; - - if (!table) - return; - - efx_ef10_filter_mark_old(efx); - - /* Copy/convert the address lists; add the primary station - * address and broadcast address - */ - netif_addr_lock_bh(net_dev); - efx_ef10_filter_uc_addr_list(efx); - efx_ef10_filter_mc_addr_list(efx); - netif_addr_unlock_bh(net_dev); - - /* If VLAN filtering changes, all old filters are finally removed. - * Do it in advance to avoid conflicts for unicast untagged and - * VLAN 0 tagged filters. - */ - vlan_filter = !!(net_dev->features & NETIF_F_HW_VLAN_CTAG_FILTER); - if (table->vlan_filter != vlan_filter) { - table->vlan_filter = vlan_filter; - efx_ef10_filter_remove_old(efx); - } - - list_for_each_entry(vlan, &table->vlan_list, list) - efx_ef10_filter_vlan_sync_rx_mode(efx, vlan); - - efx_ef10_filter_remove_old(efx); - table->mc_promisc_last = table->mc_promisc; -} - -static struct efx_ef10_filter_vlan *efx_ef10_filter_find_vlan(struct efx_nic *efx, u16 vid) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - struct efx_ef10_filter_vlan *vlan; - - WARN_ON(!rwsem_is_locked(&efx->filter_sem)); - - list_for_each_entry(vlan, &table->vlan_list, list) { - if (vlan->vid == vid) - return vlan; - } - - return NULL; -} - -static int efx_ef10_filter_add_vlan(struct efx_nic *efx, u16 vid) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - struct efx_ef10_filter_vlan *vlan; - unsigned int i; - - if (!efx_rwsem_assert_write_locked(&efx->filter_sem)) - return -EINVAL; - - vlan = efx_ef10_filter_find_vlan(efx, vid); - if (WARN_ON(vlan)) { - netif_err(efx, drv, efx->net_dev, - "VLAN %u already added\n", vid); - return -EALREADY; - } - - vlan = kzalloc(sizeof(*vlan), GFP_KERNEL); - if (!vlan) - return -ENOMEM; - - vlan->vid = vid; - - for (i = 0; i < ARRAY_SIZE(vlan->uc); i++) - vlan->uc[i] = EFX_EF10_FILTER_ID_INVALID; - for (i = 0; i < ARRAY_SIZE(vlan->mc); i++) - vlan->mc[i] = EFX_EF10_FILTER_ID_INVALID; - for (i = 0; i < EFX_EF10_NUM_DEFAULT_FILTERS; i++) - vlan->default_filters[i] = EFX_EF10_FILTER_ID_INVALID; - - list_add_tail(&vlan->list, &table->vlan_list); - - if (efx_dev_registered(efx)) - efx_ef10_filter_vlan_sync_rx_mode(efx, vlan); - - return 0; -} - -static void efx_ef10_filter_del_vlan_internal(struct efx_nic *efx, - struct efx_ef10_filter_vlan 
*vlan) -{ - unsigned int i; - - /* See comment in efx_ef10_filter_table_remove() */ - if (!efx_rwsem_assert_write_locked(&efx->filter_sem)) - return; - - list_del(&vlan->list); - - for (i = 0; i < ARRAY_SIZE(vlan->uc); i++) - efx_ef10_filter_remove_unsafe(efx, EFX_FILTER_PRI_AUTO, - vlan->uc[i]); - for (i = 0; i < ARRAY_SIZE(vlan->mc); i++) - efx_ef10_filter_remove_unsafe(efx, EFX_FILTER_PRI_AUTO, - vlan->mc[i]); - for (i = 0; i < EFX_EF10_NUM_DEFAULT_FILTERS; i++) - if (vlan->default_filters[i] != EFX_EF10_FILTER_ID_INVALID) - efx_ef10_filter_remove_unsafe(efx, EFX_FILTER_PRI_AUTO, - vlan->default_filters[i]); - - kfree(vlan); -} - -static void efx_ef10_filter_del_vlan(struct efx_nic *efx, u16 vid) -{ - struct efx_ef10_filter_vlan *vlan; - - /* See comment in efx_ef10_filter_table_remove() */ - if (!efx_rwsem_assert_write_locked(&efx->filter_sem)) - return; - - vlan = efx_ef10_filter_find_vlan(efx, vid); - if (!vlan) { - netif_err(efx, drv, efx->net_dev, - "VLAN %u not found in filter state\n", vid); - return; - } - - efx_ef10_filter_del_vlan_internal(efx, vlan); -} - static int efx_ef10_set_mac_address(struct efx_nic *efx) { MCDI_DECLARE_BUF(inbuf, MC_CMD_VADAPTOR_SET_MAC_IN_LEN); @@ -5860,7 +3218,7 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) mutex_lock(&efx->mac_lock); down_write(&efx->filter_sem); - efx_ef10_filter_table_remove(efx); + efx_mcdi_filter_table_remove(efx); ether_addr_copy(MCDI_PTR(inbuf, VADAPTOR_SET_MAC_IN_MACADDR), efx->net_dev->dev_addr); @@ -5869,7 +3227,7 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) rc = efx_mcdi_rpc_quiet(efx, MC_CMD_VADAPTOR_SET_MAC, inbuf, sizeof(inbuf), NULL, 0, NULL); - efx_ef10_filter_table_probe(efx); + efx_mcdi_filter_table_probe(efx); up_write(&efx->filter_sem); mutex_unlock(&efx->mac_lock); @@ -5931,14 +3289,14 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) static int efx_ef10_mac_reconfigure(struct efx_nic *efx) { - efx_ef10_filter_sync_rx_mode(efx); + efx_mcdi_filter_sync_rx_mode(efx); return efx_mcdi_set_mac(efx); } static int efx_ef10_mac_reconfigure_vf(struct efx_nic *efx) { - efx_ef10_filter_sync_rx_mode(efx); + efx_mcdi_filter_sync_rx_mode(efx); return 0; } @@ -6650,36 +4008,36 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = { .irq_handle_legacy = efx_ef10_legacy_interrupt, .tx_probe = efx_ef10_tx_probe, .tx_init = efx_ef10_tx_init, - .tx_remove = efx_ef10_tx_remove, + .tx_remove = efx_mcdi_tx_remove, .tx_write = efx_ef10_tx_write, .tx_limit_len = efx_ef10_tx_limit_len, - .rx_push_rss_config = efx_ef10_vf_rx_push_rss_config, - .rx_pull_rss_config = efx_ef10_rx_pull_rss_config, - .rx_probe = efx_ef10_rx_probe, - .rx_init = efx_ef10_rx_init, - .rx_remove = efx_ef10_rx_remove, + .rx_push_rss_config = efx_mcdi_vf_rx_push_rss_config, + .rx_pull_rss_config = efx_mcdi_rx_pull_rss_config, + .rx_probe = efx_mcdi_rx_probe, + .rx_init = efx_mcdi_rx_init, + .rx_remove = efx_mcdi_rx_remove, .rx_write = efx_ef10_rx_write, .rx_defer_refill = efx_ef10_rx_defer_refill, - .ev_probe = efx_ef10_ev_probe, + .ev_probe = efx_mcdi_ev_probe, .ev_init = efx_ef10_ev_init, - .ev_fini = efx_ef10_ev_fini, - .ev_remove = efx_ef10_ev_remove, + .ev_fini = efx_mcdi_ev_fini, + .ev_remove = efx_mcdi_ev_remove, .ev_process = efx_ef10_ev_process, .ev_read_ack = efx_ef10_ev_read_ack, .ev_test_generate = efx_ef10_ev_test_generate, - .filter_table_probe = efx_ef10_filter_table_probe, - .filter_table_restore = efx_ef10_filter_table_restore, - .filter_table_remove = efx_ef10_filter_table_remove, - .filter_update_rx_scatter = 
efx_ef10_filter_update_rx_scatter,
-	.filter_insert = efx_ef10_filter_insert,
-	.filter_remove_safe = efx_ef10_filter_remove_safe,
-	.filter_get_safe = efx_ef10_filter_get_safe,
-	.filter_clear_rx = efx_ef10_filter_clear_rx,
-	.filter_count_rx_used = efx_ef10_filter_count_rx_used,
-	.filter_get_rx_id_limit = efx_ef10_filter_get_rx_id_limit,
-	.filter_get_rx_ids = efx_ef10_filter_get_rx_ids,
+	.filter_table_probe = efx_mcdi_filter_table_probe,
+	.filter_table_restore = efx_mcdi_filter_table_restore,
+	.filter_table_remove = efx_mcdi_filter_table_remove,
+	.filter_update_rx_scatter = efx_mcdi_update_rx_scatter,
+	.filter_insert = efx_mcdi_filter_insert,
+	.filter_remove_safe = efx_mcdi_filter_remove_safe,
+	.filter_get_safe = efx_mcdi_filter_get_safe,
+	.filter_clear_rx = efx_mcdi_filter_clear_rx,
+	.filter_count_rx_used = efx_mcdi_filter_count_rx_used,
+	.filter_get_rx_id_limit = efx_mcdi_filter_get_rx_id_limit,
+	.filter_get_rx_ids = efx_mcdi_filter_get_rx_ids,
 #ifdef CONFIG_RFS_ACCEL
-	.filter_rfs_expire_one = efx_ef10_filter_rfs_expire_one,
+	.filter_rfs_expire_one = efx_mcdi_filter_rfs_expire_one,
 #endif
 #ifdef CONFIG_SFC_MTD
 	.mtd_probe = efx_port_dummy_op_int,
@@ -6709,7 +4067,7 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = {
 	.timer_period_max = 1 << ERF_DD_EVQ_IND_TIMER_VAL_WIDTH,
 	.offload_features = EF10_OFFLOAD_FEATURES,
 	.mcdi_max_ver = 2,
-	.max_rx_ip_filters = HUNT_FILTER_TBL_ROWS,
+	.max_rx_ip_filters = EFX_MCDI_FILTER_TBL_ROWS,
 	.hwtstamp_filters = 1 << HWTSTAMP_FILTER_NONE |
 			    1 << HWTSTAMP_FILTER_ALL,
 	.rx_hash_key_size = 40,
@@ -6759,39 +4117,39 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
 	.irq_handle_legacy = efx_ef10_legacy_interrupt,
 	.tx_probe = efx_ef10_tx_probe,
 	.tx_init = efx_ef10_tx_init,
-	.tx_remove = efx_ef10_tx_remove,
+	.tx_remove = efx_mcdi_tx_remove,
 	.tx_write = efx_ef10_tx_write,
 	.tx_limit_len = efx_ef10_tx_limit_len,
-	.rx_push_rss_config = efx_ef10_pf_rx_push_rss_config,
-	.rx_pull_rss_config = efx_ef10_rx_pull_rss_config,
-	.rx_push_rss_context_config = efx_ef10_rx_push_rss_context_config,
-	.rx_pull_rss_context_config = efx_ef10_rx_pull_rss_context_config,
-	.rx_restore_rss_contexts = efx_ef10_rx_restore_rss_contexts,
-	.rx_probe = efx_ef10_rx_probe,
-	.rx_init = efx_ef10_rx_init,
-	.rx_remove = efx_ef10_rx_remove,
+	.rx_push_rss_config = efx_mcdi_pf_rx_push_rss_config,
+	.rx_pull_rss_config = efx_mcdi_rx_pull_rss_config,
+	.rx_push_rss_context_config = efx_mcdi_rx_push_rss_context_config,
+	.rx_pull_rss_context_config = efx_mcdi_rx_pull_rss_context_config,
+	.rx_restore_rss_contexts = efx_mcdi_rx_restore_rss_contexts,
+	.rx_probe = efx_mcdi_rx_probe,
+	.rx_init = efx_mcdi_rx_init,
+	.rx_remove = efx_mcdi_rx_remove,
 	.rx_write = efx_ef10_rx_write,
 	.rx_defer_refill = efx_ef10_rx_defer_refill,
-	.ev_probe = efx_ef10_ev_probe,
+	.ev_probe = efx_mcdi_ev_probe,
 	.ev_init = efx_ef10_ev_init,
-	.ev_fini = efx_ef10_ev_fini,
-	.ev_remove = efx_ef10_ev_remove,
+	.ev_fini = efx_mcdi_ev_fini,
+	.ev_remove = efx_mcdi_ev_remove,
 	.ev_process = efx_ef10_ev_process,
 	.ev_read_ack = efx_ef10_ev_read_ack,
 	.ev_test_generate = efx_ef10_ev_test_generate,
-	.filter_table_probe = efx_ef10_filter_table_probe,
-	.filter_table_restore = efx_ef10_filter_table_restore,
-	.filter_table_remove = efx_ef10_filter_table_remove,
-	.filter_update_rx_scatter = efx_ef10_filter_update_rx_scatter,
-	.filter_insert = efx_ef10_filter_insert,
-	.filter_remove_safe = efx_ef10_filter_remove_safe,
-	.filter_get_safe = efx_ef10_filter_get_safe,
-	.filter_clear_rx = efx_ef10_filter_clear_rx,
-
.filter_count_rx_used = efx_ef10_filter_count_rx_used, - .filter_get_rx_id_limit = efx_ef10_filter_get_rx_id_limit, - .filter_get_rx_ids = efx_ef10_filter_get_rx_ids, + .filter_table_probe = efx_mcdi_filter_table_probe, + .filter_table_restore = efx_mcdi_filter_table_restore, + .filter_table_remove = efx_mcdi_filter_table_remove, + .filter_update_rx_scatter = efx_mcdi_update_rx_scatter, + .filter_insert = efx_mcdi_filter_insert, + .filter_remove_safe = efx_mcdi_filter_remove_safe, + .filter_get_safe = efx_mcdi_filter_get_safe, + .filter_clear_rx = efx_mcdi_filter_clear_rx, + .filter_count_rx_used = efx_mcdi_filter_count_rx_used, + .filter_get_rx_id_limit = efx_mcdi_filter_get_rx_id_limit, + .filter_get_rx_ids = efx_mcdi_filter_get_rx_ids, #ifdef CONFIG_RFS_ACCEL - .filter_rfs_expire_one = efx_ef10_filter_rfs_expire_one, + .filter_rfs_expire_one = efx_mcdi_filter_rfs_expire_one, #endif #ifdef CONFIG_SFC_MTD .mtd_probe = efx_ef10_mtd_probe, @@ -6844,7 +4202,7 @@ const struct efx_nic_type efx_hunt_a0_nic_type = { .timer_period_max = 1 << ERF_DD_EVQ_IND_TIMER_VAL_WIDTH, .offload_features = EF10_OFFLOAD_FEATURES, .mcdi_max_ver = 2, - .max_rx_ip_filters = HUNT_FILTER_TBL_ROWS, + .max_rx_ip_filters = EFX_MCDI_FILTER_TBL_ROWS, .hwtstamp_filters = 1 << HWTSTAMP_FILTER_NONE | 1 << HWTSTAMP_FILTER_ALL, .rx_hash_key_size = 40, diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c index 52bd43f45761..14393767ef9f 100644 --- a/drivers/net/ethernet/sfc/ef10_sriov.c +++ b/drivers/net/ethernet/sfc/ef10_sriov.c @@ -522,10 +522,9 @@ int efx_ef10_sriov_set_vf_mac(struct efx_nic *efx, int vf_i, u8 *mac) if (!is_zero_ether_addr(mac)) { rc = efx_ef10_vport_add_mac(efx, vf->vport_id, mac); - if (rc) { - eth_zero_addr(vf->mac); + if (rc) goto fail; - } + if (vf->efx) ether_addr_copy(vf->efx->net_dev->dev_addr, mac); } diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 7a38d7f282a1..4481f21a1f43 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -23,6 +23,10 @@ #include <net/gre.h> #include <net/udp_tunnel.h> #include "efx.h" +#include "efx_common.h" +#include "efx_channels.h" +#include "rx_common.h" +#include "tx_common.h" #include "nic.h" #include "io.h" #include "selftest.h" @@ -39,56 +43,6 @@ ************************************************************************** */ -/* Loopback mode names (see LOOPBACK_MODE()) */ -const unsigned int efx_loopback_mode_max = LOOPBACK_MAX; -const char *const efx_loopback_mode_names[] = { - [LOOPBACK_NONE] = "NONE", - [LOOPBACK_DATA] = "DATAPATH", - [LOOPBACK_GMAC] = "GMAC", - [LOOPBACK_XGMII] = "XGMII", - [LOOPBACK_XGXS] = "XGXS", - [LOOPBACK_XAUI] = "XAUI", - [LOOPBACK_GMII] = "GMII", - [LOOPBACK_SGMII] = "SGMII", - [LOOPBACK_XGBR] = "XGBR", - [LOOPBACK_XFI] = "XFI", - [LOOPBACK_XAUI_FAR] = "XAUI_FAR", - [LOOPBACK_GMII_FAR] = "GMII_FAR", - [LOOPBACK_SGMII_FAR] = "SGMII_FAR", - [LOOPBACK_XFI_FAR] = "XFI_FAR", - [LOOPBACK_GPHY] = "GPHY", - [LOOPBACK_PHYXS] = "PHYXS", - [LOOPBACK_PCS] = "PCS", - [LOOPBACK_PMAPMD] = "PMA/PMD", - [LOOPBACK_XPORT] = "XPORT", - [LOOPBACK_XGMII_WS] = "XGMII_WS", - [LOOPBACK_XAUI_WS] = "XAUI_WS", - [LOOPBACK_XAUI_WS_FAR] = "XAUI_WS_FAR", - [LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR", - [LOOPBACK_GMII_WS] = "GMII_WS", - [LOOPBACK_XFI_WS] = "XFI_WS", - [LOOPBACK_XFI_WS_FAR] = "XFI_WS_FAR", - [LOOPBACK_PHYXS_WS] = "PHYXS_WS", -}; - -const unsigned int efx_reset_type_max = RESET_TYPE_MAX; -const char *const efx_reset_type_names[] = { - [RESET_TYPE_INVISIBLE] 
= "INVISIBLE", - [RESET_TYPE_ALL] = "ALL", - [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL", - [RESET_TYPE_WORLD] = "WORLD", - [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE", - [RESET_TYPE_DATAPATH] = "DATAPATH", - [RESET_TYPE_MC_BIST] = "MC_BIST", - [RESET_TYPE_DISABLE] = "DISABLE", - [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG", - [RESET_TYPE_INT_ERROR] = "INT_ERROR", - [RESET_TYPE_DMA_ERROR] = "DMA_ERROR", - [RESET_TYPE_TX_SKIP] = "TX_SKIP", - [RESET_TYPE_MC_FAILURE] = "MC_FAILURE", - [RESET_TYPE_MCDI_TIMEOUT] = "MCDI_TIMEOUT (FLR)", -}; - /* UDP tunnel type names */ static const char *const efx_udp_tunnel_type_names[] = { [TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN] = "vxlan", @@ -104,18 +58,6 @@ void efx_get_udp_tunnel_type_name(u16 type, char *buf, size_t buflen) snprintf(buf, buflen, "type %d", type); } -/* Reset workqueue. If any NIC has a hardware failure then a reset will be - * queued onto this work queue. This is not a per-nic work queue, because - * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised. - */ -static struct workqueue_struct *reset_workqueue; - -/* How often and how many times to poll for a reset while waiting for a - * BIST that another function started to complete. - */ -#define BIST_WAIT_DELAY_MS 100 -#define BIST_WAIT_DELAY_COUNT 100 - /************************************************************************** * * Configurable values @@ -135,21 +77,6 @@ module_param(efx_separate_tx_channels, bool, 0444); MODULE_PARM_DESC(efx_separate_tx_channels, "Use separate channels for TX and RX"); -/* This is the weight assigned to each of the (per-channel) virtual - * NAPI devices. - */ -static int napi_weight = 64; - -/* This is the time (in jiffies) between invocations of the hardware - * monitor. - * On Falcon-based NICs, this will: - * - Check the on-board hardware monitor; - * - Poll the link state and reconfigure the hardware as necessary. - * On Siena-based NICs for power systems with EEH support, this will give EEH a - * chance to start. - */ -static unsigned int efx_monitor_interval = 1 * HZ; - /* Initial interrupt moderation settings. They can be modified after * module load with ethtool. * @@ -169,38 +96,10 @@ static unsigned int rx_irq_mod_usec = 60; */ static unsigned int tx_irq_mod_usec = 150; -/* This is the first interrupt mode to try out of: - * 0 => MSI-X - * 1 => MSI - * 2 => legacy - */ -static unsigned int interrupt_mode; - -/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS), - * i.e. the number of CPUs among which we may distribute simultaneous - * interrupt handling. - * - * Cards without MSI-X will only target one CPU via legacy or MSI interrupt. - * The default (0) means to assign an interrupt to each core. 
- */ -static unsigned int rss_cpus; -module_param(rss_cpus, uint, 0444); -MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling"); - static bool phy_flash_cfg; module_param(phy_flash_cfg, bool, 0644); MODULE_PARM_DESC(phy_flash_cfg, "Set PHYs into reflash mode initially"); -static unsigned irq_adapt_low_thresh = 8000; -module_param(irq_adapt_low_thresh, uint, 0644); -MODULE_PARM_DESC(irq_adapt_low_thresh, - "Threshold score for reducing IRQ moderation"); - -static unsigned irq_adapt_high_thresh = 16000; -module_param(irq_adapt_high_thresh, uint, 0644); -MODULE_PARM_DESC(irq_adapt_high_thresh, - "Threshold score for increasing IRQ moderation"); - static unsigned debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP | NETIF_MSG_RX_ERR | @@ -214,18 +113,8 @@ MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value"); * *************************************************************************/ -static int efx_soft_enable_interrupts(struct efx_nic *efx); -static void efx_soft_disable_interrupts(struct efx_nic *efx); -static void efx_remove_channel(struct efx_channel *channel); -static void efx_remove_channels(struct efx_nic *efx); static const struct efx_channel_type efx_default_channel_type; static void efx_remove_port(struct efx_nic *efx); -static void efx_init_napi_channel(struct efx_channel *channel); -static void efx_fini_napi(struct efx_nic *efx); -static void efx_fini_napi_channel(struct efx_channel *channel); -static void efx_fini_struct(struct efx_nic *efx); -static void efx_start_all(struct efx_nic *efx); -static void efx_stop_all(struct efx_nic *efx); static int efx_xdp_setup_prog(struct efx_nic *efx, struct bpf_prog *prog); static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp); static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs, @@ -239,776 +128,12 @@ static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs, ASSERT_RTNL(); \ } while (0) -static int efx_check_disabled(struct efx_nic *efx) -{ - if (efx->state == STATE_DISABLED || efx->state == STATE_RECOVERY) { - netif_err(efx, drv, efx->net_dev, - "device is disabled due to earlier errors\n"); - return -EIO; - } - return 0; -} - -/************************************************************************** - * - * Event queue processing - * - *************************************************************************/ - -/* Process channel's event queue - * - * This function is responsible for processing the event queue of a - * single channel. The caller must guarantee that this function will - * never be concurrently called more than once on the same channel, - * though different channels may be being processed concurrently. 
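The irq_adapt_low_thresh/irq_adapt_high_thresh parameters above feed the adaptive moderation helper further down: an event score under the low threshold shortens the IRQ moderation interval by one step, a score over the high threshold lengthens it up to the configured RX maximum. A standalone sketch of that adjustment (the step and ceiling values below are arbitrary example numbers):

#include <stdio.h>

#define IRQ_ADAPT_LOW_THRESH 8000u
#define IRQ_ADAPT_HIGH_THRESH 16000u

struct chan {
	unsigned int irq_mod_score;     /* events counted since last adjustment */
	unsigned int irq_moderation_us; /* current moderation interval */
};

/* step and max mirror efx->irq_mod_step_us / efx->irq_rx_moderation_us */
static void update_irq_mod(struct chan *ch, unsigned int step, unsigned int max)
{
	if (ch->irq_mod_score < IRQ_ADAPT_LOW_THRESH) {
		if (ch->irq_moderation_us > step)
			ch->irq_moderation_us -= step;   /* light load: react faster */
	} else if (ch->irq_mod_score > IRQ_ADAPT_HIGH_THRESH) {
		if (ch->irq_moderation_us < max)
			ch->irq_moderation_us += step;   /* heavy load: batch more */
	}
	ch->irq_mod_score = 0;
}

int main(void)
{
	struct chan ch = { .irq_mod_score = 20000, .irq_moderation_us = 30 };

	update_irq_mod(&ch, 10, 60);
	printf("moderation now %u us\n", ch.irq_moderation_us);   /* 40 */
	return 0;
}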
- */ -static int efx_process_channel(struct efx_channel *channel, int budget) -{ - struct efx_tx_queue *tx_queue; - struct list_head rx_list; - int spent; - - if (unlikely(!channel->enabled)) - return 0; - - /* Prepare the batch receive list */ - EFX_WARN_ON_PARANOID(channel->rx_list != NULL); - INIT_LIST_HEAD(&rx_list); - channel->rx_list = &rx_list; - - efx_for_each_channel_tx_queue(tx_queue, channel) { - tx_queue->pkts_compl = 0; - tx_queue->bytes_compl = 0; - } - - spent = efx_nic_process_eventq(channel, budget); - if (spent && efx_channel_has_rx_queue(channel)) { - struct efx_rx_queue *rx_queue = - efx_channel_get_rx_queue(channel); - - efx_rx_flush_packet(channel); - efx_fast_push_rx_descriptors(rx_queue, true); - } - - /* Update BQL */ - efx_for_each_channel_tx_queue(tx_queue, channel) { - if (tx_queue->bytes_compl) { - netdev_tx_completed_queue(tx_queue->core_txq, - tx_queue->pkts_compl, tx_queue->bytes_compl); - } - } - - /* Receive any packets we queued up */ - netif_receive_skb_list(channel->rx_list); - channel->rx_list = NULL; - - return spent; -} - -/* NAPI poll handler - * - * NAPI guarantees serialisation of polls of the same device, which - * provides the guarantee required by efx_process_channel(). - */ -static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel) -{ - int step = efx->irq_mod_step_us; - - if (channel->irq_mod_score < irq_adapt_low_thresh) { - if (channel->irq_moderation_us > step) { - channel->irq_moderation_us -= step; - efx->type->push_irq_moderation(channel); - } - } else if (channel->irq_mod_score > irq_adapt_high_thresh) { - if (channel->irq_moderation_us < - efx->irq_rx_moderation_us) { - channel->irq_moderation_us += step; - efx->type->push_irq_moderation(channel); - } - } - - channel->irq_count = 0; - channel->irq_mod_score = 0; -} - -static int efx_poll(struct napi_struct *napi, int budget) -{ - struct efx_channel *channel = - container_of(napi, struct efx_channel, napi_str); - struct efx_nic *efx = channel->efx; - int spent; - - netif_vdbg(efx, intr, efx->net_dev, - "channel %d NAPI poll executing on CPU %d\n", - channel->channel, raw_smp_processor_id()); - - spent = efx_process_channel(channel, budget); - - xdp_do_flush_map(); - - if (spent < budget) { - if (efx_channel_has_rx_queue(channel) && - efx->irq_rx_adaptive && - unlikely(++channel->irq_count == 1000)) { - efx_update_irq_mod(efx, channel); - } - -#ifdef CONFIG_RFS_ACCEL - /* Perhaps expire some ARFS filters */ - mod_delayed_work(system_wq, &channel->filter_work, 0); -#endif - - /* There is no race here; although napi_disable() will - * only wait for napi_complete(), this isn't a problem - * since efx_nic_eventq_read_ack() will have no effect if - * interrupts have already been disabled. - */ - if (napi_complete_done(napi, spent)) - efx_nic_eventq_read_ack(channel); - } - - return spent; -} - -/* Create event queue - * Event queue memory allocations are done only once. If the channel - * is reset, the memory buffer will be reused; this guards against - * errors during channel reset and also simplifies interrupt handling. - */ -static int efx_probe_eventq(struct efx_channel *channel) -{ - struct efx_nic *efx = channel->efx; - unsigned long entries; - - netif_dbg(efx, probe, efx->net_dev, - "chan %d create event queue\n", channel->channel); - - /* Build an event queue with room for one event per tx and rx buffer, - * plus some extra for link state events and MCDI completions. 
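As a worked example of the event-queue sizing rule in the comment above: one event per RX and TX descriptor plus 128 spare entries, rounded up to a power of two, then clamped to a minimum before deriving the ring mask. The minimum used below is an assumed value for illustration, not necessarily the driver's EFX_MIN_EVQ_SIZE:

#include <stdio.h>

#define MIN_EVQ_SIZE 512ul      /* assumed value, for illustration only */

static unsigned long roundup_pow_of_two(unsigned long n)
{
	unsigned long p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

int main(void)
{
	unsigned long rxq = 1024, txq = 1024;
	/* one event per rx/tx buffer plus slack for link and MCDI events */
	unsigned long entries = roundup_pow_of_two(rxq + txq + 128);
	unsigned long mask = (entries > MIN_EVQ_SIZE ? entries : MIN_EVQ_SIZE) - 1;

	printf("entries=%lu mask=%#lx\n", entries, mask);  /* 4096, 0xfff */
	return 0;
}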
*/ - entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128); - EFX_WARN_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE); - channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1; - - return efx_nic_probe_eventq(channel); -} - -/* Prepare channel's event queue */ -static int efx_init_eventq(struct efx_channel *channel) -{ - struct efx_nic *efx = channel->efx; - int rc; - - EFX_WARN_ON_PARANOID(channel->eventq_init); - - netif_dbg(efx, drv, efx->net_dev, - "chan %d init event queue\n", channel->channel); - - rc = efx_nic_init_eventq(channel); - if (rc == 0) { - efx->type->push_irq_moderation(channel); - channel->eventq_read_ptr = 0; - channel->eventq_init = true; - } - return rc; -} - -/* Enable event queue processing and NAPI */ -void efx_start_eventq(struct efx_channel *channel) -{ - netif_dbg(channel->efx, ifup, channel->efx->net_dev, - "chan %d start event queue\n", channel->channel); - - /* Make sure the NAPI handler sees the enabled flag set */ - channel->enabled = true; - smp_wmb(); - - napi_enable(&channel->napi_str); - efx_nic_eventq_read_ack(channel); -} - -/* Disable event queue processing and NAPI */ -void efx_stop_eventq(struct efx_channel *channel) -{ - if (!channel->enabled) - return; - - napi_disable(&channel->napi_str); - channel->enabled = false; -} - -static void efx_fini_eventq(struct efx_channel *channel) -{ - if (!channel->eventq_init) - return; - - netif_dbg(channel->efx, drv, channel->efx->net_dev, - "chan %d fini event queue\n", channel->channel); - - efx_nic_fini_eventq(channel); - channel->eventq_init = false; -} - -static void efx_remove_eventq(struct efx_channel *channel) -{ - netif_dbg(channel->efx, drv, channel->efx->net_dev, - "chan %d remove event queue\n", channel->channel); - - efx_nic_remove_eventq(channel); -} - -/************************************************************************** - * - * Channel handling - * - *************************************************************************/ - -/* Allocate and initialise a channel structure. */ -static struct efx_channel * -efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel) -{ - struct efx_channel *channel; - struct efx_rx_queue *rx_queue; - struct efx_tx_queue *tx_queue; - int j; - - channel = kzalloc(sizeof(*channel), GFP_KERNEL); - if (!channel) - return NULL; - - channel->efx = efx; - channel->channel = i; - channel->type = &efx_default_channel_type; - - for (j = 0; j < EFX_TXQ_TYPES; j++) { - tx_queue = &channel->tx_queue[j]; - tx_queue->efx = efx; - tx_queue->queue = i * EFX_TXQ_TYPES + j; - tx_queue->channel = channel; - } - -#ifdef CONFIG_RFS_ACCEL - INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire); -#endif - - rx_queue = &channel->rx_queue; - rx_queue->efx = efx; - timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); - - return channel; -} - -/* Allocate and initialise a channel structure, copying parameters - * (but not resources) from an old channel structure. 
- */ -static struct efx_channel * -efx_copy_channel(const struct efx_channel *old_channel) -{ - struct efx_channel *channel; - struct efx_rx_queue *rx_queue; - struct efx_tx_queue *tx_queue; - int j; - - channel = kmalloc(sizeof(*channel), GFP_KERNEL); - if (!channel) - return NULL; - - *channel = *old_channel; - - channel->napi_dev = NULL; - INIT_HLIST_NODE(&channel->napi_str.napi_hash_node); - channel->napi_str.napi_id = 0; - channel->napi_str.state = 0; - memset(&channel->eventq, 0, sizeof(channel->eventq)); - - for (j = 0; j < EFX_TXQ_TYPES; j++) { - tx_queue = &channel->tx_queue[j]; - if (tx_queue->channel) - tx_queue->channel = channel; - tx_queue->buffer = NULL; - memset(&tx_queue->txd, 0, sizeof(tx_queue->txd)); - } - - rx_queue = &channel->rx_queue; - rx_queue->buffer = NULL; - memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd)); - timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); -#ifdef CONFIG_RFS_ACCEL - INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire); -#endif - - return channel; -} - -static int efx_probe_channel(struct efx_channel *channel) -{ - struct efx_tx_queue *tx_queue; - struct efx_rx_queue *rx_queue; - int rc; - - netif_dbg(channel->efx, probe, channel->efx->net_dev, - "creating channel %d\n", channel->channel); - - rc = channel->type->pre_probe(channel); - if (rc) - goto fail; - - rc = efx_probe_eventq(channel); - if (rc) - goto fail; - - efx_for_each_channel_tx_queue(tx_queue, channel) { - rc = efx_probe_tx_queue(tx_queue); - if (rc) - goto fail; - } - - efx_for_each_channel_rx_queue(rx_queue, channel) { - rc = efx_probe_rx_queue(rx_queue); - if (rc) - goto fail; - } - - channel->rx_list = NULL; - - return 0; - -fail: - efx_remove_channel(channel); - return rc; -} - -static void -efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len) -{ - struct efx_nic *efx = channel->efx; - const char *type; - int number; - - number = channel->channel; - - if (number >= efx->xdp_channel_offset && - !WARN_ON_ONCE(!efx->n_xdp_channels)) { - type = "-xdp"; - number -= efx->xdp_channel_offset; - } else if (efx->tx_channel_offset == 0) { - type = ""; - } else if (number < efx->tx_channel_offset) { - type = "-rx"; - } else { - type = "-tx"; - number -= efx->tx_channel_offset; - } - snprintf(buf, len, "%s%s-%d", efx->name, type, number); -} - -static void efx_set_channel_names(struct efx_nic *efx) -{ - struct efx_channel *channel; - - efx_for_each_channel(channel, efx) - channel->type->get_name(channel, - efx->msi_context[channel->channel].name, - sizeof(efx->msi_context[0].name)); -} - -static int efx_probe_channels(struct efx_nic *efx) -{ - struct efx_channel *channel; - int rc; - - /* Restart special buffer allocation */ - efx->next_buffer_table = 0; - - /* Probe channels in reverse, so that any 'extra' channels - * use the start of the buffer table. This allows the traffic - * channels to be resized without moving them or wasting the - * entries before them. 
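[Editor's sketch] The removed efx_get_channel_name() builds names of the form "<dev>[-rx|-tx|-xdp]-<n>", rebasing the printed index on the start of each channel group. The standalone sketch below mirrors that selection; the offsets passed in main() are invented example values, not anything the driver defines.

    #include <stdio.h>

    static void channel_name(char *buf, size_t len, const char *dev,
                             unsigned int ch, unsigned int tx_off,
                             unsigned int xdp_off, unsigned int n_xdp)
    {
        const char *type = "";
        unsigned int number = ch;

        if (n_xdp && ch >= xdp_off) {
            type = "-xdp";
            number -= xdp_off;
        } else if (tx_off == 0) {
            type = "";              /* combined RX/TX channels */
        } else if (ch < tx_off) {
            type = "-rx";
        } else {
            type = "-tx";
            number -= tx_off;
        }
        snprintf(buf, len, "%s%s-%u", dev, type, number);
    }

    int main(void)
    {
        char buf[32];

        channel_name(buf, sizeof(buf), "eth0", 5, 4, 8, 2);
        printf("%s\n", buf);        /* eth0-tx-1 */
        return 0;
    }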
- */ - efx_for_each_channel_rev(channel, efx) { - rc = efx_probe_channel(channel); - if (rc) { - netif_err(efx, probe, efx->net_dev, - "failed to create channel %d\n", - channel->channel); - goto fail; - } - } - efx_set_channel_names(efx); - - return 0; - -fail: - efx_remove_channels(efx); - return rc; -} - -/* Channels are shutdown and reinitialised whilst the NIC is running - * to propagate configuration changes (mtu, checksum offload), or - * to clear hardware error conditions - */ -static void efx_start_datapath(struct efx_nic *efx) -{ - netdev_features_t old_features = efx->net_dev->features; - bool old_rx_scatter = efx->rx_scatter; - struct efx_tx_queue *tx_queue; - struct efx_rx_queue *rx_queue; - struct efx_channel *channel; - size_t rx_buf_len; - - /* Calculate the rx buffer allocation parameters required to - * support the current MTU, including padding for header - * alignment and overruns. - */ - efx->rx_dma_len = (efx->rx_prefix_size + - EFX_MAX_FRAME_LEN(efx->net_dev->mtu) + - efx->type->rx_buffer_padding); - rx_buf_len = (sizeof(struct efx_rx_page_state) + XDP_PACKET_HEADROOM + - efx->rx_ip_align + efx->rx_dma_len); - if (rx_buf_len <= PAGE_SIZE) { - efx->rx_scatter = efx->type->always_rx_scatter; - efx->rx_buffer_order = 0; - } else if (efx->type->can_rx_scatter) { - BUILD_BUG_ON(EFX_RX_USR_BUF_SIZE % L1_CACHE_BYTES); - BUILD_BUG_ON(sizeof(struct efx_rx_page_state) + - 2 * ALIGN(NET_IP_ALIGN + EFX_RX_USR_BUF_SIZE, - EFX_RX_BUF_ALIGNMENT) > - PAGE_SIZE); - efx->rx_scatter = true; - efx->rx_dma_len = EFX_RX_USR_BUF_SIZE; - efx->rx_buffer_order = 0; - } else { - efx->rx_scatter = false; - efx->rx_buffer_order = get_order(rx_buf_len); - } - - efx_rx_config_page_split(efx); - if (efx->rx_buffer_order) - netif_dbg(efx, drv, efx->net_dev, - "RX buf len=%u; page order=%u batch=%u\n", - efx->rx_dma_len, efx->rx_buffer_order, - efx->rx_pages_per_batch); - else - netif_dbg(efx, drv, efx->net_dev, - "RX buf len=%u step=%u bpp=%u; page batch=%u\n", - efx->rx_dma_len, efx->rx_page_buf_step, - efx->rx_bufs_per_page, efx->rx_pages_per_batch); - - /* Restore previously fixed features in hw_features and remove - * features which are fixed now - */ - efx->net_dev->hw_features |= efx->net_dev->features; - efx->net_dev->hw_features &= ~efx->fixed_features; - efx->net_dev->features |= efx->fixed_features; - if (efx->net_dev->features != old_features) - netdev_features_change(efx->net_dev); - - /* RX filters may also have scatter-enabled flags */ - if (efx->rx_scatter != old_rx_scatter) - efx->type->filter_update_rx_scatter(efx); - - /* We must keep at least one descriptor in a TX ring empty. - * We could avoid this when the queue size does not exactly - * match the hardware ring size, but it's not that important. - * Therefore we stop the queue when one more skb might fill - * the ring completely. We wake it when half way back to - * empty. 
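[Editor's sketch] efx_start_datapath() above picks an RX buffer layout from the required DMA length: order-0 pages when a whole frame fits in one page, scatter into fixed-size buffers when the NIC supports it, otherwise a higher-order contiguous allocation. The sketch below shows only that three-way decision; the buffer lengths and the scatter buffer size are example numbers, not the driver's constants.

    #include <stdio.h>
    #include <stdbool.h>

    #define PAGE_SIZE 4096u

    static void choose_rx_layout(unsigned int rx_buf_len, bool can_scatter,
                                 unsigned int scatter_buf_size)
    {
        if (rx_buf_len <= PAGE_SIZE) {
            printf("order-0 pages, whole frame per buffer\n");
        } else if (can_scatter) {
            /* Large frames are split across several fixed-size buffers. */
            printf("scatter into %u-byte buffers, order-0 pages\n",
                   scatter_buf_size);
        } else {
            unsigned int order = 0, size = PAGE_SIZE;

            while (size < rx_buf_len) {     /* stand-in for get_order() */
                size <<= 1;
                order++;
            }
            printf("contiguous order-%u allocation\n", order);
        }
    }

    int main(void)
    {
        choose_rx_layout(1824, true, 2048);     /* standard MTU */
        choose_rx_layout(9216, true, 2048);     /* jumbo MTU, scatter */
        choose_rx_layout(9216, false, 0);       /* jumbo MTU, no scatter */
        return 0;
    }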
- */ - efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx); - efx->txq_wake_thresh = efx->txq_stop_thresh / 2; - - /* Initialise the channels */ - efx_for_each_channel(channel, efx) { - efx_for_each_channel_tx_queue(tx_queue, channel) { - efx_init_tx_queue(tx_queue); - atomic_inc(&efx->active_queues); - } - - efx_for_each_channel_rx_queue(rx_queue, channel) { - efx_init_rx_queue(rx_queue); - atomic_inc(&efx->active_queues); - efx_stop_eventq(channel); - efx_fast_push_rx_descriptors(rx_queue, false); - efx_start_eventq(channel); - } - - WARN_ON(channel->rx_pkt_n_frags); - } - - efx_ptp_start_datapath(efx); - - if (netif_device_present(efx->net_dev)) - netif_tx_wake_all_queues(efx->net_dev); -} - -static void efx_stop_datapath(struct efx_nic *efx) -{ - struct efx_channel *channel; - struct efx_tx_queue *tx_queue; - struct efx_rx_queue *rx_queue; - int rc; - - EFX_ASSERT_RESET_SERIALISED(efx); - BUG_ON(efx->port_enabled); - - efx_ptp_stop_datapath(efx); - - /* Stop RX refill */ - efx_for_each_channel(channel, efx) { - efx_for_each_channel_rx_queue(rx_queue, channel) - rx_queue->refill_enabled = false; - } - - efx_for_each_channel(channel, efx) { - /* RX packet processing is pipelined, so wait for the - * NAPI handler to complete. At least event queue 0 - * might be kept active by non-data events, so don't - * use napi_synchronize() but actually disable NAPI - * temporarily. - */ - if (efx_channel_has_rx_queue(channel)) { - efx_stop_eventq(channel); - efx_start_eventq(channel); - } - } - - rc = efx->type->fini_dmaq(efx); - if (rc) { - netif_err(efx, drv, efx->net_dev, "failed to flush queues\n"); - } else { - netif_dbg(efx, drv, efx->net_dev, - "successfully flushed all queues\n"); - } - - efx_for_each_channel(channel, efx) { - efx_for_each_channel_rx_queue(rx_queue, channel) - efx_fini_rx_queue(rx_queue); - efx_for_each_possible_channel_tx_queue(tx_queue, channel) - efx_fini_tx_queue(tx_queue); - } - efx->xdp_rxq_info_failed = false; -} - -static void efx_remove_channel(struct efx_channel *channel) -{ - struct efx_tx_queue *tx_queue; - struct efx_rx_queue *rx_queue; - - netif_dbg(channel->efx, drv, channel->efx->net_dev, - "destroy chan %d\n", channel->channel); - - efx_for_each_channel_rx_queue(rx_queue, channel) - efx_remove_rx_queue(rx_queue); - efx_for_each_possible_channel_tx_queue(tx_queue, channel) - efx_remove_tx_queue(tx_queue); - efx_remove_eventq(channel); - channel->type->post_remove(channel); -} - -static void efx_remove_channels(struct efx_nic *efx) -{ - struct efx_channel *channel; - - efx_for_each_channel(channel, efx) - efx_remove_channel(channel); - - kfree(efx->xdp_tx_queues); -} - -int -efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) -{ - struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel; - u32 old_rxq_entries, old_txq_entries; - unsigned i, next_buffer_table = 0; - int rc, rc2; - - rc = efx_check_disabled(efx); - if (rc) - return rc; - - /* Not all channels should be reallocated. We must avoid - * reallocating their buffer table entries. 
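[Editor's sketch] The threshold setup above stops a TX queue once a worst-case skb might no longer fit (one descriptor is always kept empty) and wakes it half way back toward empty. A toy model of that hysteresis, using an invented worst-case descriptor count and fill level:

    #include <stdio.h>
    #include <stdbool.h>

    int main(void)
    {
        unsigned int txq_entries = 1024;
        unsigned int max_skb_descs = 64;    /* invented worst-case skb size */
        unsigned int stop_thresh = txq_entries - max_skb_descs;
        unsigned int wake_thresh = stop_thresh / 2;
        unsigned int in_flight = 970;       /* descriptors currently queued */

        bool stop = in_flight >= stop_thresh;
        printf("stop at %u, wake at %u -> %s\n", stop_thresh, wake_thresh,
               stop ? "stop queue" : "keep transmitting");
        return 0;
    }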
- */ - efx_for_each_channel(channel, efx) { - struct efx_rx_queue *rx_queue; - struct efx_tx_queue *tx_queue; - - if (channel->type->copy) - continue; - next_buffer_table = max(next_buffer_table, - channel->eventq.index + - channel->eventq.entries); - efx_for_each_channel_rx_queue(rx_queue, channel) - next_buffer_table = max(next_buffer_table, - rx_queue->rxd.index + - rx_queue->rxd.entries); - efx_for_each_channel_tx_queue(tx_queue, channel) - next_buffer_table = max(next_buffer_table, - tx_queue->txd.index + - tx_queue->txd.entries); - } - - efx_device_detach_sync(efx); - efx_stop_all(efx); - efx_soft_disable_interrupts(efx); - - /* Clone channels (where possible) */ - memset(other_channel, 0, sizeof(other_channel)); - for (i = 0; i < efx->n_channels; i++) { - channel = efx->channel[i]; - if (channel->type->copy) - channel = channel->type->copy(channel); - if (!channel) { - rc = -ENOMEM; - goto out; - } - other_channel[i] = channel; - } - - /* Swap entry counts and channel pointers */ - old_rxq_entries = efx->rxq_entries; - old_txq_entries = efx->txq_entries; - efx->rxq_entries = rxq_entries; - efx->txq_entries = txq_entries; - for (i = 0; i < efx->n_channels; i++) { - channel = efx->channel[i]; - efx->channel[i] = other_channel[i]; - other_channel[i] = channel; - } - - /* Restart buffer table allocation */ - efx->next_buffer_table = next_buffer_table; - - for (i = 0; i < efx->n_channels; i++) { - channel = efx->channel[i]; - if (!channel->type->copy) - continue; - rc = efx_probe_channel(channel); - if (rc) - goto rollback; - efx_init_napi_channel(efx->channel[i]); - } - -out: - /* Destroy unused channel structures */ - for (i = 0; i < efx->n_channels; i++) { - channel = other_channel[i]; - if (channel && channel->type->copy) { - efx_fini_napi_channel(channel); - efx_remove_channel(channel); - kfree(channel); - } - } - - rc2 = efx_soft_enable_interrupts(efx); - if (rc2) { - rc = rc ? rc : rc2; - netif_err(efx, drv, efx->net_dev, - "unable to restart interrupts on channel reallocation\n"); - efx_schedule_reset(efx, RESET_TYPE_DISABLE); - } else { - efx_start_all(efx); - efx_device_attach_if_not_resetting(efx); - } - return rc; - -rollback: - /* Swap back */ - efx->rxq_entries = old_rxq_entries; - efx->txq_entries = old_txq_entries; - for (i = 0; i < efx->n_channels; i++) { - channel = efx->channel[i]; - efx->channel[i] = other_channel[i]; - other_channel[i] = channel; - } - goto out; -} - -void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue) -{ - mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(10)); -} - -static bool efx_default_channel_want_txqs(struct efx_channel *channel) -{ - return channel->channel - channel->efx->tx_channel_offset < - channel->efx->n_tx_channels; -} - -static const struct efx_channel_type efx_default_channel_type = { - .pre_probe = efx_channel_dummy_op_int, - .post_remove = efx_channel_dummy_op_void, - .get_name = efx_get_channel_name, - .copy = efx_copy_channel, - .want_txqs = efx_default_channel_want_txqs, - .keep_eventq = false, - .want_pio = true, -}; - -int efx_channel_dummy_op_int(struct efx_channel *channel) -{ - return 0; -} - -void efx_channel_dummy_op_void(struct efx_channel *channel) -{ -} - /************************************************************************** * * Port handling * **************************************************************************/ -/* This ensures that the kernel is kept informed (via - * netif_carrier_on/off) of the link status, and also maintains the - * link status's stop on the port's TX queue. 
- */ -void efx_link_status_changed(struct efx_nic *efx) -{ - struct efx_link_state *link_state = &efx->link_state; - - /* SFC Bug 5356: A net_dev notifier is registered, so we must ensure - * that no events are triggered between unregister_netdev() and the - * driver unloading. A more general condition is that NETDEV_CHANGE - * can only be generated between NETDEV_UP and NETDEV_DOWN */ - if (!netif_running(efx->net_dev)) - return; - - if (link_state->up != netif_carrier_ok(efx->net_dev)) { - efx->n_link_state_changes++; - - if (link_state->up) - netif_carrier_on(efx->net_dev); - else - netif_carrier_off(efx->net_dev); - } - - /* Status message for kernel log */ - if (link_state->up) - netif_info(efx, link, efx->net_dev, - "link up at %uMbps %s-duplex (MTU %d)\n", - link_state->speed, link_state->fd ? "full" : "half", - efx->net_dev->mtu); - else - netif_info(efx, link, efx->net_dev, "link down\n"); -} - -void efx_link_set_advertising(struct efx_nic *efx, - const unsigned long *advertising) -{ - memcpy(efx->link_advertising, advertising, - sizeof(__ETHTOOL_DECLARE_LINK_MODE_MASK())); - - efx->link_advertising[0] |= ADVERTISED_Autoneg; - if (advertising[0] & ADVERTISED_Pause) - efx->wanted_fc |= (EFX_FC_TX | EFX_FC_RX); - else - efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX); - if (advertising[0] & ADVERTISED_Asym_Pause) - efx->wanted_fc ^= EFX_FC_TX; -} - /* Equivalent to efx_link_set_advertising with all-zeroes, except does not * force the Autoneg bit on. */ @@ -1035,73 +160,6 @@ void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc) static void efx_fini_port(struct efx_nic *efx); -/* We assume that efx->type->reconfigure_mac will always try to sync RX - * filters and therefore needs to read-lock the filter table against freeing - */ -void efx_mac_reconfigure(struct efx_nic *efx) -{ - down_read(&efx->filter_sem); - efx->type->reconfigure_mac(efx); - up_read(&efx->filter_sem); -} - -/* Push loopback/power/transmit disable settings to the PHY, and reconfigure - * the MAC appropriately. All other PHY configuration changes are pushed - * through phy_op->set_settings(), and pushed asynchronously to the MAC - * through efx_monitor(). - * - * Callers must hold the mac_lock - */ -int __efx_reconfigure_port(struct efx_nic *efx) -{ - enum efx_phy_mode phy_mode; - int rc; - - WARN_ON(!mutex_is_locked(&efx->mac_lock)); - - /* Disable PHY transmit in mac level loopbacks */ - phy_mode = efx->phy_mode; - if (LOOPBACK_INTERNAL(efx)) - efx->phy_mode |= PHY_MODE_TX_DISABLED; - else - efx->phy_mode &= ~PHY_MODE_TX_DISABLED; - - rc = efx->type->reconfigure_port(efx); - - if (rc) - efx->phy_mode = phy_mode; - - return rc; -} - -/* Reinitialise the MAC to pick up new PHY settings, even if the port is - * disabled. */ -int efx_reconfigure_port(struct efx_nic *efx) -{ - int rc; - - EFX_ASSERT_RESET_SERIALISED(efx); - - mutex_lock(&efx->mac_lock); - rc = __efx_reconfigure_port(efx); - mutex_unlock(&efx->mac_lock); - - return rc; -} - -/* Asynchronous work item for changing MAC promiscuity and multicast - * hash. Avoid a drain/rx_ingress enable by reconfiguring the current - * MAC directly. 
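[Editor's sketch] The removed efx_link_set_advertising() derives the wanted flow-control mode from the advertised pause bits: Pause requests symmetric flow control, and Asym_Pause then flips the TX direction. The bit values below are local to this sketch, not the ethtool constants.

    #include <stdio.h>

    #define FC_RX           0x1
    #define FC_TX           0x2
    #define ADV_PAUSE       0x1
    #define ADV_ASYM_PAUSE  0x2

    static unsigned int wanted_fc(unsigned int adv)
    {
        unsigned int fc = 0;

        if (adv & ADV_PAUSE)
            fc |= FC_TX | FC_RX;
        if (adv & ADV_ASYM_PAUSE)
            fc ^= FC_TX;
        return fc;
    }

    int main(void)
    {
        printf("Pause only   -> %#x\n", wanted_fc(ADV_PAUSE));
        printf("Pause + Asym -> %#x\n", wanted_fc(ADV_PAUSE | ADV_ASYM_PAUSE));
        printf("Asym only    -> %#x\n", wanted_fc(ADV_ASYM_PAUSE));
        return 0;
    }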
*/ -static void efx_mac_work(struct work_struct *data) -{ - struct efx_nic *efx = container_of(data, struct efx_nic, mac_work); - - mutex_lock(&efx->mac_lock); - if (efx->port_enabled) - efx_mac_reconfigure(efx); - mutex_unlock(&efx->mac_lock); -} - static int efx_probe_port(struct efx_nic *efx) { int rc; @@ -1155,44 +213,6 @@ fail1: return rc; } -static void efx_start_port(struct efx_nic *efx) -{ - netif_dbg(efx, ifup, efx->net_dev, "start port\n"); - BUG_ON(efx->port_enabled); - - mutex_lock(&efx->mac_lock); - efx->port_enabled = true; - - /* Ensure MAC ingress/egress is enabled */ - efx_mac_reconfigure(efx); - - mutex_unlock(&efx->mac_lock); -} - -/* Cancel work for MAC reconfiguration, periodic hardware monitoring - * and the async self-test, wait for them to finish and prevent them - * being scheduled again. This doesn't cover online resets, which - * should only be cancelled when removing the device. - */ -static void efx_stop_port(struct efx_nic *efx) -{ - netif_dbg(efx, ifdown, efx->net_dev, "stop port\n"); - - EFX_ASSERT_RESET_SERIALISED(efx); - - mutex_lock(&efx->mac_lock); - efx->port_enabled = false; - mutex_unlock(&efx->mac_lock); - - /* Serialise against efx_set_multicast_list() */ - netif_addr_lock_bh(efx->net_dev); - netif_addr_unlock_bh(efx->net_dev); - - cancel_delayed_work_sync(&efx->monitor_work); - efx_selftest_async_cancel(efx); - cancel_work_sync(&efx->mac_work); -} - static void efx_fini_port(struct efx_nic *efx) { netif_dbg(efx, drv, efx->net_dev, "shut down port\n"); @@ -1291,582 +311,6 @@ static void efx_dissociate(struct efx_nic *efx) } } -/* This configures the PCI device to enable I/O and DMA. */ -static int efx_init_io(struct efx_nic *efx) -{ - struct pci_dev *pci_dev = efx->pci_dev; - dma_addr_t dma_mask = efx->type->max_dma_mask; - unsigned int mem_map_size = efx->type->mem_map_size(efx); - int rc, bar; - - netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n"); - - bar = efx->type->mem_bar(efx); - - rc = pci_enable_device(pci_dev); - if (rc) { - netif_err(efx, probe, efx->net_dev, - "failed to enable PCI device\n"); - goto fail1; - } - - pci_set_master(pci_dev); - - /* Set the PCI DMA mask. Try all possibilities from our genuine mask - * down to 32 bits, because some architectures will allow 40 bit - * masks event though they reject 46 bit masks. 
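[Editor's sketch] The DMA-mask code in efx_init_io() (the loop continues below) starts from the NIC's genuine mask and walks it down one bit at a time until the platform accepts it, never going below 32 bits. The sketch replaces dma_set_mask_and_coherent() with a fake acceptance function so it runs in user space; the 40-bit limit is an invented example.

    #include <stdio.h>

    /* Stand-in for dma_set_mask_and_coherent(): pretend the platform only
     * accepts masks of 40 bits or fewer. */
    static int try_dma_mask(unsigned long long mask)
    {
        return mask <= (1ULL << 40) - 1 ? 0 : -5 /* -EIO stand-in */;
    }

    int main(void)
    {
        unsigned long long dma_mask = (1ULL << 46) - 1; /* NIC's genuine mask */
        int rc = -5;

        while (dma_mask > 0x7fffffffULL) {
            rc = try_dma_mask(dma_mask);
            if (rc == 0)
                break;
            dma_mask >>= 1;
        }
        if (rc)
            printf("no usable DMA mask\n");
        else
            printf("using DMA mask %llx\n", dma_mask);
        return 0;
    }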
- */ - while (dma_mask > 0x7fffffffUL) { - rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask); - if (rc == 0) - break; - dma_mask >>= 1; - } - if (rc) { - netif_err(efx, probe, efx->net_dev, - "could not find a suitable DMA mask\n"); - goto fail2; - } - netif_dbg(efx, probe, efx->net_dev, - "using DMA mask %llx\n", (unsigned long long) dma_mask); - - efx->membase_phys = pci_resource_start(efx->pci_dev, bar); - rc = pci_request_region(pci_dev, bar, "sfc"); - if (rc) { - netif_err(efx, probe, efx->net_dev, - "request for memory BAR failed\n"); - rc = -EIO; - goto fail3; - } - efx->membase = ioremap_nocache(efx->membase_phys, mem_map_size); - if (!efx->membase) { - netif_err(efx, probe, efx->net_dev, - "could not map memory BAR at %llx+%x\n", - (unsigned long long)efx->membase_phys, mem_map_size); - rc = -ENOMEM; - goto fail4; - } - netif_dbg(efx, probe, efx->net_dev, - "memory BAR at %llx+%x (virtual %p)\n", - (unsigned long long)efx->membase_phys, mem_map_size, - efx->membase); - - return 0; - - fail4: - pci_release_region(efx->pci_dev, bar); - fail3: - efx->membase_phys = 0; - fail2: - pci_disable_device(efx->pci_dev); - fail1: - return rc; -} - -static void efx_fini_io(struct efx_nic *efx) -{ - int bar; - - netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n"); - - if (efx->membase) { - iounmap(efx->membase); - efx->membase = NULL; - } - - if (efx->membase_phys) { - bar = efx->type->mem_bar(efx); - pci_release_region(efx->pci_dev, bar); - efx->membase_phys = 0; - } - - /* Don't disable bus-mastering if VFs are assigned */ - if (!pci_vfs_assigned(efx->pci_dev)) - pci_disable_device(efx->pci_dev); -} - -void efx_set_default_rx_indir_table(struct efx_nic *efx, - struct efx_rss_context *ctx) -{ - size_t i; - - for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++) - ctx->rx_indir_table[i] = - ethtool_rxfh_indir_default(i, efx->rss_spread); -} - -static unsigned int efx_wanted_parallelism(struct efx_nic *efx) -{ - cpumask_var_t thread_mask; - unsigned int count; - int cpu; - - if (rss_cpus) { - count = rss_cpus; - } else { - if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) { - netif_warn(efx, probe, efx->net_dev, - "RSS disabled due to allocation failure\n"); - return 1; - } - - count = 0; - for_each_online_cpu(cpu) { - if (!cpumask_test_cpu(cpu, thread_mask)) { - ++count; - cpumask_or(thread_mask, thread_mask, - topology_sibling_cpumask(cpu)); - } - } - - free_cpumask_var(thread_mask); - } - - if (count > EFX_MAX_RX_QUEUES) { - netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn, - "Reducing number of rx queues from %u to %u.\n", - count, EFX_MAX_RX_QUEUES); - count = EFX_MAX_RX_QUEUES; - } - - /* If RSS is requested for the PF *and* VFs then we can't write RSS - * table entries that are inaccessible to VFs - */ -#ifdef CONFIG_SFC_SRIOV - if (efx->type->sriov_wanted) { - if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 && - count > efx_vf_size(efx)) { - netif_warn(efx, probe, efx->net_dev, - "Reducing number of RSS channels from %u to %u for " - "VF support. 
Increase vf-msix-limit to use more " - "channels on the PF.\n", - count, efx_vf_size(efx)); - count = efx_vf_size(efx); - } - } -#endif - - return count; -} - -static int efx_allocate_msix_channels(struct efx_nic *efx, - unsigned int max_channels, - unsigned int extra_channels, - unsigned int parallelism) -{ - unsigned int n_channels = parallelism; - int vec_count; - int n_xdp_tx; - int n_xdp_ev; - - if (efx_separate_tx_channels) - n_channels *= 2; - n_channels += extra_channels; - - /* To allow XDP transmit to happen from arbitrary NAPI contexts - * we allocate a TX queue per CPU. We share event queues across - * multiple tx queues, assuming tx and ev queues are both - * maximum size. - */ - - n_xdp_tx = num_possible_cpus(); - n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_TXQ_TYPES); - - vec_count = pci_msix_vec_count(efx->pci_dev); - if (vec_count < 0) - return vec_count; - - max_channels = min_t(unsigned int, vec_count, max_channels); - - /* Check resources. - * We need a channel per event queue, plus a VI per tx queue. - * This may be more pessimistic than it needs to be. - */ - if (n_channels + n_xdp_ev > max_channels) { - netif_err(efx, drv, efx->net_dev, - "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n", - n_xdp_ev, n_channels, max_channels); - efx->n_xdp_channels = 0; - efx->xdp_tx_per_channel = 0; - efx->xdp_tx_queue_count = 0; - } else { - efx->n_xdp_channels = n_xdp_ev; - efx->xdp_tx_per_channel = EFX_TXQ_TYPES; - efx->xdp_tx_queue_count = n_xdp_tx; - n_channels += n_xdp_ev; - netif_dbg(efx, drv, efx->net_dev, - "Allocating %d TX and %d event queues for XDP\n", - n_xdp_tx, n_xdp_ev); - } - - if (vec_count < n_channels) { - netif_err(efx, drv, efx->net_dev, - "WARNING: Insufficient MSI-X vectors available (%d < %u).\n", - vec_count, n_channels); - netif_err(efx, drv, efx->net_dev, - "WARNING: Performance may be reduced.\n"); - n_channels = vec_count; - } - - n_channels = min(n_channels, max_channels); - - efx->n_channels = n_channels; - - /* Ignore XDP tx channels when creating rx channels. */ - n_channels -= efx->n_xdp_channels; - - if (efx_separate_tx_channels) { - efx->n_tx_channels = - min(max(n_channels / 2, 1U), - efx->max_tx_channels); - efx->tx_channel_offset = - n_channels - efx->n_tx_channels; - efx->n_rx_channels = - max(n_channels - - efx->n_tx_channels, 1U); - } else { - efx->n_tx_channels = min(n_channels, efx->max_tx_channels); - efx->tx_channel_offset = 0; - efx->n_rx_channels = n_channels; - } - - efx->n_rx_channels = min(efx->n_rx_channels, parallelism); - efx->n_tx_channels = min(efx->n_tx_channels, parallelism); - - efx->xdp_channel_offset = n_channels; - - netif_dbg(efx, drv, efx->net_dev, - "Allocating %u RX channels\n", - efx->n_rx_channels); - - return efx->n_channels; -} - -/* Probe the number and type of interrupts we are able to obtain, and - * the resulting numbers of channels and RX queues. 
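[Editor's sketch] efx_allocate_msix_channels() above budgets one XDP TX queue per possible CPU and packs them into as few event queues as the per-channel TX queue limit allows, disabling XDP TX if the extra event queues would not fit in the available vectors. The figures below are invented, and the check is simplified to compare against the vector count only.

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))

    int main(void)
    {
        unsigned int n_cpus = 16, txq_types = 4;    /* example values */
        unsigned int vec_count = 32, n_channels = 8;

        unsigned int n_xdp_tx = n_cpus;
        unsigned int n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, txq_types);

        if (n_channels + n_xdp_ev > vec_count)
            printf("not enough vectors for XDP; disabling XDP TX\n");
        else
            printf("%u XDP TX queues on %u extra channels, %u channels total\n",
                   n_xdp_tx, n_xdp_ev, n_channels + n_xdp_ev);
        return 0;
    }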
- */ -static int efx_probe_interrupts(struct efx_nic *efx) -{ - unsigned int extra_channels = 0; - unsigned int rss_spread; - unsigned int i, j; - int rc; - - for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) - if (efx->extra_channel_type[i]) - ++extra_channels; - - if (efx->interrupt_mode == EFX_INT_MODE_MSIX) { - unsigned int parallelism = efx_wanted_parallelism(efx); - struct msix_entry xentries[EFX_MAX_CHANNELS]; - unsigned int n_channels; - - rc = efx_allocate_msix_channels(efx, efx->max_channels, - extra_channels, parallelism); - if (rc >= 0) { - n_channels = rc; - for (i = 0; i < n_channels; i++) - xentries[i].entry = i; - rc = pci_enable_msix_range(efx->pci_dev, xentries, 1, - n_channels); - } - if (rc < 0) { - /* Fall back to single channel MSI */ - netif_err(efx, drv, efx->net_dev, - "could not enable MSI-X\n"); - if (efx->type->min_interrupt_mode >= EFX_INT_MODE_MSI) - efx->interrupt_mode = EFX_INT_MODE_MSI; - else - return rc; - } else if (rc < n_channels) { - netif_err(efx, drv, efx->net_dev, - "WARNING: Insufficient MSI-X vectors" - " available (%d < %u).\n", rc, n_channels); - netif_err(efx, drv, efx->net_dev, - "WARNING: Performance may be reduced.\n"); - n_channels = rc; - } - - if (rc > 0) { - for (i = 0; i < efx->n_channels; i++) - efx_get_channel(efx, i)->irq = - xentries[i].vector; - } - } - - /* Try single interrupt MSI */ - if (efx->interrupt_mode == EFX_INT_MODE_MSI) { - efx->n_channels = 1; - efx->n_rx_channels = 1; - efx->n_tx_channels = 1; - efx->n_xdp_channels = 0; - efx->xdp_channel_offset = efx->n_channels; - rc = pci_enable_msi(efx->pci_dev); - if (rc == 0) { - efx_get_channel(efx, 0)->irq = efx->pci_dev->irq; - } else { - netif_err(efx, drv, efx->net_dev, - "could not enable MSI\n"); - if (efx->type->min_interrupt_mode >= EFX_INT_MODE_LEGACY) - efx->interrupt_mode = EFX_INT_MODE_LEGACY; - else - return rc; - } - } - - /* Assume legacy interrupts */ - if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) { - efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0); - efx->n_rx_channels = 1; - efx->n_tx_channels = 1; - efx->n_xdp_channels = 0; - efx->xdp_channel_offset = efx->n_channels; - efx->legacy_irq = efx->pci_dev->irq; - } - - /* Assign extra channels if possible, before XDP channels */ - efx->n_extra_tx_channels = 0; - j = efx->xdp_channel_offset; - for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) { - if (!efx->extra_channel_type[i]) - continue; - if (j <= efx->tx_channel_offset + efx->n_tx_channels) { - efx->extra_channel_type[i]->handle_no_channel(efx); - } else { - --j; - efx_get_channel(efx, j)->type = - efx->extra_channel_type[i]; - if (efx_channel_has_tx_queues(efx_get_channel(efx, j))) - efx->n_extra_tx_channels++; - } - } - - rss_spread = efx->n_rx_channels; - /* RSS might be usable on VFs even if it is disabled on the PF */ -#ifdef CONFIG_SFC_SRIOV - if (efx->type->sriov_wanted) { - efx->rss_spread = ((rss_spread > 1 || - !efx->type->sriov_wanted(efx)) ? 
- rss_spread : efx_vf_size(efx)); - return 0; - } -#endif - efx->rss_spread = rss_spread; - - return 0; -} - -#if defined(CONFIG_SMP) -static void efx_set_interrupt_affinity(struct efx_nic *efx) -{ - struct efx_channel *channel; - unsigned int cpu; - - efx_for_each_channel(channel, efx) { - cpu = cpumask_local_spread(channel->channel, - pcibus_to_node(efx->pci_dev->bus)); - irq_set_affinity_hint(channel->irq, cpumask_of(cpu)); - } -} - -static void efx_clear_interrupt_affinity(struct efx_nic *efx) -{ - struct efx_channel *channel; - - efx_for_each_channel(channel, efx) - irq_set_affinity_hint(channel->irq, NULL); -} -#else -static void -efx_set_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused))) -{ -} - -static void -efx_clear_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused))) -{ -} -#endif /* CONFIG_SMP */ - -static int efx_soft_enable_interrupts(struct efx_nic *efx) -{ - struct efx_channel *channel, *end_channel; - int rc; - - BUG_ON(efx->state == STATE_DISABLED); - - efx->irq_soft_enabled = true; - smp_wmb(); - - efx_for_each_channel(channel, efx) { - if (!channel->type->keep_eventq) { - rc = efx_init_eventq(channel); - if (rc) - goto fail; - } - efx_start_eventq(channel); - } - - efx_mcdi_mode_event(efx); - - return 0; -fail: - end_channel = channel; - efx_for_each_channel(channel, efx) { - if (channel == end_channel) - break; - efx_stop_eventq(channel); - if (!channel->type->keep_eventq) - efx_fini_eventq(channel); - } - - return rc; -} - -static void efx_soft_disable_interrupts(struct efx_nic *efx) -{ - struct efx_channel *channel; - - if (efx->state == STATE_DISABLED) - return; - - efx_mcdi_mode_poll(efx); - - efx->irq_soft_enabled = false; - smp_wmb(); - - if (efx->legacy_irq) - synchronize_irq(efx->legacy_irq); - - efx_for_each_channel(channel, efx) { - if (channel->irq) - synchronize_irq(channel->irq); - - efx_stop_eventq(channel); - if (!channel->type->keep_eventq) - efx_fini_eventq(channel); - } - - /* Flush the asynchronous MCDI request queue */ - efx_mcdi_flush_async(efx); -} - -static int efx_enable_interrupts(struct efx_nic *efx) -{ - struct efx_channel *channel, *end_channel; - int rc; - - BUG_ON(efx->state == STATE_DISABLED); - - if (efx->eeh_disabled_legacy_irq) { - enable_irq(efx->legacy_irq); - efx->eeh_disabled_legacy_irq = false; - } - - efx->type->irq_enable_master(efx); - - efx_for_each_channel(channel, efx) { - if (channel->type->keep_eventq) { - rc = efx_init_eventq(channel); - if (rc) - goto fail; - } - } - - rc = efx_soft_enable_interrupts(efx); - if (rc) - goto fail; - - return 0; - -fail: - end_channel = channel; - efx_for_each_channel(channel, efx) { - if (channel == end_channel) - break; - if (channel->type->keep_eventq) - efx_fini_eventq(channel); - } - - efx->type->irq_disable_non_ev(efx); - - return rc; -} - -static void efx_disable_interrupts(struct efx_nic *efx) -{ - struct efx_channel *channel; - - efx_soft_disable_interrupts(efx); - - efx_for_each_channel(channel, efx) { - if (channel->type->keep_eventq) - efx_fini_eventq(channel); - } - - efx->type->irq_disable_non_ev(efx); -} - -static void efx_remove_interrupts(struct efx_nic *efx) -{ - struct efx_channel *channel; - - /* Remove MSI/MSI-X interrupts */ - efx_for_each_channel(channel, efx) - channel->irq = 0; - pci_disable_msi(efx->pci_dev); - pci_disable_msix(efx->pci_dev); - - /* Remove legacy interrupt */ - efx->legacy_irq = 0; -} - -static int efx_set_channels(struct efx_nic *efx) -{ - struct efx_channel *channel; - struct efx_tx_queue *tx_queue; - int 
xdp_queue_number; - - efx->tx_channel_offset = - efx_separate_tx_channels ? - efx->n_channels - efx->n_tx_channels : 0; - - if (efx->xdp_tx_queue_count) { - EFX_WARN_ON_PARANOID(efx->xdp_tx_queues); - - /* Allocate array for XDP TX queue lookup. */ - efx->xdp_tx_queues = kcalloc(efx->xdp_tx_queue_count, - sizeof(*efx->xdp_tx_queues), - GFP_KERNEL); - if (!efx->xdp_tx_queues) - return -ENOMEM; - } - - /* We need to mark which channels really have RX and TX - * queues, and adjust the TX queue numbers if we have separate - * RX-only and TX-only channels. - */ - xdp_queue_number = 0; - efx_for_each_channel(channel, efx) { - if (channel->channel < efx->n_rx_channels) - channel->rx_queue.core_index = channel->channel; - else - channel->rx_queue.core_index = -1; - - efx_for_each_channel_tx_queue(tx_queue, channel) { - tx_queue->queue -= (efx->tx_channel_offset * - EFX_TXQ_TYPES); - - if (efx_channel_is_xdp_tx(channel) && - xdp_queue_number < efx->xdp_tx_queue_count) { - efx->xdp_tx_queues[xdp_queue_number] = tx_queue; - xdp_queue_number++; - } - } - } - return 0; -} - static int efx_probe_nic(struct efx_nic *efx) { int rc; @@ -1939,70 +383,6 @@ static void efx_remove_nic(struct efx_nic *efx) efx->type->remove(efx); } -static int efx_probe_filters(struct efx_nic *efx) -{ - int rc; - - init_rwsem(&efx->filter_sem); - mutex_lock(&efx->mac_lock); - down_write(&efx->filter_sem); - rc = efx->type->filter_table_probe(efx); - if (rc) - goto out_unlock; - -#ifdef CONFIG_RFS_ACCEL - if (efx->type->offload_features & NETIF_F_NTUPLE) { - struct efx_channel *channel; - int i, success = 1; - - efx_for_each_channel(channel, efx) { - channel->rps_flow_id = - kcalloc(efx->type->max_rx_ip_filters, - sizeof(*channel->rps_flow_id), - GFP_KERNEL); - if (!channel->rps_flow_id) - success = 0; - else - for (i = 0; - i < efx->type->max_rx_ip_filters; - ++i) - channel->rps_flow_id[i] = - RPS_FLOW_ID_INVALID; - channel->rfs_expire_index = 0; - channel->rfs_filter_count = 0; - } - - if (!success) { - efx_for_each_channel(channel, efx) - kfree(channel->rps_flow_id); - efx->type->filter_table_remove(efx); - rc = -ENOMEM; - goto out_unlock; - } - } -#endif -out_unlock: - up_write(&efx->filter_sem); - mutex_unlock(&efx->mac_lock); - return rc; -} - -static void efx_remove_filters(struct efx_nic *efx) -{ -#ifdef CONFIG_RFS_ACCEL - struct efx_channel *channel; - - efx_for_each_channel(channel, efx) { - cancel_delayed_work_sync(&channel->filter_work); - kfree(channel->rps_flow_id); - } -#endif - down_write(&efx->filter_sem); - efx->type->filter_table_remove(efx); - up_write(&efx->filter_sem); -} - - /************************************************************************** * * NIC startup/shutdown @@ -2067,81 +447,6 @@ static int efx_probe_all(struct efx_nic *efx) return rc; } -/* If the interface is supposed to be running but is not, start - * the hardware and software data path, regular activity for the port - * (MAC statistics, link polling, etc.) and schedule the port to be - * reconfigured. Interrupts must already be enabled. This function - * is safe to call multiple times, so long as the NIC is not disabled. - * Requires the RTNL lock. - */ -static void efx_start_all(struct efx_nic *efx) -{ - EFX_ASSERT_RESET_SERIALISED(efx); - BUG_ON(efx->state == STATE_DISABLED); - - /* Check that it is appropriate to restart the interface. 
All - * of these flags are safe to read under just the rtnl lock */ - if (efx->port_enabled || !netif_running(efx->net_dev) || - efx->reset_pending) - return; - - efx_start_port(efx); - efx_start_datapath(efx); - - /* Start the hardware monitor if there is one */ - if (efx->type->monitor != NULL) - queue_delayed_work(efx->workqueue, &efx->monitor_work, - efx_monitor_interval); - - /* Link state detection is normally event-driven; we have - * to poll now because we could have missed a change - */ - mutex_lock(&efx->mac_lock); - if (efx->phy_op->poll(efx)) - efx_link_status_changed(efx); - mutex_unlock(&efx->mac_lock); - - efx->type->start_stats(efx); - efx->type->pull_stats(efx); - spin_lock_bh(&efx->stats_lock); - efx->type->update_stats(efx, NULL, NULL); - spin_unlock_bh(&efx->stats_lock); -} - -/* Quiesce the hardware and software data path, and regular activity - * for the port without bringing the link down. Safe to call multiple - * times with the NIC in almost any state, but interrupts should be - * enabled. Requires the RTNL lock. - */ -static void efx_stop_all(struct efx_nic *efx) -{ - EFX_ASSERT_RESET_SERIALISED(efx); - - /* port_enabled can be read safely under the rtnl lock */ - if (!efx->port_enabled) - return; - - /* update stats before we go down so we can accurately count - * rx_nodesc_drops - */ - efx->type->pull_stats(efx); - spin_lock_bh(&efx->stats_lock); - efx->type->update_stats(efx, NULL, NULL); - spin_unlock_bh(&efx->stats_lock); - efx->type->stop_stats(efx); - efx_stop_port(efx); - - /* Stop the kernel transmit interface. This is only valid if - * the device is stopped or detached; otherwise the watchdog - * may fire immediately. - */ - WARN_ON(netif_running(efx->net_dev) && - netif_device_present(efx->net_dev)); - netif_tx_disable(efx->net_dev); - - efx_stop_datapath(efx); -} - static void efx_remove_all(struct efx_nic *efx) { rtnl_lock(); @@ -2237,36 +542,6 @@ void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs, /************************************************************************** * - * Hardware monitor - * - **************************************************************************/ - -/* Run periodically off the general workqueue */ -static void efx_monitor(struct work_struct *data) -{ - struct efx_nic *efx = container_of(data, struct efx_nic, - monitor_work.work); - - netif_vdbg(efx, timer, efx->net_dev, - "hardware monitor executing on CPU %d\n", - raw_smp_processor_id()); - BUG_ON(efx->type->monitor == NULL); - - /* If the mac_lock is already held then it is likely a port - * reconfiguration is already in place, which will likely do - * most of the work of monitor() anyway. 
*/ - if (mutex_trylock(&efx->mac_lock)) { - if (efx->port_enabled) - efx->type->monitor(efx); - mutex_unlock(&efx->mac_lock); - } - - queue_delayed_work(efx->workqueue, &efx->monitor_work, - efx_monitor_interval); -} - -/************************************************************************** - * * ioctls * *************************************************************************/ @@ -2294,45 +569,6 @@ static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd) /************************************************************************** * - * NAPI interface - * - **************************************************************************/ - -static void efx_init_napi_channel(struct efx_channel *channel) -{ - struct efx_nic *efx = channel->efx; - - channel->napi_dev = efx->net_dev; - netif_napi_add(channel->napi_dev, &channel->napi_str, - efx_poll, napi_weight); -} - -static void efx_init_napi(struct efx_nic *efx) -{ - struct efx_channel *channel; - - efx_for_each_channel(channel, efx) - efx_init_napi_channel(channel); -} - -static void efx_fini_napi_channel(struct efx_channel *channel) -{ - if (channel->napi_dev) - netif_napi_del(&channel->napi_str); - - channel->napi_dev = NULL; -} - -static void efx_fini_napi(struct efx_nic *efx) -{ - struct efx_channel *channel; - - efx_for_each_channel(channel, efx) - efx_fini_napi_channel(channel); -} - -/************************************************************************** - * * Kernel net device interface * *************************************************************************/ @@ -2382,19 +618,8 @@ int efx_net_stop(struct net_device *net_dev) return 0; } -/* Context: process, dev_base_lock or RTNL held, non-blocking. */ -static void efx_net_stats(struct net_device *net_dev, - struct rtnl_link_stats64 *stats) -{ - struct efx_nic *efx = netdev_priv(net_dev); - - spin_lock_bh(&efx->stats_lock); - efx->type->update_stats(efx, NULL, stats); - spin_unlock_bh(&efx->stats_lock); -} - /* Context: netif_tx_lock held, BHs disabled. */ -static void efx_watchdog(struct net_device *net_dev) +static void efx_watchdog(struct net_device *net_dev, unsigned int txqueue) { struct efx_nic *efx = netdev_priv(net_dev); @@ -2405,51 +630,6 @@ static void efx_watchdog(struct net_device *net_dev) efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG); } -static unsigned int efx_xdp_max_mtu(struct efx_nic *efx) -{ - /* The maximum MTU that we can fit in a single page, allowing for - * framing, overhead and XDP headroom. - */ - int overhead = EFX_MAX_FRAME_LEN(0) + sizeof(struct efx_rx_page_state) + - efx->rx_prefix_size + efx->type->rx_buffer_padding + - efx->rx_ip_align + XDP_PACKET_HEADROOM; - - return PAGE_SIZE - overhead; -} - -/* Context: process, rtnl_lock() held. 
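[Editor's sketch] efx_xdp_max_mtu() above caps the MTU so that a whole XDP frame, including the RX prefix, padding, IP alignment and XDP headroom, still fits in one page. The per-NIC overhead values below are invented placeholders; the driver reads the real ones from its type and rx_* fields.

    #include <stdio.h>

    #define PAGE_SIZE           4096u
    #define XDP_PACKET_HEADROOM 256u

    int main(void)
    {
        unsigned int frame_overhead = 14 + 4 + 4 + 16; /* hdr+VLAN+FCS+rounding */
        unsigned int page_state     = 16;   /* per-page bookkeeping struct */
        unsigned int rx_prefix      = 14;
        unsigned int buffer_padding = 0;
        unsigned int ip_align       = 2;

        unsigned int overhead = frame_overhead + page_state + rx_prefix +
                                buffer_padding + ip_align + XDP_PACKET_HEADROOM;

        printf("max XDP MTU = %u\n", PAGE_SIZE - overhead);
        return 0;
    }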
*/ -static int efx_change_mtu(struct net_device *net_dev, int new_mtu) -{ - struct efx_nic *efx = netdev_priv(net_dev); - int rc; - - rc = efx_check_disabled(efx); - if (rc) - return rc; - - if (rtnl_dereference(efx->xdp_prog) && - new_mtu > efx_xdp_max_mtu(efx)) { - netif_err(efx, drv, efx->net_dev, - "Requested MTU of %d too big for XDP (max: %d)\n", - new_mtu, efx_xdp_max_mtu(efx)); - return -EINVAL; - } - - netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu); - - efx_device_detach_sync(efx); - efx_stop_all(efx); - - mutex_lock(&efx->mac_lock); - net_dev->mtu = new_mtu; - efx_mac_reconfigure(efx); - mutex_unlock(&efx->mac_lock); - - efx_start_all(efx); - efx_device_attach_if_not_resetting(efx); - return 0; -} - static int efx_set_mac_address(struct net_device *net_dev, void *data) { struct efx_nic *efx = netdev_priv(net_dev); @@ -2726,28 +906,6 @@ show_phy_type(struct device *dev, struct device_attribute *attr, char *buf) } static DEVICE_ATTR(phy_type, 0444, show_phy_type, NULL); -#ifdef CONFIG_SFC_MCDI_LOGGING -static ssize_t show_mcdi_log(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct efx_nic *efx = dev_get_drvdata(dev); - struct efx_mcdi_iface *mcdi = efx_mcdi(efx); - - return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled); -} -static ssize_t set_mcdi_log(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct efx_nic *efx = dev_get_drvdata(dev); - struct efx_mcdi_iface *mcdi = efx_mcdi(efx); - bool enable = count > 0 && *buf != '0'; - - mcdi->logging_enabled = enable; - return count; -} -static DEVICE_ATTR(mcdi_logging, 0644, show_mcdi_log, set_mcdi_log); -#endif - static int efx_register_netdev(struct efx_nic *efx) { struct net_device *net_dev = efx->net_dev; @@ -2807,21 +965,11 @@ static int efx_register_netdev(struct efx_nic *efx) "failed to init net dev attributes\n"); goto fail_registered; } -#ifdef CONFIG_SFC_MCDI_LOGGING - rc = device_create_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging); - if (rc) { - netif_err(efx, drv, efx->net_dev, - "failed to init net dev attributes\n"); - goto fail_attr_mcdi_logging; - } -#endif + + efx_init_mcdi_logging(efx); return 0; -#ifdef CONFIG_SFC_MCDI_LOGGING -fail_attr_mcdi_logging: - device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type); -#endif fail_registered: rtnl_lock(); efx_dissociate(efx); @@ -2842,9 +990,7 @@ static void efx_unregister_netdev(struct efx_nic *efx) if (efx_dev_registered(efx)) { strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name)); -#ifdef CONFIG_SFC_MCDI_LOGGING - device_remove_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging); -#endif + efx_fini_mcdi_logging(efx); device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type); unregister_netdev(efx->net_dev); } @@ -2852,292 +998,6 @@ static void efx_unregister_netdev(struct efx_nic *efx) /************************************************************************** * - * Device reset and suspend - * - **************************************************************************/ - -/* Tears down the entire software state and most of the hardware state - * before reset. 
*/ -void efx_reset_down(struct efx_nic *efx, enum reset_type method) -{ - EFX_ASSERT_RESET_SERIALISED(efx); - - if (method == RESET_TYPE_MCDI_TIMEOUT) - efx->type->prepare_flr(efx); - - efx_stop_all(efx); - efx_disable_interrupts(efx); - - mutex_lock(&efx->mac_lock); - down_write(&efx->filter_sem); - mutex_lock(&efx->rss_lock); - if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && - method != RESET_TYPE_DATAPATH) - efx->phy_op->fini(efx); - efx->type->fini(efx); -} - -/* This function will always ensure that the locks acquired in - * efx_reset_down() are released. A failure return code indicates - * that we were unable to reinitialise the hardware, and the - * driver should be disabled. If ok is false, then the rx and tx - * engines are not restarted, pending a RESET_DISABLE. */ -int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) -{ - int rc; - - EFX_ASSERT_RESET_SERIALISED(efx); - - if (method == RESET_TYPE_MCDI_TIMEOUT) - efx->type->finish_flr(efx); - - /* Ensure that SRAM is initialised even if we're disabling the device */ - rc = efx->type->init(efx); - if (rc) { - netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n"); - goto fail; - } - - if (!ok) - goto fail; - - if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && - method != RESET_TYPE_DATAPATH) { - rc = efx->phy_op->init(efx); - if (rc) - goto fail; - rc = efx->phy_op->reconfigure(efx); - if (rc && rc != -EPERM) - netif_err(efx, drv, efx->net_dev, - "could not restore PHY settings\n"); - } - - rc = efx_enable_interrupts(efx); - if (rc) - goto fail; - -#ifdef CONFIG_SFC_SRIOV - rc = efx->type->vswitching_restore(efx); - if (rc) /* not fatal; the PF will still work fine */ - netif_warn(efx, probe, efx->net_dev, - "failed to restore vswitching rc=%d;" - " VFs may not function\n", rc); -#endif - - if (efx->type->rx_restore_rss_contexts) - efx->type->rx_restore_rss_contexts(efx); - mutex_unlock(&efx->rss_lock); - efx->type->filter_table_restore(efx); - up_write(&efx->filter_sem); - if (efx->type->sriov_reset) - efx->type->sriov_reset(efx); - - mutex_unlock(&efx->mac_lock); - - efx_start_all(efx); - - if (efx->type->udp_tnl_push_ports) - efx->type->udp_tnl_push_ports(efx); - - return 0; - -fail: - efx->port_initialized = false; - - mutex_unlock(&efx->rss_lock); - up_write(&efx->filter_sem); - mutex_unlock(&efx->mac_lock); - - return rc; -} - -/* Reset the NIC using the specified method. Note that the reset may - * fail, in which case the card will be left in an unusable state. - * - * Caller must hold the rtnl_lock. - */ -int efx_reset(struct efx_nic *efx, enum reset_type method) -{ - int rc, rc2; - bool disabled; - - netif_info(efx, drv, efx->net_dev, "resetting (%s)\n", - RESET_TYPE(method)); - - efx_device_detach_sync(efx); - efx_reset_down(efx, method); - - rc = efx->type->reset(efx, method); - if (rc) { - netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n"); - goto out; - } - - /* Clear flags for the scopes we covered. We assume the NIC and - * driver are now quiescent so that there is no race here. - */ - if (method < RESET_TYPE_MAX_METHOD) - efx->reset_pending &= -(1 << (method + 1)); - else /* it doesn't fit into the well-ordered scope hierarchy */ - __clear_bit(method, &efx->reset_pending); - - /* Reinitialise bus-mastering, which may have been turned off before - * the reset was scheduled. This is still appropriate, even in the - * RESET_TYPE_DISABLE since this driver generally assumes the hardware - * can respond to requests. 
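[Editor's sketch] After a successful reset, efx_reset() above clears every pending reset of equal or smaller scope with reset_pending &= -(1 << (method + 1)). The negation yields a mask whose bits method+1 and above are set, so a single AND retires all the scopes the reset just covered. The values below are arbitrary examples.

    #include <stdio.h>

    int main(void)
    {
        unsigned long pending = 0x2b;   /* several reset types queued */
        unsigned int method = 3;        /* the scope we just reset */

        /* -(1UL << (method + 1)) keeps only bits above 'method'. */
        pending &= -(1UL << (method + 1));

        printf("still pending: %#lx\n", pending);   /* 0x20 */
        return 0;
    }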
*/ - pci_set_master(efx->pci_dev); - -out: - /* Leave device stopped if necessary */ - disabled = rc || - method == RESET_TYPE_DISABLE || - method == RESET_TYPE_RECOVER_OR_DISABLE; - rc2 = efx_reset_up(efx, method, !disabled); - if (rc2) { - disabled = true; - if (!rc) - rc = rc2; - } - - if (disabled) { - dev_close(efx->net_dev); - netif_err(efx, drv, efx->net_dev, "has been disabled\n"); - efx->state = STATE_DISABLED; - } else { - netif_dbg(efx, drv, efx->net_dev, "reset complete\n"); - efx_device_attach_if_not_resetting(efx); - } - return rc; -} - -/* Try recovery mechanisms. - * For now only EEH is supported. - * Returns 0 if the recovery mechanisms are unsuccessful. - * Returns a non-zero value otherwise. - */ -int efx_try_recovery(struct efx_nic *efx) -{ -#ifdef CONFIG_EEH - /* A PCI error can occur and not be seen by EEH because nothing - * happens on the PCI bus. In this case the driver may fail and - * schedule a 'recover or reset', leading to this recovery handler. - * Manually call the eeh failure check function. - */ - struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev); - if (eeh_dev_check_failure(eehdev)) { - /* The EEH mechanisms will handle the error and reset the - * device if necessary. - */ - return 1; - } -#endif - return 0; -} - -static void efx_wait_for_bist_end(struct efx_nic *efx) -{ - int i; - - for (i = 0; i < BIST_WAIT_DELAY_COUNT; ++i) { - if (efx_mcdi_poll_reboot(efx)) - goto out; - msleep(BIST_WAIT_DELAY_MS); - } - - netif_err(efx, drv, efx->net_dev, "Warning: No MC reboot after BIST mode\n"); -out: - /* Either way unset the BIST flag. If we found no reboot we probably - * won't recover, but we should try. - */ - efx->mc_bist_for_other_fn = false; -} - -/* The worker thread exists so that code that cannot sleep can - * schedule a reset for later. - */ -static void efx_reset_work(struct work_struct *data) -{ - struct efx_nic *efx = container_of(data, struct efx_nic, reset_work); - unsigned long pending; - enum reset_type method; - - pending = READ_ONCE(efx->reset_pending); - method = fls(pending) - 1; - - if (method == RESET_TYPE_MC_BIST) - efx_wait_for_bist_end(efx); - - if ((method == RESET_TYPE_RECOVER_OR_DISABLE || - method == RESET_TYPE_RECOVER_OR_ALL) && - efx_try_recovery(efx)) - return; - - if (!pending) - return; - - rtnl_lock(); - - /* We checked the state in efx_schedule_reset() but it may - * have changed by now. Now that we have the RTNL lock, - * it cannot change again. 
- */ - if (efx->state == STATE_READY) - (void)efx_reset(efx, method); - - rtnl_unlock(); -} - -void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) -{ - enum reset_type method; - - if (efx->state == STATE_RECOVERY) { - netif_dbg(efx, drv, efx->net_dev, - "recovering: skip scheduling %s reset\n", - RESET_TYPE(type)); - return; - } - - switch (type) { - case RESET_TYPE_INVISIBLE: - case RESET_TYPE_ALL: - case RESET_TYPE_RECOVER_OR_ALL: - case RESET_TYPE_WORLD: - case RESET_TYPE_DISABLE: - case RESET_TYPE_RECOVER_OR_DISABLE: - case RESET_TYPE_DATAPATH: - case RESET_TYPE_MC_BIST: - case RESET_TYPE_MCDI_TIMEOUT: - method = type; - netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n", - RESET_TYPE(method)); - break; - default: - method = efx->type->map_reset_reason(type); - netif_dbg(efx, drv, efx->net_dev, - "scheduling %s reset for %s\n", - RESET_TYPE(method), RESET_TYPE(type)); - break; - } - - set_bit(method, &efx->reset_pending); - smp_mb(); /* ensure we change reset_pending before checking state */ - - /* If we're not READY then just leave the flags set as the cue - * to abort probing or reschedule the reset later. - */ - if (READ_ONCE(efx->state) != STATE_READY) - return; - - /* efx_process_channel() will no longer read events once a - * reset is scheduled. So switch back to poll'd MCDI completions. */ - efx_mcdi_mode_poll(efx); - - queue_work(reset_workqueue, &efx->reset_work); -} - -/************************************************************************** - * * List of NICs we support * **************************************************************************/ @@ -3169,139 +1029,10 @@ static const struct pci_device_id efx_pci_table[] = { /************************************************************************** * - * Dummy PHY/MAC operations - * - * Can be used for some unimplemented operations - * Needed so all function pointers are valid and do not have to be tested - * before use - * - **************************************************************************/ -int efx_port_dummy_op_int(struct efx_nic *efx) -{ - return 0; -} -void efx_port_dummy_op_void(struct efx_nic *efx) {} - -static bool efx_port_dummy_op_poll(struct efx_nic *efx) -{ - return false; -} - -static const struct efx_phy_operations efx_dummy_phy_operations = { - .init = efx_port_dummy_op_int, - .reconfigure = efx_port_dummy_op_int, - .poll = efx_port_dummy_op_poll, - .fini = efx_port_dummy_op_void, -}; - -/************************************************************************** - * * Data housekeeping * **************************************************************************/ -/* This zeroes out and then fills in the invariants in a struct - * efx_nic (including all sub-structures). - */ -static int efx_init_struct(struct efx_nic *efx, - struct pci_dev *pci_dev, struct net_device *net_dev) -{ - int rc = -ENOMEM, i; - - /* Initialise common structures */ - INIT_LIST_HEAD(&efx->node); - INIT_LIST_HEAD(&efx->secondary_list); - spin_lock_init(&efx->biu_lock); -#ifdef CONFIG_SFC_MTD - INIT_LIST_HEAD(&efx->mtd_list); -#endif - INIT_WORK(&efx->reset_work, efx_reset_work); - INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor); - INIT_DELAYED_WORK(&efx->selftest_work, efx_selftest_async_work); - efx->pci_dev = pci_dev; - efx->msg_enable = debug; - efx->state = STATE_UNINIT; - strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name)); - - efx->net_dev = net_dev; - efx->rx_prefix_size = efx->type->rx_prefix_size; - efx->rx_ip_align = - NET_IP_ALIGN ? 
(efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0; - efx->rx_packet_hash_offset = - efx->type->rx_hash_offset - efx->type->rx_prefix_size; - efx->rx_packet_ts_offset = - efx->type->rx_ts_offset - efx->type->rx_prefix_size; - INIT_LIST_HEAD(&efx->rss_context.list); - mutex_init(&efx->rss_lock); - spin_lock_init(&efx->stats_lock); - efx->vi_stride = EFX_DEFAULT_VI_STRIDE; - efx->num_mac_stats = MC_CMD_MAC_NSTATS; - BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END); - mutex_init(&efx->mac_lock); -#ifdef CONFIG_RFS_ACCEL - mutex_init(&efx->rps_mutex); - spin_lock_init(&efx->rps_hash_lock); - /* Failure to allocate is not fatal, but may degrade ARFS performance */ - efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE, - sizeof(*efx->rps_hash_table), GFP_KERNEL); -#endif - efx->phy_op = &efx_dummy_phy_operations; - efx->mdio.dev = net_dev; - INIT_WORK(&efx->mac_work, efx_mac_work); - init_waitqueue_head(&efx->flush_wq); - - for (i = 0; i < EFX_MAX_CHANNELS; i++) { - efx->channel[i] = efx_alloc_channel(efx, i, NULL); - if (!efx->channel[i]) - goto fail; - efx->msi_context[i].efx = efx; - efx->msi_context[i].index = i; - } - - /* Higher numbered interrupt modes are less capable! */ - if (WARN_ON_ONCE(efx->type->max_interrupt_mode > - efx->type->min_interrupt_mode)) { - rc = -EIO; - goto fail; - } - efx->interrupt_mode = max(efx->type->max_interrupt_mode, - interrupt_mode); - efx->interrupt_mode = min(efx->type->min_interrupt_mode, - interrupt_mode); - - /* Would be good to use the net_dev name, but we're too early */ - snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s", - pci_name(pci_dev)); - efx->workqueue = create_singlethread_workqueue(efx->workqueue_name); - if (!efx->workqueue) - goto fail; - - return 0; - -fail: - efx_fini_struct(efx); - return rc; -} - -static void efx_fini_struct(struct efx_nic *efx) -{ - int i; - -#ifdef CONFIG_RFS_ACCEL - kfree(efx->rps_hash_table); -#endif - - for (i = 0; i < EFX_MAX_CHANNELS; i++) - kfree(efx->channel[i]); - - kfree(efx->vpd_sn); - - if (efx->workqueue) { - destroy_workqueue(efx->workqueue); - efx->workqueue = NULL; - } -} - void efx_update_sw_stats(struct efx_nic *efx, u64 *stats) { u64 n_rx_nodesc_trunc = 0; @@ -3313,197 +1044,6 @@ void efx_update_sw_stats(struct efx_nic *efx, u64 *stats) stats[GENERIC_STAT_rx_noskb_drops] = atomic_read(&efx->n_rx_noskb_drops); } -bool efx_filter_spec_equal(const struct efx_filter_spec *left, - const struct efx_filter_spec *right) -{ - if ((left->match_flags ^ right->match_flags) | - ((left->flags ^ right->flags) & - (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX))) - return false; - - return memcmp(&left->outer_vid, &right->outer_vid, - sizeof(struct efx_filter_spec) - - offsetof(struct efx_filter_spec, outer_vid)) == 0; -} - -u32 efx_filter_spec_hash(const struct efx_filter_spec *spec) -{ - BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3); - return jhash2((const u32 *)&spec->outer_vid, - (sizeof(struct efx_filter_spec) - - offsetof(struct efx_filter_spec, outer_vid)) / 4, - 0); -} - -#ifdef CONFIG_RFS_ACCEL -bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx, - bool *force) -{ - if (rule->filter_id == EFX_ARFS_FILTER_ID_PENDING) { - /* ARFS is currently updating this entry, leave it */ - return false; - } - if (rule->filter_id == EFX_ARFS_FILTER_ID_ERROR) { - /* ARFS tried and failed to update this, so it's probably out - * of date. Remove the filter and the ARFS rule entry. 
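[Editor's sketch] The removed efx_filter_spec_hash()/efx_filter_spec_equal() treat everything from the outer_vid field to the end of the spec as the hashable key, using offsetof to find where the key starts. The sketch below uses a toy struct and a trivial mixing loop in place of jhash2, and drops the RX/TX flag masking for brevity; none of these names are the driver's.

    #include <stdio.h>
    #include <string.h>
    #include <stddef.h>
    #include <stdint.h>

    struct spec {
        uint32_t match_flags;
        uint32_t flags;
        uint32_t outer_vid;     /* key starts here */
        uint32_t ip[4];
    };

    static uint32_t spec_hash(const struct spec *s)
    {
        const uint32_t *p =
            (const uint32_t *)((const char *)s + offsetof(struct spec, outer_vid));
        size_t n = (sizeof(*s) - offsetof(struct spec, outer_vid)) / 4;
        uint32_t h = 0;

        while (n--)
            h = h * 2654435761u + *p++;     /* stand-in for jhash2() */
        return h;
    }

    static int spec_equal(const struct spec *a, const struct spec *b)
    {
        if (a->match_flags != b->match_flags)
            return 0;
        return !memcmp(&a->outer_vid, &b->outer_vid,
                       sizeof(*a) - offsetof(struct spec, outer_vid));
    }

    int main(void)
    {
        struct spec a = { .outer_vid = 100, .ip = { 0x0a000001 } };
        struct spec b = a;

        printf("hash %#x, equal %d\n", spec_hash(&a), spec_equal(&a, &b));
        return 0;
    }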
- */ - rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING; - *force = true; - return true; - } else if (WARN_ON(rule->filter_id != filter_idx)) { /* can't happen */ - /* ARFS has moved on, so old filter is not needed. Since we did - * not mark the rule with EFX_ARFS_FILTER_ID_REMOVING, it will - * not be removed by efx_rps_hash_del() subsequently. - */ - *force = true; - return true; - } - /* Remove it iff ARFS wants to. */ - return true; -} - -static -struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx, - const struct efx_filter_spec *spec) -{ - u32 hash = efx_filter_spec_hash(spec); - - lockdep_assert_held(&efx->rps_hash_lock); - if (!efx->rps_hash_table) - return NULL; - return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE]; -} - -struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx, - const struct efx_filter_spec *spec) -{ - struct efx_arfs_rule *rule; - struct hlist_head *head; - struct hlist_node *node; - - head = efx_rps_hash_bucket(efx, spec); - if (!head) - return NULL; - hlist_for_each(node, head) { - rule = container_of(node, struct efx_arfs_rule, node); - if (efx_filter_spec_equal(spec, &rule->spec)) - return rule; - } - return NULL; -} - -struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx, - const struct efx_filter_spec *spec, - bool *new) -{ - struct efx_arfs_rule *rule; - struct hlist_head *head; - struct hlist_node *node; - - head = efx_rps_hash_bucket(efx, spec); - if (!head) - return NULL; - hlist_for_each(node, head) { - rule = container_of(node, struct efx_arfs_rule, node); - if (efx_filter_spec_equal(spec, &rule->spec)) { - *new = false; - return rule; - } - } - rule = kmalloc(sizeof(*rule), GFP_ATOMIC); - *new = true; - if (rule) { - memcpy(&rule->spec, spec, sizeof(rule->spec)); - hlist_add_head(&rule->node, head); - } - return rule; -} - -void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec) -{ - struct efx_arfs_rule *rule; - struct hlist_head *head; - struct hlist_node *node; - - head = efx_rps_hash_bucket(efx, spec); - if (WARN_ON(!head)) - return; - hlist_for_each(node, head) { - rule = container_of(node, struct efx_arfs_rule, node); - if (efx_filter_spec_equal(spec, &rule->spec)) { - /* Someone already reused the entry. We know that if - * this check doesn't fire (i.e. filter_id == REMOVING) - * then the REMOVING mark was put there by our caller, - * because caller is holding a lock on filter table and - * only holders of that lock set REMOVING. - */ - if (rule->filter_id != EFX_ARFS_FILTER_ID_REMOVING) - return; - hlist_del(node); - kfree(rule); - return; - } - } - /* We didn't find it. */ - WARN_ON(1); -} -#endif - -/* RSS contexts. We're using linked lists and crappy O(n) algorithms, because - * (a) this is an infrequent control-plane operation and (b) n is small (max 64) - */ -struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx) -{ - struct list_head *head = &efx->rss_context.list; - struct efx_rss_context *ctx, *new; - u32 id = 1; /* Don't use zero, that refers to the master RSS context */ - - WARN_ON(!mutex_is_locked(&efx->rss_lock)); - - /* Search for first gap in the numbering */ - list_for_each_entry(ctx, head, list) { - if (ctx->user_id != id) - break; - id++; - /* Check for wrap. If this happens, we have nearly 2^32 - * allocated RSS contexts, which seems unlikely. 
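[Editor's sketch] efx_alloc_rss_context_entry() above keeps RSS contexts on a list sorted by user_id and gives a new context the first unused id, starting at 1 because 0 refers to the master context. The sketch uses a sorted array in place of the list; the ids in main() are an invented example.

    #include <stdio.h>

    static unsigned int first_free_id(const unsigned int *used, int n)
    {
        unsigned int id = 1;    /* 0 is reserved for the master RSS context */

        for (int i = 0; i < n; i++) {
            if (used[i] != id)
                break;          /* found a gap before used[i] */
            id++;
        }
        return id;
    }

    int main(void)
    {
        unsigned int used[] = { 1, 2, 4, 5 };   /* 3 was freed earlier */

        printf("next RSS context id: %u\n",
               first_free_id(used, (int)(sizeof(used) / sizeof(used[0]))));
        return 0;
    }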
- */ - if (WARN_ON_ONCE(!id)) - return NULL; - } - - /* Create the new entry */ - new = kmalloc(sizeof(struct efx_rss_context), GFP_KERNEL); - if (!new) - return NULL; - new->context_id = EFX_EF10_RSS_CONTEXT_INVALID; - new->rx_hash_udp_4tuple = false; - - /* Insert the new entry into the gap */ - new->user_id = id; - list_add_tail(&new->list, &ctx->list); - return new; -} - -struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id) -{ - struct list_head *head = &efx->rss_context.list; - struct efx_rss_context *ctx; - - WARN_ON(!mutex_is_locked(&efx->rss_lock)); - - list_for_each_entry(ctx, head, list) - if (ctx->user_id == id) - return ctx; - return NULL; -} - -void efx_free_rss_context_entry(struct efx_rss_context *ctx) -{ - list_del(&ctx->list); - kfree(ctx); -} - /************************************************************************** * * PCI interface @@ -3519,7 +1059,7 @@ static void efx_pci_remove_main(struct efx_nic *efx) * are not READY. */ BUG_ON(efx->state == STATE_READY); - cancel_work_sync(&efx->reset_work); + efx_flush_reset_workqueue(efx); efx_disable_interrupts(efx); efx_clear_interrupt_affinity(efx); @@ -3559,7 +1099,7 @@ static void efx_pci_remove(struct pci_dev *pci_dev) efx_pci_remove_main(efx); - efx_fini_io(efx); + efx_fini_io(efx, efx->type->mem_bar(efx)); netif_dbg(efx, drv, efx->net_dev, "shutdown successful\n"); efx_fini_struct(efx); @@ -3782,7 +1322,8 @@ static int efx_pci_probe(struct pci_dev *pci_dev, efx_probe_vpd_strings(efx); /* Set up basic I/O (BAR mappings etc) */ - rc = efx_init_io(efx); + rc = efx_init_io(efx, efx->type->mem_bar(efx), efx->type->max_dma_mask, + efx->type->mem_map_size(efx)); if (rc) goto fail2; @@ -3826,7 +1367,7 @@ static int efx_pci_probe(struct pci_dev *pci_dev, return 0; fail3: - efx_fini_io(efx); + efx_fini_io(efx, efx->type->mem_bar(efx)); fail2: efx_fini_struct(efx); fail1: @@ -3904,7 +1445,7 @@ static int efx_pm_thaw(struct device *dev) rtnl_unlock(); /* Reschedule any quenched resets scheduled during efx_pm_freeze() */ - queue_work(reset_workqueue, &efx->reset_work); + efx_queue_reset_work(efx); return 0; @@ -4083,10 +1624,6 @@ static struct pci_driver efx_pci_driver = { * *************************************************************************/ -module_param(interrupt_mode, uint, 0444); -MODULE_PARM_DESC(interrupt_mode, - "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)"); - static int __init efx_init_module(void) { int rc; @@ -4103,11 +1640,9 @@ static int __init efx_init_module(void) goto err_sriov; #endif - reset_workqueue = create_singlethread_workqueue("sfc_reset"); - if (!reset_workqueue) { - rc = -ENOMEM; + rc = efx_create_reset_workqueue(); + if (rc) goto err_reset; - } rc = pci_register_driver(&efx_pci_driver); if (rc < 0) @@ -4116,7 +1651,7 @@ static int __init efx_init_module(void) return 0; err_pci: - destroy_workqueue(reset_workqueue); + efx_destroy_reset_workqueue(); err_reset: #ifdef CONFIG_SFC_SRIOV efx_fini_sriov(); @@ -4132,7 +1667,7 @@ static void __exit efx_exit_module(void) printk(KERN_INFO "Solarflare NET driver unloading\n"); pci_unregister_driver(&efx_pci_driver); - destroy_workqueue(reset_workqueue); + efx_destroy_reset_workqueue(); #ifdef CONFIG_SFC_SRIOV efx_fini_sriov(); #endif diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index 2dd8d5002315..f1bdb04efbe4 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -15,31 +15,17 @@ int efx_net_open(struct net_device *net_dev); int efx_net_stop(struct net_device *net_dev); /* 
TX */ -int efx_probe_tx_queue(struct efx_tx_queue *tx_queue); -void efx_remove_tx_queue(struct efx_tx_queue *tx_queue); -void efx_init_tx_queue(struct efx_tx_queue *tx_queue); void efx_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue); -void efx_fini_tx_queue(struct efx_tx_queue *tx_queue); netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev); netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb); void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index); int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type, void *type_data); -unsigned int efx_tx_max_skb_descs(struct efx_nic *efx); extern unsigned int efx_piobuf_size; extern bool efx_separate_tx_channels; /* RX */ -void efx_set_default_rx_indir_table(struct efx_nic *efx, - struct efx_rss_context *ctx); -void efx_rx_config_page_split(struct efx_nic *efx); -int efx_probe_rx_queue(struct efx_rx_queue *rx_queue); -void efx_remove_rx_queue(struct efx_rx_queue *rx_queue); -void efx_init_rx_queue(struct efx_rx_queue *rx_queue); -void efx_fini_rx_queue(struct efx_rx_queue *rx_queue); -void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic); -void efx_rx_slow_fill(struct timer_list *t); void __efx_rx_packet(struct efx_channel *channel); void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, unsigned int n_frags, unsigned int len, u16 flags); @@ -48,7 +34,6 @@ static inline void efx_rx_flush_packet(struct efx_channel *channel) if (channel->rx_pkt_n_frags) __efx_rx_packet(channel); } -void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue); #define EFX_MAX_DMAQ_SIZE 4096UL #define EFX_DEFAULT_DMAQ_SIZE 1024UL @@ -80,8 +65,6 @@ static inline bool efx_rss_enabled(struct efx_nic *efx) /* Filters */ -void efx_mac_reconfigure(struct efx_nic *efx); - /** * efx_filter_insert_filter - add or replace a filter * @efx: NIC in which to insert the filter @@ -186,58 +169,17 @@ static inline void efx_filter_rfs_expire(struct work_struct *data) static inline void efx_filter_rfs_expire(struct work_struct *data) {} #define efx_filter_rfs_enabled() 0 #endif -bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec); - -bool efx_filter_spec_equal(const struct efx_filter_spec *left, - const struct efx_filter_spec *right); -u32 efx_filter_spec_hash(const struct efx_filter_spec *spec); - -#ifdef CONFIG_RFS_ACCEL -bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx, - bool *force); - -struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx, - const struct efx_filter_spec *spec); - -/* @new is written to indicate if entry was newly added (true) or if an old - * entry was found and returned (false). 
- */ -struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx, - const struct efx_filter_spec *spec, - bool *new); - -void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec); -#endif /* RSS contexts */ -struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx); -struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id); -void efx_free_rss_context_entry(struct efx_rss_context *ctx); static inline bool efx_rss_active(struct efx_rss_context *ctx) { - return ctx->context_id != EFX_EF10_RSS_CONTEXT_INVALID; + return ctx->context_id != EFX_MCDI_RSS_CONTEXT_INVALID; } -/* Channels */ -int efx_channel_dummy_op_int(struct efx_channel *channel); -void efx_channel_dummy_op_void(struct efx_channel *channel); -int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries); - -/* Ports */ -int efx_reconfigure_port(struct efx_nic *efx); -int __efx_reconfigure_port(struct efx_nic *efx); - /* Ethtool support */ extern const struct ethtool_ops efx_ethtool_ops; -/* Reset handling */ -int efx_reset(struct efx_nic *efx, enum reset_type method); -void efx_reset_down(struct efx_nic *efx, enum reset_type method); -int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok); -int efx_try_recovery(struct efx_nic *efx); - /* Global */ -void efx_schedule_reset(struct efx_nic *efx, enum reset_type type); unsigned int efx_usecs_to_ticks(struct efx_nic *efx, unsigned int usecs); unsigned int efx_ticks_to_usecs(struct efx_nic *efx, unsigned int ticks); int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs, @@ -245,8 +187,6 @@ int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs, bool rx_may_override_tx); void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs, unsigned int *rx_usecs, bool *rx_adaptive); -void efx_stop_eventq(struct efx_channel *channel); -void efx_start_eventq(struct efx_channel *channel); /* Dummy PHY ops for PHY drivers */ int efx_port_dummy_op_int(struct efx_nic *efx); @@ -293,9 +233,6 @@ static inline void efx_schedule_channel_irq(struct efx_channel *channel) efx_schedule_channel(channel); } -void efx_link_status_changed(struct efx_nic *efx); -void efx_link_set_advertising(struct efx_nic *efx, - const unsigned long *advertising); void efx_link_clear_advertising(struct efx_nic *efx); void efx_link_set_wanted_fc(struct efx_nic *efx, u8); diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c new file mode 100644 index 000000000000..aeb5e8aa2f2a --- /dev/null +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -0,0 +1,1234 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. 
+ */ + +#include "net_driver.h" +#include <linux/module.h> +#include "efx_channels.h" +#include "efx.h" +#include "efx_common.h" +#include "tx_common.h" +#include "rx_common.h" +#include "nic.h" +#include "sriov.h" + +/* This is the first interrupt mode to try out of: + * 0 => MSI-X + * 1 => MSI + * 2 => legacy + */ +static unsigned int interrupt_mode; +module_param(interrupt_mode, uint, 0444); +MODULE_PARM_DESC(interrupt_mode, + "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)"); + +/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS), + * i.e. the number of CPUs among which we may distribute simultaneous + * interrupt handling. + * + * Cards without MSI-X will only target one CPU via legacy or MSI interrupt. + * The default (0) means to assign an interrupt to each core. + */ +static unsigned int rss_cpus; +module_param(rss_cpus, uint, 0444); +MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling"); + +static unsigned int irq_adapt_low_thresh = 8000; +module_param(irq_adapt_low_thresh, uint, 0644); +MODULE_PARM_DESC(irq_adapt_low_thresh, + "Threshold score for reducing IRQ moderation"); + +static unsigned int irq_adapt_high_thresh = 16000; +module_param(irq_adapt_high_thresh, uint, 0644); +MODULE_PARM_DESC(irq_adapt_high_thresh, + "Threshold score for increasing IRQ moderation"); + +/* This is the weight assigned to each of the (per-channel) virtual + * NAPI devices. + */ +static int napi_weight = 64; + +/*************** + * Housekeeping + ***************/ + +int efx_channel_dummy_op_int(struct efx_channel *channel) +{ + return 0; +} + +void efx_channel_dummy_op_void(struct efx_channel *channel) +{ +} + +static const struct efx_channel_type efx_default_channel_type = { + .pre_probe = efx_channel_dummy_op_int, + .post_remove = efx_channel_dummy_op_void, + .get_name = efx_get_channel_name, + .copy = efx_copy_channel, + .want_txqs = efx_default_channel_want_txqs, + .keep_eventq = false, + .want_pio = true, +}; + +/************* + * INTERRUPTS + *************/ + +static unsigned int efx_wanted_parallelism(struct efx_nic *efx) +{ + cpumask_var_t thread_mask; + unsigned int count; + int cpu; + + if (rss_cpus) { + count = rss_cpus; + } else { + if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) { + netif_warn(efx, probe, efx->net_dev, + "RSS disabled due to allocation failure\n"); + return 1; + } + + count = 0; + for_each_online_cpu(cpu) { + if (!cpumask_test_cpu(cpu, thread_mask)) { + ++count; + cpumask_or(thread_mask, thread_mask, + topology_sibling_cpumask(cpu)); + } + } + + free_cpumask_var(thread_mask); + } + + if (count > EFX_MAX_RX_QUEUES) { + netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn, + "Reducing number of rx queues from %u to %u.\n", + count, EFX_MAX_RX_QUEUES); + count = EFX_MAX_RX_QUEUES; + } + + /* If RSS is requested for the PF *and* VFs then we can't write RSS + * table entries that are inaccessible to VFs + */ +#ifdef CONFIG_SFC_SRIOV + if (efx->type->sriov_wanted) { + if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 && + count > efx_vf_size(efx)) { + netif_warn(efx, probe, efx->net_dev, + "Reducing number of RSS channels from %u to %u for " + "VF support. 
Increase vf-msix-limit to use more " + "channels on the PF.\n", + count, efx_vf_size(efx)); + count = efx_vf_size(efx); + } + } +#endif + + return count; +} + +static int efx_allocate_msix_channels(struct efx_nic *efx, + unsigned int max_channels, + unsigned int extra_channels, + unsigned int parallelism) +{ + unsigned int n_channels = parallelism; + int vec_count; + int n_xdp_tx; + int n_xdp_ev; + + if (efx_separate_tx_channels) + n_channels *= 2; + n_channels += extra_channels; + + /* To allow XDP transmit to happen from arbitrary NAPI contexts + * we allocate a TX queue per CPU. We share event queues across + * multiple tx queues, assuming tx and ev queues are both + * maximum size. + */ + + n_xdp_tx = num_possible_cpus(); + n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_TXQ_TYPES); + + vec_count = pci_msix_vec_count(efx->pci_dev); + if (vec_count < 0) + return vec_count; + + max_channels = min_t(unsigned int, vec_count, max_channels); + + /* Check resources. + * We need a channel per event queue, plus a VI per tx queue. + * This may be more pessimistic than it needs to be. + */ + if (n_channels + n_xdp_ev > max_channels) { + netif_err(efx, drv, efx->net_dev, + "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n", + n_xdp_ev, n_channels, max_channels); + efx->n_xdp_channels = 0; + efx->xdp_tx_per_channel = 0; + efx->xdp_tx_queue_count = 0; + } else { + efx->n_xdp_channels = n_xdp_ev; + efx->xdp_tx_per_channel = EFX_TXQ_TYPES; + efx->xdp_tx_queue_count = n_xdp_tx; + n_channels += n_xdp_ev; + netif_dbg(efx, drv, efx->net_dev, + "Allocating %d TX and %d event queues for XDP\n", + n_xdp_tx, n_xdp_ev); + } + + if (vec_count < n_channels) { + netif_err(efx, drv, efx->net_dev, + "WARNING: Insufficient MSI-X vectors available (%d < %u).\n", + vec_count, n_channels); + netif_err(efx, drv, efx->net_dev, + "WARNING: Performance may be reduced.\n"); + n_channels = vec_count; + } + + n_channels = min(n_channels, max_channels); + + efx->n_channels = n_channels; + + /* Ignore XDP tx channels when creating rx channels. */ + n_channels -= efx->n_xdp_channels; + + if (efx_separate_tx_channels) { + efx->n_tx_channels = + min(max(n_channels / 2, 1U), + efx->max_tx_channels); + efx->tx_channel_offset = + n_channels - efx->n_tx_channels; + efx->n_rx_channels = + max(n_channels - + efx->n_tx_channels, 1U); + } else { + efx->n_tx_channels = min(n_channels, efx->max_tx_channels); + efx->tx_channel_offset = 0; + efx->n_rx_channels = n_channels; + } + + efx->n_rx_channels = min(efx->n_rx_channels, parallelism); + efx->n_tx_channels = min(efx->n_tx_channels, parallelism); + + efx->xdp_channel_offset = n_channels; + + netif_dbg(efx, drv, efx->net_dev, + "Allocating %u RX channels\n", + efx->n_rx_channels); + + return efx->n_channels; +} + +/* Probe the number and type of interrupts we are able to obtain, and + * the resulting numbers of channels and RX queues. 
+ */ +int efx_probe_interrupts(struct efx_nic *efx) +{ + unsigned int extra_channels = 0; + unsigned int rss_spread; + unsigned int i, j; + int rc; + + for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) + if (efx->extra_channel_type[i]) + ++extra_channels; + + if (efx->interrupt_mode == EFX_INT_MODE_MSIX) { + unsigned int parallelism = efx_wanted_parallelism(efx); + struct msix_entry xentries[EFX_MAX_CHANNELS]; + unsigned int n_channels; + + rc = efx_allocate_msix_channels(efx, efx->max_channels, + extra_channels, parallelism); + if (rc >= 0) { + n_channels = rc; + for (i = 0; i < n_channels; i++) + xentries[i].entry = i; + rc = pci_enable_msix_range(efx->pci_dev, xentries, 1, + n_channels); + } + if (rc < 0) { + /* Fall back to single channel MSI */ + netif_err(efx, drv, efx->net_dev, + "could not enable MSI-X\n"); + if (efx->type->min_interrupt_mode >= EFX_INT_MODE_MSI) + efx->interrupt_mode = EFX_INT_MODE_MSI; + else + return rc; + } else if (rc < n_channels) { + netif_err(efx, drv, efx->net_dev, + "WARNING: Insufficient MSI-X vectors" + " available (%d < %u).\n", rc, n_channels); + netif_err(efx, drv, efx->net_dev, + "WARNING: Performance may be reduced.\n"); + n_channels = rc; + } + + if (rc > 0) { + for (i = 0; i < efx->n_channels; i++) + efx_get_channel(efx, i)->irq = + xentries[i].vector; + } + } + + /* Try single interrupt MSI */ + if (efx->interrupt_mode == EFX_INT_MODE_MSI) { + efx->n_channels = 1; + efx->n_rx_channels = 1; + efx->n_tx_channels = 1; + efx->n_xdp_channels = 0; + efx->xdp_channel_offset = efx->n_channels; + rc = pci_enable_msi(efx->pci_dev); + if (rc == 0) { + efx_get_channel(efx, 0)->irq = efx->pci_dev->irq; + } else { + netif_err(efx, drv, efx->net_dev, + "could not enable MSI\n"); + if (efx->type->min_interrupt_mode >= EFX_INT_MODE_LEGACY) + efx->interrupt_mode = EFX_INT_MODE_LEGACY; + else + return rc; + } + } + + /* Assume legacy interrupts */ + if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) { + efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0); + efx->n_rx_channels = 1; + efx->n_tx_channels = 1; + efx->n_xdp_channels = 0; + efx->xdp_channel_offset = efx->n_channels; + efx->legacy_irq = efx->pci_dev->irq; + } + + /* Assign extra channels if possible, before XDP channels */ + efx->n_extra_tx_channels = 0; + j = efx->xdp_channel_offset; + for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) { + if (!efx->extra_channel_type[i]) + continue; + if (j <= efx->tx_channel_offset + efx->n_tx_channels) { + efx->extra_channel_type[i]->handle_no_channel(efx); + } else { + --j; + efx_get_channel(efx, j)->type = + efx->extra_channel_type[i]; + if (efx_channel_has_tx_queues(efx_get_channel(efx, j))) + efx->n_extra_tx_channels++; + } + } + + rss_spread = efx->n_rx_channels; + /* RSS might be usable on VFs even if it is disabled on the PF */ +#ifdef CONFIG_SFC_SRIOV + if (efx->type->sriov_wanted) { + efx->rss_spread = ((rss_spread > 1 || + !efx->type->sriov_wanted(efx)) ? 
+ rss_spread : efx_vf_size(efx)); + return 0; + } +#endif + efx->rss_spread = rss_spread; + + return 0; +} + +#if defined(CONFIG_SMP) +void efx_set_interrupt_affinity(struct efx_nic *efx) +{ + struct efx_channel *channel; + unsigned int cpu; + + efx_for_each_channel(channel, efx) { + cpu = cpumask_local_spread(channel->channel, + pcibus_to_node(efx->pci_dev->bus)); + irq_set_affinity_hint(channel->irq, cpumask_of(cpu)); + } +} + +void efx_clear_interrupt_affinity(struct efx_nic *efx) +{ + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) + irq_set_affinity_hint(channel->irq, NULL); +} +#else +void +efx_set_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused))) +{ +} + +void +efx_clear_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused))) +{ +} +#endif /* CONFIG_SMP */ + +void efx_remove_interrupts(struct efx_nic *efx) +{ + struct efx_channel *channel; + + /* Remove MSI/MSI-X interrupts */ + efx_for_each_channel(channel, efx) + channel->irq = 0; + pci_disable_msi(efx->pci_dev); + pci_disable_msix(efx->pci_dev); + + /* Remove legacy interrupt */ + efx->legacy_irq = 0; +} + +/*************** + * EVENT QUEUES + ***************/ + +/* Create event queue + * Event queue memory allocations are done only once. If the channel + * is reset, the memory buffer will be reused; this guards against + * errors during channel reset and also simplifies interrupt handling. + */ +int efx_probe_eventq(struct efx_channel *channel) +{ + struct efx_nic *efx = channel->efx; + unsigned long entries; + + netif_dbg(efx, probe, efx->net_dev, + "chan %d create event queue\n", channel->channel); + + /* Build an event queue with room for one event per tx and rx buffer, + * plus some extra for link state events and MCDI completions. + */ + entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128); + EFX_WARN_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE); + channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1; + + return efx_nic_probe_eventq(channel); +} + +/* Prepare channel's event queue */ +int efx_init_eventq(struct efx_channel *channel) +{ + struct efx_nic *efx = channel->efx; + int rc; + + EFX_WARN_ON_PARANOID(channel->eventq_init); + + netif_dbg(efx, drv, efx->net_dev, + "chan %d init event queue\n", channel->channel); + + rc = efx_nic_init_eventq(channel); + if (rc == 0) { + efx->type->push_irq_moderation(channel); + channel->eventq_read_ptr = 0; + channel->eventq_init = true; + } + return rc; +} + +/* Enable event queue processing and NAPI */ +void efx_start_eventq(struct efx_channel *channel) +{ + netif_dbg(channel->efx, ifup, channel->efx->net_dev, + "chan %d start event queue\n", channel->channel); + + /* Make sure the NAPI handler sees the enabled flag set */ + channel->enabled = true; + smp_wmb(); + + napi_enable(&channel->napi_str); + efx_nic_eventq_read_ack(channel); +} + +/* Disable event queue processing and NAPI */ +void efx_stop_eventq(struct efx_channel *channel) +{ + if (!channel->enabled) + return; + + napi_disable(&channel->napi_str); + channel->enabled = false; +} + +void efx_fini_eventq(struct efx_channel *channel) +{ + if (!channel->eventq_init) + return; + + netif_dbg(channel->efx, drv, channel->efx->net_dev, + "chan %d fini event queue\n", channel->channel); + + efx_nic_fini_eventq(channel); + channel->eventq_init = false; +} + +void efx_remove_eventq(struct efx_channel *channel) +{ + netif_dbg(channel->efx, drv, channel->efx->net_dev, + "chan %d remove event queue\n", channel->channel); + + efx_nic_remove_eventq(channel); +} + 
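The event-queue helpers above are exported through efx_channels.h so that the rest of the driver can take a channel's event queue through a fixed lifecycle. The fragment below is only an illustrative sketch of that ordering and is not part of the patch; it assumes a caller inside the sfc driver that already holds a channel allocated by efx_alloc_channel(), and the hypothetical function name efx_demo_eventq_cycle is mine.

#include "net_driver.h"
#include "efx_channels.h"

/* Illustrative only: drive one channel's event queue through the
 * lifecycle implied by the helpers above. Error handling is reduced
 * to early exits for brevity.
 */
static int efx_demo_eventq_cycle(struct efx_channel *channel)
{
	int rc;

	rc = efx_probe_eventq(channel);		/* allocate event queue memory */
	if (rc)
		return rc;

	rc = efx_init_eventq(channel);		/* push queue to the NIC, reset read pointer */
	if (rc)
		goto out_remove;

	efx_start_eventq(channel);		/* enable NAPI and event processing */

	/* ... datapath runs here ... */

	efx_stop_eventq(channel);		/* disable NAPI */
	efx_fini_eventq(channel);		/* tear down NIC state, keep the memory */
out_remove:
	efx_remove_eventq(channel);		/* free event queue memory */
	return rc;
}

efx_soft_enable_interrupts()/efx_soft_disable_interrupts() and efx_start_channels()/efx_stop_channels() later in this file follow the same per-channel order, except that channels whose type sets keep_eventq have their event queues initialised and finalised by efx_enable_interrupts()/efx_disable_interrupts() instead, so they survive a soft interrupt disable.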
+/************************************************************************** + * + * Channel handling + * + *************************************************************************/ + +/* Allocate and initialise a channel structure. */ +struct efx_channel * +efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel) +{ + struct efx_rx_queue *rx_queue; + struct efx_tx_queue *tx_queue; + struct efx_channel *channel; + int j; + + channel = kzalloc(sizeof(*channel), GFP_KERNEL); + if (!channel) + return NULL; + + channel->efx = efx; + channel->channel = i; + channel->type = &efx_default_channel_type; + + for (j = 0; j < EFX_TXQ_TYPES; j++) { + tx_queue = &channel->tx_queue[j]; + tx_queue->efx = efx; + tx_queue->queue = i * EFX_TXQ_TYPES + j; + tx_queue->channel = channel; + } + +#ifdef CONFIG_RFS_ACCEL + INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire); +#endif + + rx_queue = &channel->rx_queue; + rx_queue->efx = efx; + timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); + + return channel; +} + +int efx_init_channels(struct efx_nic *efx) +{ + unsigned int i; + + for (i = 0; i < EFX_MAX_CHANNELS; i++) { + efx->channel[i] = efx_alloc_channel(efx, i, NULL); + if (!efx->channel[i]) + return -ENOMEM; + efx->msi_context[i].efx = efx; + efx->msi_context[i].index = i; + } + + /* Higher numbered interrupt modes are less capable! */ + if (WARN_ON_ONCE(efx->type->max_interrupt_mode > + efx->type->min_interrupt_mode)) { + return -EIO; + } + efx->interrupt_mode = max(efx->type->max_interrupt_mode, + interrupt_mode); + efx->interrupt_mode = min(efx->type->min_interrupt_mode, + interrupt_mode); + + return 0; +} + +void efx_fini_channels(struct efx_nic *efx) +{ + unsigned int i; + + for (i = 0; i < EFX_MAX_CHANNELS; i++) + if (efx->channel[i]) { + kfree(efx->channel[i]); + efx->channel[i] = NULL; + } +} + +/* Allocate and initialise a channel structure, copying parameters + * (but not resources) from an old channel structure. 
+ */ +struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel) +{ + struct efx_rx_queue *rx_queue; + struct efx_tx_queue *tx_queue; + struct efx_channel *channel; + int j; + + channel = kmalloc(sizeof(*channel), GFP_KERNEL); + if (!channel) + return NULL; + + *channel = *old_channel; + + channel->napi_dev = NULL; + INIT_HLIST_NODE(&channel->napi_str.napi_hash_node); + channel->napi_str.napi_id = 0; + channel->napi_str.state = 0; + memset(&channel->eventq, 0, sizeof(channel->eventq)); + + for (j = 0; j < EFX_TXQ_TYPES; j++) { + tx_queue = &channel->tx_queue[j]; + if (tx_queue->channel) + tx_queue->channel = channel; + tx_queue->buffer = NULL; + memset(&tx_queue->txd, 0, sizeof(tx_queue->txd)); + } + + rx_queue = &channel->rx_queue; + rx_queue->buffer = NULL; + memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd)); + timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); +#ifdef CONFIG_RFS_ACCEL + INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire); +#endif + + return channel; +} + +static int efx_probe_channel(struct efx_channel *channel) +{ + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + int rc; + + netif_dbg(channel->efx, probe, channel->efx->net_dev, + "creating channel %d\n", channel->channel); + + rc = channel->type->pre_probe(channel); + if (rc) + goto fail; + + rc = efx_probe_eventq(channel); + if (rc) + goto fail; + + efx_for_each_channel_tx_queue(tx_queue, channel) { + rc = efx_probe_tx_queue(tx_queue); + if (rc) + goto fail; + } + + efx_for_each_channel_rx_queue(rx_queue, channel) { + rc = efx_probe_rx_queue(rx_queue); + if (rc) + goto fail; + } + + channel->rx_list = NULL; + + return 0; + +fail: + efx_remove_channel(channel); + return rc; +} + +void efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len) +{ + struct efx_nic *efx = channel->efx; + const char *type; + int number; + + number = channel->channel; + + if (number >= efx->xdp_channel_offset && + !WARN_ON_ONCE(!efx->n_xdp_channels)) { + type = "-xdp"; + number -= efx->xdp_channel_offset; + } else if (efx->tx_channel_offset == 0) { + type = ""; + } else if (number < efx->tx_channel_offset) { + type = "-rx"; + } else { + type = "-tx"; + number -= efx->tx_channel_offset; + } + snprintf(buf, len, "%s%s-%d", efx->name, type, number); +} + +void efx_set_channel_names(struct efx_nic *efx) +{ + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) + channel->type->get_name(channel, + efx->msi_context[channel->channel].name, + sizeof(efx->msi_context[0].name)); +} + +int efx_probe_channels(struct efx_nic *efx) +{ + struct efx_channel *channel; + int rc; + + /* Restart special buffer allocation */ + efx->next_buffer_table = 0; + + /* Probe channels in reverse, so that any 'extra' channels + * use the start of the buffer table. This allows the traffic + * channels to be resized without moving them or wasting the + * entries before them. 
+ */ + efx_for_each_channel_rev(channel, efx) { + rc = efx_probe_channel(channel); + if (rc) { + netif_err(efx, probe, efx->net_dev, + "failed to create channel %d\n", + channel->channel); + goto fail; + } + } + efx_set_channel_names(efx); + + return 0; + +fail: + efx_remove_channels(efx); + return rc; +} + +void efx_remove_channel(struct efx_channel *channel) +{ + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + + netif_dbg(channel->efx, drv, channel->efx->net_dev, + "destroy chan %d\n", channel->channel); + + efx_for_each_channel_rx_queue(rx_queue, channel) + efx_remove_rx_queue(rx_queue); + efx_for_each_possible_channel_tx_queue(tx_queue, channel) + efx_remove_tx_queue(tx_queue); + efx_remove_eventq(channel); + channel->type->post_remove(channel); +} + +void efx_remove_channels(struct efx_nic *efx) +{ + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) + efx_remove_channel(channel); + + kfree(efx->xdp_tx_queues); +} + +int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) +{ + struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel; + unsigned int i, next_buffer_table = 0; + u32 old_rxq_entries, old_txq_entries; + int rc, rc2; + + rc = efx_check_disabled(efx); + if (rc) + return rc; + + /* Not all channels should be reallocated. We must avoid + * reallocating their buffer table entries. + */ + efx_for_each_channel(channel, efx) { + struct efx_rx_queue *rx_queue; + struct efx_tx_queue *tx_queue; + + if (channel->type->copy) + continue; + next_buffer_table = max(next_buffer_table, + channel->eventq.index + + channel->eventq.entries); + efx_for_each_channel_rx_queue(rx_queue, channel) + next_buffer_table = max(next_buffer_table, + rx_queue->rxd.index + + rx_queue->rxd.entries); + efx_for_each_channel_tx_queue(tx_queue, channel) + next_buffer_table = max(next_buffer_table, + tx_queue->txd.index + + tx_queue->txd.entries); + } + + efx_device_detach_sync(efx); + efx_stop_all(efx); + efx_soft_disable_interrupts(efx); + + /* Clone channels (where possible) */ + memset(other_channel, 0, sizeof(other_channel)); + for (i = 0; i < efx->n_channels; i++) { + channel = efx->channel[i]; + if (channel->type->copy) + channel = channel->type->copy(channel); + if (!channel) { + rc = -ENOMEM; + goto out; + } + other_channel[i] = channel; + } + + /* Swap entry counts and channel pointers */ + old_rxq_entries = efx->rxq_entries; + old_txq_entries = efx->txq_entries; + efx->rxq_entries = rxq_entries; + efx->txq_entries = txq_entries; + for (i = 0; i < efx->n_channels; i++) { + channel = efx->channel[i]; + efx->channel[i] = other_channel[i]; + other_channel[i] = channel; + } + + /* Restart buffer table allocation */ + efx->next_buffer_table = next_buffer_table; + + for (i = 0; i < efx->n_channels; i++) { + channel = efx->channel[i]; + if (!channel->type->copy) + continue; + rc = efx_probe_channel(channel); + if (rc) + goto rollback; + efx_init_napi_channel(efx->channel[i]); + } + +out: + /* Destroy unused channel structures */ + for (i = 0; i < efx->n_channels; i++) { + channel = other_channel[i]; + if (channel && channel->type->copy) { + efx_fini_napi_channel(channel); + efx_remove_channel(channel); + kfree(channel); + } + } + + rc2 = efx_soft_enable_interrupts(efx); + if (rc2) { + rc = rc ? 
rc : rc2; + netif_err(efx, drv, efx->net_dev, + "unable to restart interrupts on channel reallocation\n"); + efx_schedule_reset(efx, RESET_TYPE_DISABLE); + } else { + efx_start_all(efx); + efx_device_attach_if_not_resetting(efx); + } + return rc; + +rollback: + /* Swap back */ + efx->rxq_entries = old_rxq_entries; + efx->txq_entries = old_txq_entries; + for (i = 0; i < efx->n_channels; i++) { + channel = efx->channel[i]; + efx->channel[i] = other_channel[i]; + other_channel[i] = channel; + } + goto out; +} + +int efx_set_channels(struct efx_nic *efx) +{ + struct efx_channel *channel; + struct efx_tx_queue *tx_queue; + int xdp_queue_number; + + efx->tx_channel_offset = + efx_separate_tx_channels ? + efx->n_channels - efx->n_tx_channels : 0; + + if (efx->xdp_tx_queue_count) { + EFX_WARN_ON_PARANOID(efx->xdp_tx_queues); + + /* Allocate array for XDP TX queue lookup. */ + efx->xdp_tx_queues = kcalloc(efx->xdp_tx_queue_count, + sizeof(*efx->xdp_tx_queues), + GFP_KERNEL); + if (!efx->xdp_tx_queues) + return -ENOMEM; + } + + /* We need to mark which channels really have RX and TX + * queues, and adjust the TX queue numbers if we have separate + * RX-only and TX-only channels. + */ + xdp_queue_number = 0; + efx_for_each_channel(channel, efx) { + if (channel->channel < efx->n_rx_channels) + channel->rx_queue.core_index = channel->channel; + else + channel->rx_queue.core_index = -1; + + efx_for_each_channel_tx_queue(tx_queue, channel) { + tx_queue->queue -= (efx->tx_channel_offset * + EFX_TXQ_TYPES); + + if (efx_channel_is_xdp_tx(channel) && + xdp_queue_number < efx->xdp_tx_queue_count) { + efx->xdp_tx_queues[xdp_queue_number] = tx_queue; + xdp_queue_number++; + } + } + } + return 0; +} + +bool efx_default_channel_want_txqs(struct efx_channel *channel) +{ + return channel->channel - channel->efx->tx_channel_offset < + channel->efx->n_tx_channels; +} + +/************* + * START/STOP + *************/ + +int efx_soft_enable_interrupts(struct efx_nic *efx) +{ + struct efx_channel *channel, *end_channel; + int rc; + + BUG_ON(efx->state == STATE_DISABLED); + + efx->irq_soft_enabled = true; + smp_wmb(); + + efx_for_each_channel(channel, efx) { + if (!channel->type->keep_eventq) { + rc = efx_init_eventq(channel); + if (rc) + goto fail; + } + efx_start_eventq(channel); + } + + efx_mcdi_mode_event(efx); + + return 0; +fail: + end_channel = channel; + efx_for_each_channel(channel, efx) { + if (channel == end_channel) + break; + efx_stop_eventq(channel); + if (!channel->type->keep_eventq) + efx_fini_eventq(channel); + } + + return rc; +} + +void efx_soft_disable_interrupts(struct efx_nic *efx) +{ + struct efx_channel *channel; + + if (efx->state == STATE_DISABLED) + return; + + efx_mcdi_mode_poll(efx); + + efx->irq_soft_enabled = false; + smp_wmb(); + + if (efx->legacy_irq) + synchronize_irq(efx->legacy_irq); + + efx_for_each_channel(channel, efx) { + if (channel->irq) + synchronize_irq(channel->irq); + + efx_stop_eventq(channel); + if (!channel->type->keep_eventq) + efx_fini_eventq(channel); + } + + /* Flush the asynchronous MCDI request queue */ + efx_mcdi_flush_async(efx); +} + +int efx_enable_interrupts(struct efx_nic *efx) +{ + struct efx_channel *channel, *end_channel; + int rc; + + /* TODO: Is this really a bug? 
*/ + BUG_ON(efx->state == STATE_DISABLED); + + if (efx->eeh_disabled_legacy_irq) { + enable_irq(efx->legacy_irq); + efx->eeh_disabled_legacy_irq = false; + } + + efx->type->irq_enable_master(efx); + + efx_for_each_channel(channel, efx) { + if (channel->type->keep_eventq) { + rc = efx_init_eventq(channel); + if (rc) + goto fail; + } + } + + rc = efx_soft_enable_interrupts(efx); + if (rc) + goto fail; + + return 0; + +fail: + end_channel = channel; + efx_for_each_channel(channel, efx) { + if (channel == end_channel) + break; + if (channel->type->keep_eventq) + efx_fini_eventq(channel); + } + + efx->type->irq_disable_non_ev(efx); + + return rc; +} + +void efx_disable_interrupts(struct efx_nic *efx) +{ + struct efx_channel *channel; + + efx_soft_disable_interrupts(efx); + + efx_for_each_channel(channel, efx) { + if (channel->type->keep_eventq) + efx_fini_eventq(channel); + } + + efx->type->irq_disable_non_ev(efx); +} + +void efx_start_channels(struct efx_nic *efx) +{ + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) { + efx_for_each_channel_tx_queue(tx_queue, channel) { + efx_init_tx_queue(tx_queue); + atomic_inc(&efx->active_queues); + } + + efx_for_each_channel_rx_queue(rx_queue, channel) { + efx_init_rx_queue(rx_queue); + atomic_inc(&efx->active_queues); + efx_stop_eventq(channel); + efx_fast_push_rx_descriptors(rx_queue, false); + efx_start_eventq(channel); + } + + WARN_ON(channel->rx_pkt_n_frags); + } +} + +void efx_stop_channels(struct efx_nic *efx) +{ + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + struct efx_channel *channel; + int rc = 0; + + /* Stop RX refill */ + efx_for_each_channel(channel, efx) { + efx_for_each_channel_rx_queue(rx_queue, channel) + rx_queue->refill_enabled = false; + } + + efx_for_each_channel(channel, efx) { + /* RX packet processing is pipelined, so wait for the + * NAPI handler to complete. At least event queue 0 + * might be kept active by non-data events, so don't + * use napi_synchronize() but actually disable NAPI + * temporarily. + */ + if (efx_channel_has_rx_queue(channel)) { + efx_stop_eventq(channel); + efx_start_eventq(channel); + } + } + + if (efx->type->fini_dmaq) + rc = efx->type->fini_dmaq(efx); + + if (rc) { + netif_err(efx, drv, efx->net_dev, "failed to flush queues\n"); + } else { + netif_dbg(efx, drv, efx->net_dev, + "successfully flushed all queues\n"); + } + + efx_for_each_channel(channel, efx) { + efx_for_each_channel_rx_queue(rx_queue, channel) + efx_fini_rx_queue(rx_queue); + efx_for_each_possible_channel_tx_queue(tx_queue, channel) + efx_fini_tx_queue(tx_queue); + } +} + +/************************************************************************** + * + * NAPI interface + * + *************************************************************************/ + +/* Process channel's event queue + * + * This function is responsible for processing the event queue of a + * single channel. The caller must guarantee that this function will + * never be concurrently called more than once on the same channel, + * though different channels may be being processed concurrently. 
+ */ +static int efx_process_channel(struct efx_channel *channel, int budget) +{ + struct efx_tx_queue *tx_queue; + struct list_head rx_list; + int spent; + + if (unlikely(!channel->enabled)) + return 0; + + /* Prepare the batch receive list */ + EFX_WARN_ON_PARANOID(channel->rx_list != NULL); + INIT_LIST_HEAD(&rx_list); + channel->rx_list = &rx_list; + + efx_for_each_channel_tx_queue(tx_queue, channel) { + tx_queue->pkts_compl = 0; + tx_queue->bytes_compl = 0; + } + + spent = efx_nic_process_eventq(channel, budget); + if (spent && efx_channel_has_rx_queue(channel)) { + struct efx_rx_queue *rx_queue = + efx_channel_get_rx_queue(channel); + + efx_rx_flush_packet(channel); + efx_fast_push_rx_descriptors(rx_queue, true); + } + + /* Update BQL */ + efx_for_each_channel_tx_queue(tx_queue, channel) { + if (tx_queue->bytes_compl) { + netdev_tx_completed_queue(tx_queue->core_txq, + tx_queue->pkts_compl, + tx_queue->bytes_compl); + } + } + + /* Receive any packets we queued up */ + netif_receive_skb_list(channel->rx_list); + channel->rx_list = NULL; + + return spent; +} + +static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel) +{ + int step = efx->irq_mod_step_us; + + if (channel->irq_mod_score < irq_adapt_low_thresh) { + if (channel->irq_moderation_us > step) { + channel->irq_moderation_us -= step; + efx->type->push_irq_moderation(channel); + } + } else if (channel->irq_mod_score > irq_adapt_high_thresh) { + if (channel->irq_moderation_us < + efx->irq_rx_moderation_us) { + channel->irq_moderation_us += step; + efx->type->push_irq_moderation(channel); + } + } + + channel->irq_count = 0; + channel->irq_mod_score = 0; +} + +/* NAPI poll handler + * + * NAPI guarantees serialisation of polls of the same device, which + * provides the guarantee required by efx_process_channel(). + */ +static int efx_poll(struct napi_struct *napi, int budget) +{ + struct efx_channel *channel = + container_of(napi, struct efx_channel, napi_str); + struct efx_nic *efx = channel->efx; + int spent; + + netif_vdbg(efx, intr, efx->net_dev, + "channel %d NAPI poll executing on CPU %d\n", + channel->channel, raw_smp_processor_id()); + + spent = efx_process_channel(channel, budget); + + xdp_do_flush_map(); + + if (spent < budget) { + if (efx_channel_has_rx_queue(channel) && + efx->irq_rx_adaptive && + unlikely(++channel->irq_count == 1000)) { + efx_update_irq_mod(efx, channel); + } + +#ifdef CONFIG_RFS_ACCEL + /* Perhaps expire some ARFS filters */ + mod_delayed_work(system_wq, &channel->filter_work, 0); +#endif + + /* There is no race here; although napi_disable() will + * only wait for napi_complete(), this isn't a problem + * since efx_nic_eventq_read_ack() will have no effect if + * interrupts have already been disabled. 
+ */ + if (napi_complete_done(napi, spent)) + efx_nic_eventq_read_ack(channel); + } + + return spent; +} + +void efx_init_napi_channel(struct efx_channel *channel) +{ + struct efx_nic *efx = channel->efx; + + channel->napi_dev = efx->net_dev; + netif_napi_add(channel->napi_dev, &channel->napi_str, + efx_poll, napi_weight); +} + +void efx_init_napi(struct efx_nic *efx) +{ + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) + efx_init_napi_channel(channel); +} + +void efx_fini_napi_channel(struct efx_channel *channel) +{ + if (channel->napi_dev) + netif_napi_del(&channel->napi_str); + + channel->napi_dev = NULL; +} + +void efx_fini_napi(struct efx_nic *efx) +{ + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) + efx_fini_napi_channel(channel); +} diff --git a/drivers/net/ethernet/sfc/efx_channels.h b/drivers/net/ethernet/sfc/efx_channels.h new file mode 100644 index 000000000000..8d7b8c4142d7 --- /dev/null +++ b/drivers/net/ethernet/sfc/efx_channels.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_CHANNELS_H +#define EFX_CHANNELS_H + +int efx_probe_interrupts(struct efx_nic *efx); +void efx_remove_interrupts(struct efx_nic *efx); +int efx_soft_enable_interrupts(struct efx_nic *efx); +void efx_soft_disable_interrupts(struct efx_nic *efx); +int efx_enable_interrupts(struct efx_nic *efx); +void efx_disable_interrupts(struct efx_nic *efx); + +void efx_set_interrupt_affinity(struct efx_nic *efx); +void efx_clear_interrupt_affinity(struct efx_nic *efx); + +int efx_probe_eventq(struct efx_channel *channel); +int efx_init_eventq(struct efx_channel *channel); +void efx_start_eventq(struct efx_channel *channel); +void efx_stop_eventq(struct efx_channel *channel); +void efx_fini_eventq(struct efx_channel *channel); +void efx_remove_eventq(struct efx_channel *channel); + +struct efx_channel * +efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel); +int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries); +void efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len); +void efx_set_channel_names(struct efx_nic *efx); +int efx_init_channels(struct efx_nic *efx); +int efx_probe_channels(struct efx_nic *efx); +int efx_set_channels(struct efx_nic *efx); +bool efx_default_channel_want_txqs(struct efx_channel *channel); +void efx_remove_channel(struct efx_channel *channel); +void efx_remove_channels(struct efx_nic *efx); +void efx_fini_channels(struct efx_nic *efx); +struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel); +void efx_start_channels(struct efx_nic *efx); +void efx_stop_channels(struct efx_nic *efx); + +void efx_init_napi_channel(struct efx_channel *channel); +void efx_init_napi(struct efx_nic *efx); +void efx_fini_napi_channel(struct efx_channel *channel); +void efx_fini_napi(struct efx_nic *efx); + +int efx_channel_dummy_op_int(struct efx_channel *channel); +void efx_channel_dummy_op_void(struct efx_channel *channel); + +#endif diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c new file mode 
100644 index 000000000000..b0d76bc19673 --- /dev/null +++ b/drivers/net/ethernet/sfc/efx_common.c @@ -0,0 +1,1102 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include "net_driver.h" +#include <linux/module.h> +#include <linux/netdevice.h> +#include "efx_common.h" +#include "efx_channels.h" +#include "efx.h" +#include "mcdi.h" +#include "selftest.h" +#include "rx_common.h" +#include "tx_common.h" +#include "nic.h" +#include "io.h" +#include "mcdi_pcol.h" + +static unsigned int debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE | + NETIF_MSG_LINK | NETIF_MSG_IFDOWN | + NETIF_MSG_IFUP | NETIF_MSG_RX_ERR | + NETIF_MSG_TX_ERR | NETIF_MSG_HW); +module_param(debug, uint, 0); +MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value"); + +/* This is the time (in jiffies) between invocations of the hardware + * monitor. + * On Falcon-based NICs, this will: + * - Check the on-board hardware monitor; + * - Poll the link state and reconfigure the hardware as necessary. + * On Siena-based NICs for power systems with EEH support, this will give EEH a + * chance to start. + */ +static unsigned int efx_monitor_interval = 1 * HZ; + +/* How often and how many times to poll for a reset while waiting for a + * BIST that another function started to complete. + */ +#define BIST_WAIT_DELAY_MS 100 +#define BIST_WAIT_DELAY_COUNT 100 + +/* Default stats update time */ +#define STATS_PERIOD_MS_DEFAULT 1000 + +const unsigned int efx_reset_type_max = RESET_TYPE_MAX; +const char *const efx_reset_type_names[] = { + [RESET_TYPE_INVISIBLE] = "INVISIBLE", + [RESET_TYPE_ALL] = "ALL", + [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL", + [RESET_TYPE_WORLD] = "WORLD", + [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE", + [RESET_TYPE_DATAPATH] = "DATAPATH", + [RESET_TYPE_MC_BIST] = "MC_BIST", + [RESET_TYPE_DISABLE] = "DISABLE", + [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG", + [RESET_TYPE_INT_ERROR] = "INT_ERROR", + [RESET_TYPE_DMA_ERROR] = "DMA_ERROR", + [RESET_TYPE_TX_SKIP] = "TX_SKIP", + [RESET_TYPE_MC_FAILURE] = "MC_FAILURE", + [RESET_TYPE_MCDI_TIMEOUT] = "MCDI_TIMEOUT (FLR)", +}; + +#define RESET_TYPE(type) \ + STRING_TABLE_LOOKUP(type, efx_reset_type) + +/* Loopback mode names (see LOOPBACK_MODE()) */ +const unsigned int efx_loopback_mode_max = LOOPBACK_MAX; +const char *const efx_loopback_mode_names[] = { + [LOOPBACK_NONE] = "NONE", + [LOOPBACK_DATA] = "DATAPATH", + [LOOPBACK_GMAC] = "GMAC", + [LOOPBACK_XGMII] = "XGMII", + [LOOPBACK_XGXS] = "XGXS", + [LOOPBACK_XAUI] = "XAUI", + [LOOPBACK_GMII] = "GMII", + [LOOPBACK_SGMII] = "SGMII", + [LOOPBACK_XGBR] = "XGBR", + [LOOPBACK_XFI] = "XFI", + [LOOPBACK_XAUI_FAR] = "XAUI_FAR", + [LOOPBACK_GMII_FAR] = "GMII_FAR", + [LOOPBACK_SGMII_FAR] = "SGMII_FAR", + [LOOPBACK_XFI_FAR] = "XFI_FAR", + [LOOPBACK_GPHY] = "GPHY", + [LOOPBACK_PHYXS] = "PHYXS", + [LOOPBACK_PCS] = "PCS", + [LOOPBACK_PMAPMD] = "PMA/PMD", + [LOOPBACK_XPORT] = "XPORT", + [LOOPBACK_XGMII_WS] = "XGMII_WS", + [LOOPBACK_XAUI_WS] = "XAUI_WS", + [LOOPBACK_XAUI_WS_FAR] = "XAUI_WS_FAR", + [LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR", + [LOOPBACK_GMII_WS] = "GMII_WS", + [LOOPBACK_XFI_WS] = "XFI_WS", + 
[LOOPBACK_XFI_WS_FAR] = "XFI_WS_FAR", + [LOOPBACK_PHYXS_WS] = "PHYXS_WS", +}; + +/* Reset workqueue. If any NIC has a hardware failure then a reset will be + * queued onto this work queue. This is not a per-nic work queue, because + * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised. + */ +static struct workqueue_struct *reset_workqueue; + +int efx_create_reset_workqueue(void) +{ + reset_workqueue = create_singlethread_workqueue("sfc_reset"); + if (!reset_workqueue) { + printk(KERN_ERR "Failed to create reset workqueue\n"); + return -ENOMEM; + } + + return 0; +} + +void efx_queue_reset_work(struct efx_nic *efx) +{ + queue_work(reset_workqueue, &efx->reset_work); +} + +void efx_flush_reset_workqueue(struct efx_nic *efx) +{ + cancel_work_sync(&efx->reset_work); +} + +void efx_destroy_reset_workqueue(void) +{ + if (reset_workqueue) { + destroy_workqueue(reset_workqueue); + reset_workqueue = NULL; + } +} + +/* We assume that efx->type->reconfigure_mac will always try to sync RX + * filters and therefore needs to read-lock the filter table against freeing + */ +void efx_mac_reconfigure(struct efx_nic *efx) +{ + if (efx->type->reconfigure_mac) { + down_read(&efx->filter_sem); + efx->type->reconfigure_mac(efx); + up_read(&efx->filter_sem); + } +} + +/* Asynchronous work item for changing MAC promiscuity and multicast + * hash. Avoid a drain/rx_ingress enable by reconfiguring the current + * MAC directly. + */ +static void efx_mac_work(struct work_struct *data) +{ + struct efx_nic *efx = container_of(data, struct efx_nic, mac_work); + + mutex_lock(&efx->mac_lock); + if (efx->port_enabled) + efx_mac_reconfigure(efx); + mutex_unlock(&efx->mac_lock); +} + +/* This ensures that the kernel is kept informed (via + * netif_carrier_on/off) of the link status, and also maintains the + * link status's stop on the port's TX queue. + */ +void efx_link_status_changed(struct efx_nic *efx) +{ + struct efx_link_state *link_state = &efx->link_state; + + /* SFC Bug 5356: A net_dev notifier is registered, so we must ensure + * that no events are triggered between unregister_netdev() and the + * driver unloading. A more general condition is that NETDEV_CHANGE + * can only be generated between NETDEV_UP and NETDEV_DOWN + */ + if (!netif_running(efx->net_dev)) + return; + + if (link_state->up != netif_carrier_ok(efx->net_dev)) { + efx->n_link_state_changes++; + + if (link_state->up) + netif_carrier_on(efx->net_dev); + else + netif_carrier_off(efx->net_dev); + } + + /* Status message for kernel log */ + if (link_state->up) + netif_info(efx, link, efx->net_dev, + "link up at %uMbps %s-duplex (MTU %d)\n", + link_state->speed, link_state->fd ? "full" : "half", + efx->net_dev->mtu); + else + netif_info(efx, link, efx->net_dev, "link down\n"); +} + +unsigned int efx_xdp_max_mtu(struct efx_nic *efx) +{ + /* The maximum MTU that we can fit in a single page, allowing for + * framing, overhead and XDP headroom. + */ + int overhead = EFX_MAX_FRAME_LEN(0) + sizeof(struct efx_rx_page_state) + + efx->rx_prefix_size + efx->type->rx_buffer_padding + + efx->rx_ip_align + XDP_PACKET_HEADROOM; + + return PAGE_SIZE - overhead; +} + +/* Context: process, rtnl_lock() held. 
*/ +int efx_change_mtu(struct net_device *net_dev, int new_mtu) +{ + struct efx_nic *efx = netdev_priv(net_dev); + int rc; + + rc = efx_check_disabled(efx); + if (rc) + return rc; + + if (rtnl_dereference(efx->xdp_prog) && + new_mtu > efx_xdp_max_mtu(efx)) { + netif_err(efx, drv, efx->net_dev, + "Requested MTU of %d too big for XDP (max: %d)\n", + new_mtu, efx_xdp_max_mtu(efx)); + return -EINVAL; + } + + netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu); + + efx_device_detach_sync(efx); + efx_stop_all(efx); + + mutex_lock(&efx->mac_lock); + net_dev->mtu = new_mtu; + efx_mac_reconfigure(efx); + mutex_unlock(&efx->mac_lock); + + efx_start_all(efx); + efx_device_attach_if_not_resetting(efx); + return 0; +} + +/************************************************************************** + * + * Hardware monitor + * + **************************************************************************/ + +/* Run periodically off the general workqueue */ +static void efx_monitor(struct work_struct *data) +{ + struct efx_nic *efx = container_of(data, struct efx_nic, + monitor_work.work); + + netif_vdbg(efx, timer, efx->net_dev, + "hardware monitor executing on CPU %d\n", + raw_smp_processor_id()); + BUG_ON(efx->type->monitor == NULL); + + /* If the mac_lock is already held then it is likely a port + * reconfiguration is already in place, which will likely do + * most of the work of monitor() anyway. + */ + if (mutex_trylock(&efx->mac_lock)) { + if (efx->port_enabled && efx->type->monitor) + efx->type->monitor(efx); + mutex_unlock(&efx->mac_lock); + } + + efx_start_monitor(efx); +} + +void efx_start_monitor(struct efx_nic *efx) +{ + if (efx->type->monitor) + queue_delayed_work(efx->workqueue, &efx->monitor_work, + efx_monitor_interval); +} + +/************************************************************************** + * + * Event queue processing + * + *************************************************************************/ + +/* Channels are shutdown and reinitialised whilst the NIC is running + * to propagate configuration changes (mtu, checksum offload), or + * to clear hardware error conditions + */ +static void efx_start_datapath(struct efx_nic *efx) +{ + netdev_features_t old_features = efx->net_dev->features; + bool old_rx_scatter = efx->rx_scatter; + size_t rx_buf_len; + + /* Calculate the rx buffer allocation parameters required to + * support the current MTU, including padding for header + * alignment and overruns. 
+ */ + efx->rx_dma_len = (efx->rx_prefix_size + + EFX_MAX_FRAME_LEN(efx->net_dev->mtu) + + efx->type->rx_buffer_padding); + rx_buf_len = (sizeof(struct efx_rx_page_state) + XDP_PACKET_HEADROOM + + efx->rx_ip_align + efx->rx_dma_len); + if (rx_buf_len <= PAGE_SIZE) { + efx->rx_scatter = efx->type->always_rx_scatter; + efx->rx_buffer_order = 0; + } else if (efx->type->can_rx_scatter) { + BUILD_BUG_ON(EFX_RX_USR_BUF_SIZE % L1_CACHE_BYTES); + BUILD_BUG_ON(sizeof(struct efx_rx_page_state) + + 2 * ALIGN(NET_IP_ALIGN + EFX_RX_USR_BUF_SIZE, + EFX_RX_BUF_ALIGNMENT) > + PAGE_SIZE); + efx->rx_scatter = true; + efx->rx_dma_len = EFX_RX_USR_BUF_SIZE; + efx->rx_buffer_order = 0; + } else { + efx->rx_scatter = false; + efx->rx_buffer_order = get_order(rx_buf_len); + } + + efx_rx_config_page_split(efx); + if (efx->rx_buffer_order) + netif_dbg(efx, drv, efx->net_dev, + "RX buf len=%u; page order=%u batch=%u\n", + efx->rx_dma_len, efx->rx_buffer_order, + efx->rx_pages_per_batch); + else + netif_dbg(efx, drv, efx->net_dev, + "RX buf len=%u step=%u bpp=%u; page batch=%u\n", + efx->rx_dma_len, efx->rx_page_buf_step, + efx->rx_bufs_per_page, efx->rx_pages_per_batch); + + /* Restore previously fixed features in hw_features and remove + * features which are fixed now + */ + efx->net_dev->hw_features |= efx->net_dev->features; + efx->net_dev->hw_features &= ~efx->fixed_features; + efx->net_dev->features |= efx->fixed_features; + if (efx->net_dev->features != old_features) + netdev_features_change(efx->net_dev); + + /* RX filters may also have scatter-enabled flags */ + if ((efx->rx_scatter != old_rx_scatter) && + efx->type->filter_update_rx_scatter) + efx->type->filter_update_rx_scatter(efx); + + /* We must keep at least one descriptor in a TX ring empty. + * We could avoid this when the queue size does not exactly + * match the hardware ring size, but it's not that important. + * Therefore we stop the queue when one more skb might fill + * the ring completely. We wake it when half way back to + * empty. + */ + efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx); + efx->txq_wake_thresh = efx->txq_stop_thresh / 2; + + /* Initialise the channels */ + efx_start_channels(efx); + + efx_ptp_start_datapath(efx); + + if (netif_device_present(efx->net_dev)) + netif_tx_wake_all_queues(efx->net_dev); +} + +static void efx_stop_datapath(struct efx_nic *efx) +{ + EFX_ASSERT_RESET_SERIALISED(efx); + BUG_ON(efx->port_enabled); + + efx_ptp_stop_datapath(efx); + + efx_stop_channels(efx); +} + +/************************************************************************** + * + * Port handling + * + **************************************************************************/ + +static void efx_start_port(struct efx_nic *efx) +{ + netif_dbg(efx, ifup, efx->net_dev, "start port\n"); + BUG_ON(efx->port_enabled); + + mutex_lock(&efx->mac_lock); + efx->port_enabled = true; + + /* Ensure MAC ingress/egress is enabled */ + efx_mac_reconfigure(efx); + + mutex_unlock(&efx->mac_lock); +} + +/* Cancel work for MAC reconfiguration, periodic hardware monitoring + * and the async self-test, wait for them to finish and prevent them + * being scheduled again. This doesn't cover online resets, which + * should only be cancelled when removing the device. 
+ */ +static void efx_stop_port(struct efx_nic *efx) +{ + netif_dbg(efx, ifdown, efx->net_dev, "stop port\n"); + + EFX_ASSERT_RESET_SERIALISED(efx); + + mutex_lock(&efx->mac_lock); + efx->port_enabled = false; + mutex_unlock(&efx->mac_lock); + + /* Serialise against efx_set_multicast_list() */ + netif_addr_lock_bh(efx->net_dev); + netif_addr_unlock_bh(efx->net_dev); + + cancel_delayed_work_sync(&efx->monitor_work); + efx_selftest_async_cancel(efx); + cancel_work_sync(&efx->mac_work); +} + +/* If the interface is supposed to be running but is not, start + * the hardware and software data path, regular activity for the port + * (MAC statistics, link polling, etc.) and schedule the port to be + * reconfigured. Interrupts must already be enabled. This function + * is safe to call multiple times, so long as the NIC is not disabled. + * Requires the RTNL lock. + */ +void efx_start_all(struct efx_nic *efx) +{ + EFX_ASSERT_RESET_SERIALISED(efx); + BUG_ON(efx->state == STATE_DISABLED); + + /* Check that it is appropriate to restart the interface. All + * of these flags are safe to read under just the rtnl lock + */ + if (efx->port_enabled || !netif_running(efx->net_dev) || + efx->reset_pending) + return; + + efx_start_port(efx); + efx_start_datapath(efx); + + /* Start the hardware monitor if there is one */ + efx_start_monitor(efx); + + /* Link state detection is normally event-driven; we have + * to poll now because we could have missed a change + */ + mutex_lock(&efx->mac_lock); + if (efx->phy_op->poll(efx)) + efx_link_status_changed(efx); + mutex_unlock(&efx->mac_lock); + + if (efx->type->start_stats) { + efx->type->start_stats(efx); + efx->type->pull_stats(efx); + spin_lock_bh(&efx->stats_lock); + efx->type->update_stats(efx, NULL, NULL); + spin_unlock_bh(&efx->stats_lock); + } +} + +/* Quiesce the hardware and software data path, and regular activity + * for the port without bringing the link down. Safe to call multiple + * times with the NIC in almost any state, but interrupts should be + * enabled. Requires the RTNL lock. + */ +void efx_stop_all(struct efx_nic *efx) +{ + EFX_ASSERT_RESET_SERIALISED(efx); + + /* port_enabled can be read safely under the rtnl lock */ + if (!efx->port_enabled) + return; + + if (efx->type->update_stats) { + /* update stats before we go down so we can accurately count + * rx_nodesc_drops + */ + efx->type->pull_stats(efx); + spin_lock_bh(&efx->stats_lock); + efx->type->update_stats(efx, NULL, NULL); + spin_unlock_bh(&efx->stats_lock); + efx->type->stop_stats(efx); + } + + efx_stop_port(efx); + + /* Stop the kernel transmit interface. This is only valid if + * the device is stopped or detached; otherwise the watchdog + * may fire immediately. + */ + WARN_ON(netif_running(efx->net_dev) && + netif_device_present(efx->net_dev)); + netif_tx_disable(efx->net_dev); + + efx_stop_datapath(efx); +} + +/* Context: process, dev_base_lock or RTNL held, non-blocking. */ +void efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + spin_lock_bh(&efx->stats_lock); + efx->type->update_stats(efx, NULL, stats); + spin_unlock_bh(&efx->stats_lock); +} + +/* Push loopback/power/transmit disable settings to the PHY, and reconfigure + * the MAC appropriately. All other PHY configuration changes are pushed + * through phy_op->set_settings(), and pushed asynchronously to the MAC + * through efx_monitor(). 
+ * + * Callers must hold the mac_lock + */ +int __efx_reconfigure_port(struct efx_nic *efx) +{ + enum efx_phy_mode phy_mode; + int rc = 0; + + WARN_ON(!mutex_is_locked(&efx->mac_lock)); + + /* Disable PHY transmit in mac level loopbacks */ + phy_mode = efx->phy_mode; + if (LOOPBACK_INTERNAL(efx)) + efx->phy_mode |= PHY_MODE_TX_DISABLED; + else + efx->phy_mode &= ~PHY_MODE_TX_DISABLED; + + if (efx->type->reconfigure_port) + rc = efx->type->reconfigure_port(efx); + + if (rc) + efx->phy_mode = phy_mode; + + return rc; +} + +/* Reinitialise the MAC to pick up new PHY settings, even if the port is + * disabled. + */ +int efx_reconfigure_port(struct efx_nic *efx) +{ + int rc; + + EFX_ASSERT_RESET_SERIALISED(efx); + + mutex_lock(&efx->mac_lock); + rc = __efx_reconfigure_port(efx); + mutex_unlock(&efx->mac_lock); + + return rc; +} + +/************************************************************************** + * + * Device reset and suspend + * + **************************************************************************/ + +static void efx_wait_for_bist_end(struct efx_nic *efx) +{ + int i; + + for (i = 0; i < BIST_WAIT_DELAY_COUNT; ++i) { + if (efx_mcdi_poll_reboot(efx)) + goto out; + msleep(BIST_WAIT_DELAY_MS); + } + + netif_err(efx, drv, efx->net_dev, "Warning: No MC reboot after BIST mode\n"); +out: + /* Either way unset the BIST flag. If we found no reboot we probably + * won't recover, but we should try. + */ + efx->mc_bist_for_other_fn = false; +} + +/* Try recovery mechanisms. + * For now only EEH is supported. + * Returns 0 if the recovery mechanisms are unsuccessful. + * Returns a non-zero value otherwise. + */ +int efx_try_recovery(struct efx_nic *efx) +{ +#ifdef CONFIG_EEH + /* A PCI error can occur and not be seen by EEH because nothing + * happens on the PCI bus. In this case the driver may fail and + * schedule a 'recover or reset', leading to this recovery handler. + * Manually call the eeh failure check function. + */ + struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev); + if (eeh_dev_check_failure(eehdev)) { + /* The EEH mechanisms will handle the error and reset the + * device if necessary. + */ + return 1; + } +#endif + return 0; +} + +/* Tears down the entire software state and most of the hardware state + * before reset. + */ +void efx_reset_down(struct efx_nic *efx, enum reset_type method) +{ + EFX_ASSERT_RESET_SERIALISED(efx); + + if (method == RESET_TYPE_MCDI_TIMEOUT) + efx->type->prepare_flr(efx); + + efx_stop_all(efx); + efx_disable_interrupts(efx); + + mutex_lock(&efx->mac_lock); + down_write(&efx->filter_sem); + mutex_lock(&efx->rss_lock); + if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && + method != RESET_TYPE_DATAPATH) + efx->phy_op->fini(efx); + efx->type->fini(efx); +} + +/* This function will always ensure that the locks acquired in + * efx_reset_down() are released. A failure return code indicates + * that we were unable to reinitialise the hardware, and the + * driver should be disabled. If ok is false, then the rx and tx + * engines are not restarted, pending a RESET_DISABLE. 
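The contract spelled out here is easiest to see as a skeleton: efx_reset_down() leaves mac_lock, filter_sem and rss_lock held, and efx_reset_up() releases them on every exit path, success or failure, so callers always end balanced. A reduced userspace sketch of that pairing (only two of the three locks, with pthread mutexes standing in for the kernel primitives):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t mac_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t rss_lock = PTHREAD_MUTEX_INITIALIZER;

static void reset_down(void)
{
        pthread_mutex_lock(&mac_lock);   /* held across the whole reset */
        pthread_mutex_lock(&rss_lock);
        /* ... quiesce the hardware ... */
}

static int reset_up(int ok)
{
        int rc = ok ? 0 : -5;

        /* ... reinitialise the hardware when ok ... */
        pthread_mutex_unlock(&rss_lock); /* released on success and failure alike */
        pthread_mutex_unlock(&mac_lock);
        return rc;
}

int main(void)
{
        reset_down();
        printf("rc=%d\n", reset_up(1));  /* locks are balanced either way */
        return 0;
}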
+ */ +int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) +{ + int rc; + + EFX_ASSERT_RESET_SERIALISED(efx); + + if (method == RESET_TYPE_MCDI_TIMEOUT) + efx->type->finish_flr(efx); + + /* Ensure that SRAM is initialised even if we're disabling the device */ + rc = efx->type->init(efx); + if (rc) { + netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n"); + goto fail; + } + + if (!ok) + goto fail; + + if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && + method != RESET_TYPE_DATAPATH) { + rc = efx->phy_op->init(efx); + if (rc) + goto fail; + rc = efx->phy_op->reconfigure(efx); + if (rc && rc != -EPERM) + netif_err(efx, drv, efx->net_dev, + "could not restore PHY settings\n"); + } + + rc = efx_enable_interrupts(efx); + if (rc) + goto fail; + +#ifdef CONFIG_SFC_SRIOV + rc = efx->type->vswitching_restore(efx); + if (rc) /* not fatal; the PF will still work fine */ + netif_warn(efx, probe, efx->net_dev, + "failed to restore vswitching rc=%d;" + " VFs may not function\n", rc); +#endif + + if (efx->type->rx_restore_rss_contexts) + efx->type->rx_restore_rss_contexts(efx); + mutex_unlock(&efx->rss_lock); + efx->type->filter_table_restore(efx); + up_write(&efx->filter_sem); + if (efx->type->sriov_reset) + efx->type->sriov_reset(efx); + + mutex_unlock(&efx->mac_lock); + + efx_start_all(efx); + + if (efx->type->udp_tnl_push_ports) + efx->type->udp_tnl_push_ports(efx); + + return 0; + +fail: + efx->port_initialized = false; + + mutex_unlock(&efx->rss_lock); + up_write(&efx->filter_sem); + mutex_unlock(&efx->mac_lock); + + return rc; +} + +/* Reset the NIC using the specified method. Note that the reset may + * fail, in which case the card will be left in an unusable state. + * + * Caller must hold the rtnl_lock. + */ +int efx_reset(struct efx_nic *efx, enum reset_type method) +{ + bool disabled; + int rc, rc2; + + netif_info(efx, drv, efx->net_dev, "resetting (%s)\n", + RESET_TYPE(method)); + + efx_device_detach_sync(efx); + efx_reset_down(efx, method); + + rc = efx->type->reset(efx, method); + if (rc) { + netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n"); + goto out; + } + + /* Clear flags for the scopes we covered. We assume the NIC and + * driver are now quiescent so that there is no race here. + */ + if (method < RESET_TYPE_MAX_METHOD) + efx->reset_pending &= -(1 << (method + 1)); + else /* it doesn't fit into the well-ordered scope hierarchy */ + __clear_bit(method, &efx->reset_pending); + + /* Reinitialise bus-mastering, which may have been turned off before + * the reset was scheduled. This is still appropriate, even in the + * RESET_TYPE_DISABLE since this driver generally assumes the hardware + * can respond to requests. + */ + pci_set_master(efx->pci_dev); + +out: + /* Leave device stopped if necessary */ + disabled = rc || + method == RESET_TYPE_DISABLE || + method == RESET_TYPE_RECOVER_OR_DISABLE; + rc2 = efx_reset_up(efx, method, !disabled); + if (rc2) { + disabled = true; + if (!rc) + rc = rc2; + } + + if (disabled) { + dev_close(efx->net_dev); + netif_err(efx, drv, efx->net_dev, "has been disabled\n"); + efx->state = STATE_DISABLED; + } else { + netif_dbg(efx, drv, efx->net_dev, "reset complete\n"); + efx_device_attach_if_not_resetting(efx); + } + return rc; +} + +/* The worker thread exists so that code that cannot sleep can + * schedule a reset for later. 
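Returning to the reset_pending update in efx_reset() above: -(1 << (method + 1)) is, in two's complement, a mask with every bit from position method + 1 upwards set, so the AND clears all pending flags at or below the scope that was just covered. A small userspace demonstration:

#include <stdio.h>

int main(void)
{
        unsigned long pending = 0x16UL; /* bits 1, 2 and 4 pending (example values) */
        unsigned int method = 2;        /* a reset of scope 2 has just completed */

        /* -(1 << 3) == ...11111000: keep only scopes above the one covered */
        pending &= -(1UL << (method + 1));

        printf("remaining pending flags: %#lx\n", pending); /* prints 0x10 */
        return 0;
}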
+ */ +static void efx_reset_work(struct work_struct *data) +{ + struct efx_nic *efx = container_of(data, struct efx_nic, reset_work); + unsigned long pending; + enum reset_type method; + + pending = READ_ONCE(efx->reset_pending); + method = fls(pending) - 1; + + if (method == RESET_TYPE_MC_BIST) + efx_wait_for_bist_end(efx); + + if ((method == RESET_TYPE_RECOVER_OR_DISABLE || + method == RESET_TYPE_RECOVER_OR_ALL) && + efx_try_recovery(efx)) + return; + + if (!pending) + return; + + rtnl_lock(); + + /* We checked the state in efx_schedule_reset() but it may + * have changed by now. Now that we have the RTNL lock, + * it cannot change again. + */ + if (efx->state == STATE_READY) + (void)efx_reset(efx, method); + + rtnl_unlock(); +} + +void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) +{ + enum reset_type method; + + if (efx->state == STATE_RECOVERY) { + netif_dbg(efx, drv, efx->net_dev, + "recovering: skip scheduling %s reset\n", + RESET_TYPE(type)); + return; + } + + switch (type) { + case RESET_TYPE_INVISIBLE: + case RESET_TYPE_ALL: + case RESET_TYPE_RECOVER_OR_ALL: + case RESET_TYPE_WORLD: + case RESET_TYPE_DISABLE: + case RESET_TYPE_RECOVER_OR_DISABLE: + case RESET_TYPE_DATAPATH: + case RESET_TYPE_MC_BIST: + case RESET_TYPE_MCDI_TIMEOUT: + method = type; + netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n", + RESET_TYPE(method)); + break; + default: + method = efx->type->map_reset_reason(type); + netif_dbg(efx, drv, efx->net_dev, + "scheduling %s reset for %s\n", + RESET_TYPE(method), RESET_TYPE(type)); + break; + } + + set_bit(method, &efx->reset_pending); + smp_mb(); /* ensure we change reset_pending before checking state */ + + /* If we're not READY then just leave the flags set as the cue + * to abort probing or reschedule the reset later. + */ + if (READ_ONCE(efx->state) != STATE_READY) + return; + + /* efx_process_channel() will no longer read events once a + * reset is scheduled. So switch back to poll'd MCDI completions. + */ + efx_mcdi_mode_poll(efx); + + efx_queue_reset_work(efx); +} + +/************************************************************************** + * + * Dummy PHY/MAC operations + * + * Can be used for some unimplemented operations + * Needed so all function pointers are valid and do not have to be tested + * before use + * + **************************************************************************/ +int efx_port_dummy_op_int(struct efx_nic *efx) +{ + return 0; +} +void efx_port_dummy_op_void(struct efx_nic *efx) {} + +static bool efx_port_dummy_op_poll(struct efx_nic *efx) +{ + return false; +} + +static const struct efx_phy_operations efx_dummy_phy_operations = { + .init = efx_port_dummy_op_int, + .reconfigure = efx_port_dummy_op_int, + .poll = efx_port_dummy_op_poll, + .fini = efx_port_dummy_op_void, +}; + +/************************************************************************** + * + * Data housekeeping + * + **************************************************************************/ + +/* This zeroes out and then fills in the invariants in a struct + * efx_nic (including all sub-structures). 
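The scheduling side sets individual bits in reset_pending; the worker above then services only the widest-scope request, because fls() returns the 1-based index of the highest set bit and fls(pending) - 1 recovers that method. A userspace illustration, with a GCC/Clang builtin standing in for the kernel's fls():

#include <stdio.h>

/* Stand-in for the kernel's fls(): 1-based index of the highest set bit, 0 if none */
static int fls_long(unsigned long x)
{
        return x ? (int)(8 * sizeof(unsigned long)) - __builtin_clzl(x) : 0;
}

int main(void)
{
        unsigned long pending = 0x14UL;        /* example: methods 2 and 4 both pending */
        int method = fls_long(pending) - 1;    /* 4: the widest-scope request wins */

        printf("servicing reset method %d\n", method);
        return 0;
}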
+ */ +int efx_init_struct(struct efx_nic *efx, + struct pci_dev *pci_dev, struct net_device *net_dev) +{ + int rc = -ENOMEM; + + /* Initialise common structures */ + INIT_LIST_HEAD(&efx->node); + INIT_LIST_HEAD(&efx->secondary_list); + spin_lock_init(&efx->biu_lock); +#ifdef CONFIG_SFC_MTD + INIT_LIST_HEAD(&efx->mtd_list); +#endif + INIT_WORK(&efx->reset_work, efx_reset_work); + INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor); + efx_selftest_async_init(efx); + efx->pci_dev = pci_dev; + efx->msg_enable = debug; + efx->state = STATE_UNINIT; + strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name)); + + efx->net_dev = net_dev; + efx->rx_prefix_size = efx->type->rx_prefix_size; + efx->rx_ip_align = + NET_IP_ALIGN ? (efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0; + efx->rx_packet_hash_offset = + efx->type->rx_hash_offset - efx->type->rx_prefix_size; + efx->rx_packet_ts_offset = + efx->type->rx_ts_offset - efx->type->rx_prefix_size; + INIT_LIST_HEAD(&efx->rss_context.list); + mutex_init(&efx->rss_lock); + spin_lock_init(&efx->stats_lock); + efx->vi_stride = EFX_DEFAULT_VI_STRIDE; + efx->num_mac_stats = MC_CMD_MAC_NSTATS; + BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END); + mutex_init(&efx->mac_lock); +#ifdef CONFIG_RFS_ACCEL + mutex_init(&efx->rps_mutex); + spin_lock_init(&efx->rps_hash_lock); + /* Failure to allocate is not fatal, but may degrade ARFS performance */ + efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE, + sizeof(*efx->rps_hash_table), GFP_KERNEL); +#endif + efx->phy_op = &efx_dummy_phy_operations; + efx->mdio.dev = net_dev; + INIT_WORK(&efx->mac_work, efx_mac_work); + init_waitqueue_head(&efx->flush_wq); + + rc = efx_init_channels(efx); + if (rc) + goto fail; + + /* Would be good to use the net_dev name, but we're too early */ + snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s", + pci_name(pci_dev)); + efx->workqueue = create_singlethread_workqueue(efx->workqueue_name); + if (!efx->workqueue) { + rc = -ENOMEM; + goto fail; + } + + return 0; + +fail: + efx_fini_struct(efx); + return rc; +} + +void efx_fini_struct(struct efx_nic *efx) +{ +#ifdef CONFIG_RFS_ACCEL + kfree(efx->rps_hash_table); +#endif + + efx_fini_channels(efx); + + kfree(efx->vpd_sn); + + if (efx->workqueue) { + destroy_workqueue(efx->workqueue); + efx->workqueue = NULL; + } +} + +/* This configures the PCI device to enable I/O and DMA. */ +int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask, + unsigned int mem_map_size) +{ + struct pci_dev *pci_dev = efx->pci_dev; + int rc; + + netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n"); + + rc = pci_enable_device(pci_dev); + if (rc) { + netif_err(efx, probe, efx->net_dev, + "failed to enable PCI device\n"); + goto fail1; + } + + pci_set_master(pci_dev); + + /* Set the PCI DMA mask. Try all possibilities from our + * genuine mask down to 32 bits, because some architectures + * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit + * masks event though they reject 46 bit masks. + */ + while (dma_mask > 0x7fffffffUL) { + rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask); + if (rc == 0) + break; + dma_mask >>= 1; + } + if (rc) { + netif_err(efx, probe, efx->net_dev, + "could not find a suitable DMA mask\n"); + goto fail2; + } + netif_dbg(efx, probe, efx->net_dev, + "using DMA mask %llx\n", (unsigned long long)dma_mask); + + efx->membase_phys = pci_resource_start(efx->pci_dev, bar); + if (!efx->membase_phys) { + netif_err(efx, probe, efx->net_dev, + "ERROR: No BAR%d mapping from the BIOS. 
" + "Try pci=realloc on the kernel command line\n", bar); + rc = -ENODEV; + goto fail3; + } + + rc = pci_request_region(pci_dev, bar, "sfc"); + if (rc) { + netif_err(efx, probe, efx->net_dev, + "request for memory BAR failed\n"); + rc = -EIO; + goto fail3; + } + + efx->membase = ioremap(efx->membase_phys, mem_map_size); + if (!efx->membase) { + netif_err(efx, probe, efx->net_dev, + "could not map memory BAR at %llx+%x\n", + (unsigned long long)efx->membase_phys, mem_map_size); + rc = -ENOMEM; + goto fail4; + } + netif_dbg(efx, probe, efx->net_dev, + "memory BAR at %llx+%x (virtual %p)\n", + (unsigned long long)efx->membase_phys, mem_map_size, + efx->membase); + + return 0; + +fail4: + pci_release_region(efx->pci_dev, bar); +fail3: + efx->membase_phys = 0; +fail2: + pci_disable_device(efx->pci_dev); +fail1: + return rc; +} + +void efx_fini_io(struct efx_nic *efx, int bar) +{ + netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n"); + + if (efx->membase) { + iounmap(efx->membase); + efx->membase = NULL; + } + + if (efx->membase_phys) { + pci_release_region(efx->pci_dev, bar); + efx->membase_phys = 0; + } + + /* Don't disable bus-mastering if VFs are assigned */ + if (!pci_vfs_assigned(efx->pci_dev)) + pci_disable_device(efx->pci_dev); +} + +#ifdef CONFIG_SFC_MCDI_LOGGING +static ssize_t show_mcdi_log(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct efx_nic *efx = dev_get_drvdata(dev); + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + + return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled); +} + +static ssize_t set_mcdi_log(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct efx_nic *efx = dev_get_drvdata(dev); + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + bool enable = count > 0 && *buf != '0'; + + mcdi->logging_enabled = enable; + return count; +} + +static DEVICE_ATTR(mcdi_logging, 0644, show_mcdi_log, set_mcdi_log); + +void efx_init_mcdi_logging(struct efx_nic *efx) +{ + int rc = device_create_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging); + + if (rc) { + netif_warn(efx, drv, efx->net_dev, + "failed to init net dev attributes\n"); + } +} + +void efx_fini_mcdi_logging(struct efx_nic *efx) +{ + device_remove_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging); +} +#endif diff --git a/drivers/net/ethernet/sfc/efx_common.h b/drivers/net/ethernet/sfc/efx_common.h new file mode 100644 index 000000000000..fa2fc681e7f9 --- /dev/null +++ b/drivers/net/ethernet/sfc/efx_common.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. 
+ */ + +#ifndef EFX_COMMON_H +#define EFX_COMMON_H + +int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask, + unsigned int mem_map_size); +void efx_fini_io(struct efx_nic *efx, int bar); +int efx_init_struct(struct efx_nic *efx, struct pci_dev *pci_dev, + struct net_device *net_dev); +void efx_fini_struct(struct efx_nic *efx); + +void efx_start_all(struct efx_nic *efx); +void efx_stop_all(struct efx_nic *efx); + +void efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats); + +int efx_create_reset_workqueue(void); +void efx_queue_reset_work(struct efx_nic *efx); +void efx_flush_reset_workqueue(struct efx_nic *efx); +void efx_destroy_reset_workqueue(void); + +void efx_start_monitor(struct efx_nic *efx); + +int __efx_reconfigure_port(struct efx_nic *efx); +int efx_reconfigure_port(struct efx_nic *efx); + +#define EFX_ASSERT_RESET_SERIALISED(efx) \ + do { \ + if ((efx->state == STATE_READY) || \ + (efx->state == STATE_RECOVERY) || \ + (efx->state == STATE_DISABLED)) \ + ASSERT_RTNL(); \ + } while (0) + +int efx_try_recovery(struct efx_nic *efx); +void efx_reset_down(struct efx_nic *efx, enum reset_type method); +int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok); +int efx_reset(struct efx_nic *efx, enum reset_type method); +void efx_schedule_reset(struct efx_nic *efx, enum reset_type type); + +static inline int efx_check_disabled(struct efx_nic *efx) +{ + if (efx->state == STATE_DISABLED || efx->state == STATE_RECOVERY) { + netif_err(efx, drv, efx->net_dev, + "device is disabled due to earlier errors\n"); + return -EIO; + } + return 0; +} + +#ifdef CONFIG_SFC_MCDI_LOGGING +void efx_init_mcdi_logging(struct efx_nic *efx); +void efx_fini_mcdi_logging(struct efx_nic *efx); +#else +static inline void efx_init_mcdi_logging(struct efx_nic *efx) {} +static inline void efx_fini_mcdi_logging(struct efx_nic *efx) {} +#endif + +void efx_mac_reconfigure(struct efx_nic *efx); +void efx_link_status_changed(struct efx_nic *efx); +unsigned int efx_xdp_max_mtu(struct efx_nic *efx); +int efx_change_mtu(struct net_device *net_dev, int new_mtu); + +#endif diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index b31032da4bcb..993b5769525b 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -13,92 +13,13 @@ #include "workarounds.h" #include "selftest.h" #include "efx.h" +#include "efx_channels.h" +#include "rx_common.h" +#include "tx_common.h" +#include "ethtool_common.h" #include "filter.h" #include "nic.h" -struct efx_sw_stat_desc { - const char *name; - enum { - EFX_ETHTOOL_STAT_SOURCE_nic, - EFX_ETHTOOL_STAT_SOURCE_channel, - EFX_ETHTOOL_STAT_SOURCE_tx_queue - } source; - unsigned offset; - u64(*get_stat) (void *field); /* Reader function */ -}; - -/* Initialiser for a struct efx_sw_stat_desc with type-checking */ -#define EFX_ETHTOOL_STAT(stat_name, source_name, field, field_type, \ - get_stat_function) { \ - .name = #stat_name, \ - .source = EFX_ETHTOOL_STAT_SOURCE_##source_name, \ - .offset = ((((field_type *) 0) == \ - &((struct efx_##source_name *)0)->field) ? 
\ - offsetof(struct efx_##source_name, field) : \ - offsetof(struct efx_##source_name, field)), \ - .get_stat = get_stat_function, \ -} - -static u64 efx_get_uint_stat(void *field) -{ - return *(unsigned int *)field; -} - -static u64 efx_get_atomic_stat(void *field) -{ - return atomic_read((atomic_t *) field); -} - -#define EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(field) \ - EFX_ETHTOOL_STAT(field, nic, field, \ - atomic_t, efx_get_atomic_stat) - -#define EFX_ETHTOOL_UINT_CHANNEL_STAT(field) \ - EFX_ETHTOOL_STAT(field, channel, n_##field, \ - unsigned int, efx_get_uint_stat) -#define EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(field) \ - EFX_ETHTOOL_STAT(field, channel, field, \ - unsigned int, efx_get_uint_stat) - -#define EFX_ETHTOOL_UINT_TXQ_STAT(field) \ - EFX_ETHTOOL_STAT(tx_##field, tx_queue, field, \ - unsigned int, efx_get_uint_stat) - -static const struct efx_sw_stat_desc efx_sw_stat_desc[] = { - EFX_ETHTOOL_UINT_TXQ_STAT(merge_events), - EFX_ETHTOOL_UINT_TXQ_STAT(tso_bursts), - EFX_ETHTOOL_UINT_TXQ_STAT(tso_long_headers), - EFX_ETHTOOL_UINT_TXQ_STAT(tso_packets), - EFX_ETHTOOL_UINT_TXQ_STAT(tso_fallbacks), - EFX_ETHTOOL_UINT_TXQ_STAT(pushes), - EFX_ETHTOOL_UINT_TXQ_STAT(pio_packets), - EFX_ETHTOOL_UINT_TXQ_STAT(cb_packets), - EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(rx_reset), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tobe_disc), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_ip_hdr_chksum_err), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tcp_udp_chksum_err), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_inner_ip_hdr_chksum_err), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_inner_tcp_udp_chksum_err), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_outer_ip_hdr_chksum_err), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_outer_tcp_udp_chksum_err), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_eth_crc_err), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_mcast_mismatch), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_events), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_packets), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_drops), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_bad_drops), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_tx), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_redirect), -#ifdef CONFIG_RFS_ACCEL - EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(rfs_filter_count), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_succeeded), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_failed), -#endif -}; - -#define EFX_ETHTOOL_SW_STAT_COUNT ARRAY_SIZE(efx_sw_stat_desc) - #define EFX_ETHTOOL_EEPROM_MAGIC 0xEFAB /************************************************************************** @@ -185,18 +106,6 @@ efx_ethtool_set_link_ksettings(struct net_device *net_dev, return rc; } -static void efx_ethtool_get_drvinfo(struct net_device *net_dev, - struct ethtool_drvinfo *info) -{ - struct efx_nic *efx = netdev_priv(net_dev); - - strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); - strlcpy(info->version, EFX_DRIVER_VERSION, sizeof(info->version)); - efx_mcdi_print_fwver(efx, info->fw_version, - sizeof(info->fw_version)); - strlcpy(info->bus_info, pci_name(efx->pci_dev), sizeof(info->bus_info)); -} - static int efx_ethtool_get_regs_len(struct net_device *net_dev) { return efx_nic_get_regs_len(netdev_priv(net_dev)); @@ -211,341 +120,6 @@ static void efx_ethtool_get_regs(struct net_device *net_dev, efx_nic_get_regs(efx, buf); } -static u32 efx_ethtool_get_msglevel(struct net_device *net_dev) -{ - struct efx_nic *efx = netdev_priv(net_dev); - return efx->msg_enable; -} - -static void efx_ethtool_set_msglevel(struct net_device *net_dev, u32 msg_enable) -{ - struct efx_nic *efx = netdev_priv(net_dev); - efx->msg_enable = 
msg_enable; -} - -/** - * efx_fill_test - fill in an individual self-test entry - * @test_index: Index of the test - * @strings: Ethtool strings, or %NULL - * @data: Ethtool test results, or %NULL - * @test: Pointer to test result (used only if data != %NULL) - * @unit_format: Unit name format (e.g. "chan\%d") - * @unit_id: Unit id (e.g. 0 for "chan0") - * @test_format: Test name format (e.g. "loopback.\%s.tx.sent") - * @test_id: Test id (e.g. "PHYXS" for "loopback.PHYXS.tx_sent") - * - * Fill in an individual self-test entry. - */ -static void efx_fill_test(unsigned int test_index, u8 *strings, u64 *data, - int *test, const char *unit_format, int unit_id, - const char *test_format, const char *test_id) -{ - char unit_str[ETH_GSTRING_LEN], test_str[ETH_GSTRING_LEN]; - - /* Fill data value, if applicable */ - if (data) - data[test_index] = *test; - - /* Fill string, if applicable */ - if (strings) { - if (strchr(unit_format, '%')) - snprintf(unit_str, sizeof(unit_str), - unit_format, unit_id); - else - strcpy(unit_str, unit_format); - snprintf(test_str, sizeof(test_str), test_format, test_id); - snprintf(strings + test_index * ETH_GSTRING_LEN, - ETH_GSTRING_LEN, - "%-6s %-24s", unit_str, test_str); - } -} - -#define EFX_CHANNEL_NAME(_channel) "chan%d", _channel->channel -#define EFX_TX_QUEUE_NAME(_tx_queue) "txq%d", _tx_queue->queue -#define EFX_RX_QUEUE_NAME(_rx_queue) "rxq%d", _rx_queue->queue -#define EFX_LOOPBACK_NAME(_mode, _counter) \ - "loopback.%s." _counter, STRING_TABLE_LOOKUP(_mode, efx_loopback_mode) - -/** - * efx_fill_loopback_test - fill in a block of loopback self-test entries - * @efx: Efx NIC - * @lb_tests: Efx loopback self-test results structure - * @mode: Loopback test mode - * @test_index: Starting index of the test - * @strings: Ethtool strings, or %NULL - * @data: Ethtool test results, or %NULL - * - * Fill in a block of loopback self-test entries. Return new test - * index. - */ -static int efx_fill_loopback_test(struct efx_nic *efx, - struct efx_loopback_self_tests *lb_tests, - enum efx_loopback_mode mode, - unsigned int test_index, - u8 *strings, u64 *data) -{ - struct efx_channel *channel = - efx_get_channel(efx, efx->tx_channel_offset); - struct efx_tx_queue *tx_queue; - - efx_for_each_channel_tx_queue(tx_queue, channel) { - efx_fill_test(test_index++, strings, data, - &lb_tests->tx_sent[tx_queue->queue], - EFX_TX_QUEUE_NAME(tx_queue), - EFX_LOOPBACK_NAME(mode, "tx_sent")); - efx_fill_test(test_index++, strings, data, - &lb_tests->tx_done[tx_queue->queue], - EFX_TX_QUEUE_NAME(tx_queue), - EFX_LOOPBACK_NAME(mode, "tx_done")); - } - efx_fill_test(test_index++, strings, data, - &lb_tests->rx_good, - "rx", 0, - EFX_LOOPBACK_NAME(mode, "rx_good")); - efx_fill_test(test_index++, strings, data, - &lb_tests->rx_bad, - "rx", 0, - EFX_LOOPBACK_NAME(mode, "rx_bad")); - - return test_index; -} - -/** - * efx_ethtool_fill_self_tests - get self-test details - * @efx: Efx NIC - * @tests: Efx self-test results structure, or %NULL - * @strings: Ethtool strings, or %NULL - * @data: Ethtool test results, or %NULL - * - * Get self-test number of strings, strings, and/or test results. - * Return number of strings (== number of test results). - * - * The reason for merging these three functions is to make sure that - * they can never be inconsistent. 
- */ -static int efx_ethtool_fill_self_tests(struct efx_nic *efx, - struct efx_self_tests *tests, - u8 *strings, u64 *data) -{ - struct efx_channel *channel; - unsigned int n = 0, i; - enum efx_loopback_mode mode; - - efx_fill_test(n++, strings, data, &tests->phy_alive, - "phy", 0, "alive", NULL); - efx_fill_test(n++, strings, data, &tests->nvram, - "core", 0, "nvram", NULL); - efx_fill_test(n++, strings, data, &tests->interrupt, - "core", 0, "interrupt", NULL); - - /* Event queues */ - efx_for_each_channel(channel, efx) { - efx_fill_test(n++, strings, data, - &tests->eventq_dma[channel->channel], - EFX_CHANNEL_NAME(channel), - "eventq.dma", NULL); - efx_fill_test(n++, strings, data, - &tests->eventq_int[channel->channel], - EFX_CHANNEL_NAME(channel), - "eventq.int", NULL); - } - - efx_fill_test(n++, strings, data, &tests->memory, - "core", 0, "memory", NULL); - efx_fill_test(n++, strings, data, &tests->registers, - "core", 0, "registers", NULL); - - if (efx->phy_op->run_tests != NULL) { - EFX_WARN_ON_PARANOID(efx->phy_op->test_name == NULL); - - for (i = 0; true; ++i) { - const char *name; - - EFX_WARN_ON_PARANOID(i >= EFX_MAX_PHY_TESTS); - name = efx->phy_op->test_name(efx, i); - if (name == NULL) - break; - - efx_fill_test(n++, strings, data, &tests->phy_ext[i], - "phy", 0, name, NULL); - } - } - - /* Loopback tests */ - for (mode = LOOPBACK_NONE; mode <= LOOPBACK_TEST_MAX; mode++) { - if (!(efx->loopback_modes & (1 << mode))) - continue; - n = efx_fill_loopback_test(efx, - &tests->loopback[mode], mode, n, - strings, data); - } - - return n; -} - -static size_t efx_describe_per_queue_stats(struct efx_nic *efx, u8 *strings) -{ - size_t n_stats = 0; - struct efx_channel *channel; - - efx_for_each_channel(channel, efx) { - if (efx_channel_has_tx_queues(channel)) { - n_stats++; - if (strings != NULL) { - snprintf(strings, ETH_GSTRING_LEN, - "tx-%u.tx_packets", - channel->tx_queue[0].queue / - EFX_TXQ_TYPES); - - strings += ETH_GSTRING_LEN; - } - } - } - efx_for_each_channel(channel, efx) { - if (efx_channel_has_rx_queue(channel)) { - n_stats++; - if (strings != NULL) { - snprintf(strings, ETH_GSTRING_LEN, - "rx-%d.rx_packets", channel->channel); - strings += ETH_GSTRING_LEN; - } - } - } - if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) { - unsigned short xdp; - - for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) { - n_stats++; - if (strings) { - snprintf(strings, ETH_GSTRING_LEN, - "tx-xdp-cpu-%hu.tx_packets", xdp); - strings += ETH_GSTRING_LEN; - } - } - } - - return n_stats; -} - -static int efx_ethtool_get_sset_count(struct net_device *net_dev, - int string_set) -{ - struct efx_nic *efx = netdev_priv(net_dev); - - switch (string_set) { - case ETH_SS_STATS: - return efx->type->describe_stats(efx, NULL) + - EFX_ETHTOOL_SW_STAT_COUNT + - efx_describe_per_queue_stats(efx, NULL) + - efx_ptp_describe_stats(efx, NULL); - case ETH_SS_TEST: - return efx_ethtool_fill_self_tests(efx, NULL, NULL, NULL); - default: - return -EINVAL; - } -} - -static void efx_ethtool_get_strings(struct net_device *net_dev, - u32 string_set, u8 *strings) -{ - struct efx_nic *efx = netdev_priv(net_dev); - int i; - - switch (string_set) { - case ETH_SS_STATS: - strings += (efx->type->describe_stats(efx, strings) * - ETH_GSTRING_LEN); - for (i = 0; i < EFX_ETHTOOL_SW_STAT_COUNT; i++) - strlcpy(strings + i * ETH_GSTRING_LEN, - efx_sw_stat_desc[i].name, ETH_GSTRING_LEN); - strings += EFX_ETHTOOL_SW_STAT_COUNT * ETH_GSTRING_LEN; - strings += (efx_describe_per_queue_stats(efx, strings) * - ETH_GSTRING_LEN); - 
efx_ptp_describe_stats(efx, strings); - break; - case ETH_SS_TEST: - efx_ethtool_fill_self_tests(efx, NULL, strings, NULL); - break; - default: - /* No other string sets */ - break; - } -} - -static void efx_ethtool_get_stats(struct net_device *net_dev, - struct ethtool_stats *stats, - u64 *data) -{ - struct efx_nic *efx = netdev_priv(net_dev); - const struct efx_sw_stat_desc *stat; - struct efx_channel *channel; - struct efx_tx_queue *tx_queue; - struct efx_rx_queue *rx_queue; - int i; - - spin_lock_bh(&efx->stats_lock); - - /* Get NIC statistics */ - data += efx->type->update_stats(efx, data, NULL); - - /* Get software statistics */ - for (i = 0; i < EFX_ETHTOOL_SW_STAT_COUNT; i++) { - stat = &efx_sw_stat_desc[i]; - switch (stat->source) { - case EFX_ETHTOOL_STAT_SOURCE_nic: - data[i] = stat->get_stat((void *)efx + stat->offset); - break; - case EFX_ETHTOOL_STAT_SOURCE_channel: - data[i] = 0; - efx_for_each_channel(channel, efx) - data[i] += stat->get_stat((void *)channel + - stat->offset); - break; - case EFX_ETHTOOL_STAT_SOURCE_tx_queue: - data[i] = 0; - efx_for_each_channel(channel, efx) { - efx_for_each_channel_tx_queue(tx_queue, channel) - data[i] += - stat->get_stat((void *)tx_queue - + stat->offset); - } - break; - } - } - data += EFX_ETHTOOL_SW_STAT_COUNT; - - spin_unlock_bh(&efx->stats_lock); - - efx_for_each_channel(channel, efx) { - if (efx_channel_has_tx_queues(channel)) { - *data = 0; - efx_for_each_channel_tx_queue(tx_queue, channel) { - *data += tx_queue->tx_packets; - } - data++; - } - } - efx_for_each_channel(channel, efx) { - if (efx_channel_has_rx_queue(channel)) { - *data = 0; - efx_for_each_channel_rx_queue(rx_queue, channel) { - *data += rx_queue->rx_packets; - } - data++; - } - } - if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) { - int xdp; - - for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) { - data[0] = efx->xdp_tx_queues[xdp]->tx_packets; - data++; - } - } - - efx_ptp_update_stats(efx, data); -} - static void efx_ethtool_self_test(struct net_device *net_dev, struct ethtool_test *test, u64 *data) { @@ -787,16 +361,6 @@ out: return rc; } -static void efx_ethtool_get_pauseparam(struct net_device *net_dev, - struct ethtool_pauseparam *pause) -{ - struct efx_nic *efx = netdev_priv(net_dev); - - pause->rx_pause = !!(efx->wanted_fc & EFX_FC_RX); - pause->tx_pause = !!(efx->wanted_fc & EFX_FC_TX); - pause->autoneg = !!(efx->wanted_fc & EFX_FC_AUTO); -} - static void efx_ethtool_get_wol(struct net_device *net_dev, struct ethtool_wolinfo *wol) { @@ -1456,7 +1020,7 @@ static int efx_ethtool_set_rxfh_context(struct net_device *net_dev, rc = -ENOMEM; goto out_unlock; } - ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID; + ctx->context_id = EFX_MCDI_RSS_CONTEXT_INVALID; /* Initialise indir table and key to defaults */ efx_set_default_rx_indir_table(efx, ctx); netdev_rss_key_fill(ctx->rx_hash_key, sizeof(ctx->rx_hash_key)); diff --git a/drivers/net/ethernet/sfc/ethtool_common.c b/drivers/net/ethernet/sfc/ethtool_common.c new file mode 100644 index 000000000000..b8d281ab6c7a --- /dev/null +++ b/drivers/net/ethernet/sfc/ethtool_common.c @@ -0,0 +1,457 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2019 Solarflare Communications Inc. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ +#include <linux/module.h> +#include <linux/netdevice.h> +#include "net_driver.h" +#include "mcdi.h" +#include "nic.h" +#include "selftest.h" +#include "ethtool_common.h" + +struct efx_sw_stat_desc { + const char *name; + enum { + EFX_ETHTOOL_STAT_SOURCE_nic, + EFX_ETHTOOL_STAT_SOURCE_channel, + EFX_ETHTOOL_STAT_SOURCE_tx_queue + } source; + unsigned int offset; + u64 (*get_stat)(void *field); /* Reader function */ +}; + +/* Initialiser for a struct efx_sw_stat_desc with type-checking */ +#define EFX_ETHTOOL_STAT(stat_name, source_name, field, field_type, \ + get_stat_function) { \ + .name = #stat_name, \ + .source = EFX_ETHTOOL_STAT_SOURCE_##source_name, \ + .offset = ((((field_type *) 0) == \ + &((struct efx_##source_name *)0)->field) ? \ + offsetof(struct efx_##source_name, field) : \ + offsetof(struct efx_##source_name, field)), \ + .get_stat = get_stat_function, \ +} + +static u64 efx_get_uint_stat(void *field) +{ + return *(unsigned int *)field; +} + +static u64 efx_get_atomic_stat(void *field) +{ + return atomic_read((atomic_t *) field); +} + +#define EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(field) \ + EFX_ETHTOOL_STAT(field, nic, field, \ + atomic_t, efx_get_atomic_stat) + +#define EFX_ETHTOOL_UINT_CHANNEL_STAT(field) \ + EFX_ETHTOOL_STAT(field, channel, n_##field, \ + unsigned int, efx_get_uint_stat) +#define EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(field) \ + EFX_ETHTOOL_STAT(field, channel, field, \ + unsigned int, efx_get_uint_stat) + +#define EFX_ETHTOOL_UINT_TXQ_STAT(field) \ + EFX_ETHTOOL_STAT(tx_##field, tx_queue, field, \ + unsigned int, efx_get_uint_stat) + +static const struct efx_sw_stat_desc efx_sw_stat_desc[] = { + EFX_ETHTOOL_UINT_TXQ_STAT(merge_events), + EFX_ETHTOOL_UINT_TXQ_STAT(tso_bursts), + EFX_ETHTOOL_UINT_TXQ_STAT(tso_long_headers), + EFX_ETHTOOL_UINT_TXQ_STAT(tso_packets), + EFX_ETHTOOL_UINT_TXQ_STAT(tso_fallbacks), + EFX_ETHTOOL_UINT_TXQ_STAT(pushes), + EFX_ETHTOOL_UINT_TXQ_STAT(pio_packets), + EFX_ETHTOOL_UINT_TXQ_STAT(cb_packets), + EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(rx_reset), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tobe_disc), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_ip_hdr_chksum_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tcp_udp_chksum_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_inner_ip_hdr_chksum_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_inner_tcp_udp_chksum_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_outer_ip_hdr_chksum_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_outer_tcp_udp_chksum_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_eth_crc_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_mcast_mismatch), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_events), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_packets), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_drops), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_bad_drops), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_tx), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_redirect), +#ifdef CONFIG_RFS_ACCEL + EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(rfs_filter_count), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_succeeded), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_failed), +#endif +}; + +#define EFX_ETHTOOL_SW_STAT_COUNT ARRAY_SIZE(efx_sw_stat_desc) + +void efx_ethtool_get_drvinfo(struct net_device *net_dev, + struct ethtool_drvinfo *info) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + strlcpy(info->driver, KBUILD_MODNAME, 
sizeof(info->driver)); + strlcpy(info->version, EFX_DRIVER_VERSION, sizeof(info->version)); + efx_mcdi_print_fwver(efx, info->fw_version, + sizeof(info->fw_version)); + strlcpy(info->bus_info, pci_name(efx->pci_dev), sizeof(info->bus_info)); +} + +u32 efx_ethtool_get_msglevel(struct net_device *net_dev) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + return efx->msg_enable; +} + +void efx_ethtool_set_msglevel(struct net_device *net_dev, u32 msg_enable) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + efx->msg_enable = msg_enable; +} + +void efx_ethtool_get_pauseparam(struct net_device *net_dev, + struct ethtool_pauseparam *pause) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + pause->rx_pause = !!(efx->wanted_fc & EFX_FC_RX); + pause->tx_pause = !!(efx->wanted_fc & EFX_FC_TX); + pause->autoneg = !!(efx->wanted_fc & EFX_FC_AUTO); +} + +/** + * efx_fill_test - fill in an individual self-test entry + * @test_index: Index of the test + * @strings: Ethtool strings, or %NULL + * @data: Ethtool test results, or %NULL + * @test: Pointer to test result (used only if data != %NULL) + * @unit_format: Unit name format (e.g. "chan\%d") + * @unit_id: Unit id (e.g. 0 for "chan0") + * @test_format: Test name format (e.g. "loopback.\%s.tx.sent") + * @test_id: Test id (e.g. "PHYXS" for "loopback.PHYXS.tx_sent") + * + * Fill in an individual self-test entry. + */ +static void efx_fill_test(unsigned int test_index, u8 *strings, u64 *data, + int *test, const char *unit_format, int unit_id, + const char *test_format, const char *test_id) +{ + char unit_str[ETH_GSTRING_LEN], test_str[ETH_GSTRING_LEN]; + + /* Fill data value, if applicable */ + if (data) + data[test_index] = *test; + + /* Fill string, if applicable */ + if (strings) { + if (strchr(unit_format, '%')) + snprintf(unit_str, sizeof(unit_str), + unit_format, unit_id); + else + strcpy(unit_str, unit_format); + snprintf(test_str, sizeof(test_str), test_format, test_id); + snprintf(strings + test_index * ETH_GSTRING_LEN, + ETH_GSTRING_LEN, + "%-6s %-24s", unit_str, test_str); + } +} + +#define EFX_CHANNEL_NAME(_channel) "chan%d", _channel->channel +#define EFX_TX_QUEUE_NAME(_tx_queue) "txq%d", _tx_queue->queue +#define EFX_RX_QUEUE_NAME(_rx_queue) "rxq%d", _rx_queue->queue +#define EFX_LOOPBACK_NAME(_mode, _counter) \ + "loopback.%s." _counter, STRING_TABLE_LOOKUP(_mode, efx_loopback_mode) + +/** + * efx_fill_loopback_test - fill in a block of loopback self-test entries + * @efx: Efx NIC + * @lb_tests: Efx loopback self-test results structure + * @mode: Loopback test mode + * @test_index: Starting index of the test + * @strings: Ethtool strings, or %NULL + * @data: Ethtool test results, or %NULL + * + * Fill in a block of loopback self-test entries. Return new test + * index. 
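efx_fill_test(), just above, packs each self-test name into a single fixed-size ETH_GSTRING_LEN slot as two left-justified columns, unit then test. A userspace sketch of the same formatting, using 32 as a stand-in for ETH_GSTRING_LEN:

#include <stdio.h>

#define GSTRING_LEN 32   /* stand-in for ETH_GSTRING_LEN */

int main(void)
{
        char unit_str[GSTRING_LEN], test_str[GSTRING_LEN], out[GSTRING_LEN];

        /* unit format "chan%d" with id 0, test name "eventq.dma" */
        snprintf(unit_str, sizeof(unit_str), "chan%d", 0);
        snprintf(test_str, sizeof(test_str), "%s", "eventq.dma");
        snprintf(out, sizeof(out), "%-6s %-24s", unit_str, test_str);

        printf("[%s]\n", out); /* "[chan0  eventq.dma ...]" padded to a fixed width */
        return 0;
}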
+ */ +static int efx_fill_loopback_test(struct efx_nic *efx, + struct efx_loopback_self_tests *lb_tests, + enum efx_loopback_mode mode, + unsigned int test_index, + u8 *strings, u64 *data) +{ + struct efx_channel *channel = + efx_get_channel(efx, efx->tx_channel_offset); + struct efx_tx_queue *tx_queue; + + efx_for_each_channel_tx_queue(tx_queue, channel) { + efx_fill_test(test_index++, strings, data, + &lb_tests->tx_sent[tx_queue->queue], + EFX_TX_QUEUE_NAME(tx_queue), + EFX_LOOPBACK_NAME(mode, "tx_sent")); + efx_fill_test(test_index++, strings, data, + &lb_tests->tx_done[tx_queue->queue], + EFX_TX_QUEUE_NAME(tx_queue), + EFX_LOOPBACK_NAME(mode, "tx_done")); + } + efx_fill_test(test_index++, strings, data, + &lb_tests->rx_good, + "rx", 0, + EFX_LOOPBACK_NAME(mode, "rx_good")); + efx_fill_test(test_index++, strings, data, + &lb_tests->rx_bad, + "rx", 0, + EFX_LOOPBACK_NAME(mode, "rx_bad")); + + return test_index; +} + +/** + * efx_ethtool_fill_self_tests - get self-test details + * @efx: Efx NIC + * @tests: Efx self-test results structure, or %NULL + * @strings: Ethtool strings, or %NULL + * @data: Ethtool test results, or %NULL + * + * Get self-test number of strings, strings, and/or test results. + * Return number of strings (== number of test results). + * + * The reason for merging these three functions is to make sure that + * they can never be inconsistent. + */ +int efx_ethtool_fill_self_tests(struct efx_nic *efx, + struct efx_self_tests *tests, + u8 *strings, u64 *data) +{ + struct efx_channel *channel; + unsigned int n = 0, i; + enum efx_loopback_mode mode; + + efx_fill_test(n++, strings, data, &tests->phy_alive, + "phy", 0, "alive", NULL); + efx_fill_test(n++, strings, data, &tests->nvram, + "core", 0, "nvram", NULL); + efx_fill_test(n++, strings, data, &tests->interrupt, + "core", 0, "interrupt", NULL); + + /* Event queues */ + efx_for_each_channel(channel, efx) { + efx_fill_test(n++, strings, data, + &tests->eventq_dma[channel->channel], + EFX_CHANNEL_NAME(channel), + "eventq.dma", NULL); + efx_fill_test(n++, strings, data, + &tests->eventq_int[channel->channel], + EFX_CHANNEL_NAME(channel), + "eventq.int", NULL); + } + + efx_fill_test(n++, strings, data, &tests->memory, + "core", 0, "memory", NULL); + efx_fill_test(n++, strings, data, &tests->registers, + "core", 0, "registers", NULL); + + if (efx->phy_op->run_tests != NULL) { + EFX_WARN_ON_PARANOID(efx->phy_op->test_name == NULL); + + for (i = 0; true; ++i) { + const char *name; + + EFX_WARN_ON_PARANOID(i >= EFX_MAX_PHY_TESTS); + name = efx->phy_op->test_name(efx, i); + if (name == NULL) + break; + + efx_fill_test(n++, strings, data, &tests->phy_ext[i], + "phy", 0, name, NULL); + } + } + + /* Loopback tests */ + for (mode = LOOPBACK_NONE; mode <= LOOPBACK_TEST_MAX; mode++) { + if (!(efx->loopback_modes & (1 << mode))) + continue; + n = efx_fill_loopback_test(efx, + &tests->loopback[mode], mode, n, + strings, data); + } + + return n; +} + +static size_t efx_describe_per_queue_stats(struct efx_nic *efx, u8 *strings) +{ + size_t n_stats = 0; + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) { + if (efx_channel_has_tx_queues(channel)) { + n_stats++; + if (strings != NULL) { + snprintf(strings, ETH_GSTRING_LEN, + "tx-%u.tx_packets", + channel->tx_queue[0].queue / + EFX_TXQ_TYPES); + + strings += ETH_GSTRING_LEN; + } + } + } + efx_for_each_channel(channel, efx) { + if (efx_channel_has_rx_queue(channel)) { + n_stats++; + if (strings != NULL) { + snprintf(strings, ETH_GSTRING_LEN, + "rx-%d.rx_packets", 
channel->channel); + strings += ETH_GSTRING_LEN; + } + } + } + if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) { + unsigned short xdp; + + for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) { + n_stats++; + if (strings) { + snprintf(strings, ETH_GSTRING_LEN, + "tx-xdp-cpu-%hu.tx_packets", xdp); + strings += ETH_GSTRING_LEN; + } + } + } + + return n_stats; +} + +int efx_ethtool_get_sset_count(struct net_device *net_dev, int string_set) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + switch (string_set) { + case ETH_SS_STATS: + return efx->type->describe_stats(efx, NULL) + + EFX_ETHTOOL_SW_STAT_COUNT + + efx_describe_per_queue_stats(efx, NULL) + + efx_ptp_describe_stats(efx, NULL); + case ETH_SS_TEST: + return efx_ethtool_fill_self_tests(efx, NULL, NULL, NULL); + default: + return -EINVAL; + } +} + +void efx_ethtool_get_strings(struct net_device *net_dev, + u32 string_set, u8 *strings) +{ + struct efx_nic *efx = netdev_priv(net_dev); + int i; + + switch (string_set) { + case ETH_SS_STATS: + strings += (efx->type->describe_stats(efx, strings) * + ETH_GSTRING_LEN); + for (i = 0; i < EFX_ETHTOOL_SW_STAT_COUNT; i++) + strlcpy(strings + i * ETH_GSTRING_LEN, + efx_sw_stat_desc[i].name, ETH_GSTRING_LEN); + strings += EFX_ETHTOOL_SW_STAT_COUNT * ETH_GSTRING_LEN; + strings += (efx_describe_per_queue_stats(efx, strings) * + ETH_GSTRING_LEN); + efx_ptp_describe_stats(efx, strings); + break; + case ETH_SS_TEST: + efx_ethtool_fill_self_tests(efx, NULL, strings, NULL); + break; + default: + /* No other string sets */ + break; + } +} + +void efx_ethtool_get_stats(struct net_device *net_dev, + struct ethtool_stats *stats, + u64 *data) +{ + struct efx_nic *efx = netdev_priv(net_dev); + const struct efx_sw_stat_desc *stat; + struct efx_channel *channel; + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + int i; + + spin_lock_bh(&efx->stats_lock); + + /* Get NIC statistics */ + data += efx->type->update_stats(efx, data, NULL); + + /* Get software statistics */ + for (i = 0; i < EFX_ETHTOOL_SW_STAT_COUNT; i++) { + stat = &efx_sw_stat_desc[i]; + switch (stat->source) { + case EFX_ETHTOOL_STAT_SOURCE_nic: + data[i] = stat->get_stat((void *)efx + stat->offset); + break; + case EFX_ETHTOOL_STAT_SOURCE_channel: + data[i] = 0; + efx_for_each_channel(channel, efx) + data[i] += stat->get_stat((void *)channel + + stat->offset); + break; + case EFX_ETHTOOL_STAT_SOURCE_tx_queue: + data[i] = 0; + efx_for_each_channel(channel, efx) { + efx_for_each_channel_tx_queue(tx_queue, channel) + data[i] += + stat->get_stat((void *)tx_queue + + stat->offset); + } + break; + } + } + data += EFX_ETHTOOL_SW_STAT_COUNT; + + spin_unlock_bh(&efx->stats_lock); + + efx_for_each_channel(channel, efx) { + if (efx_channel_has_tx_queues(channel)) { + *data = 0; + efx_for_each_channel_tx_queue(tx_queue, channel) { + *data += tx_queue->tx_packets; + } + data++; + } + } + efx_for_each_channel(channel, efx) { + if (efx_channel_has_rx_queue(channel)) { + *data = 0; + efx_for_each_channel_rx_queue(rx_queue, channel) { + *data += rx_queue->rx_packets; + } + data++; + } + } + if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) { + int xdp; + + for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) { + data[0] = efx->xdp_tx_queues[xdp]->tx_packets; + data++; + } + } + + efx_ptp_update_stats(efx, data); +} diff --git a/drivers/net/ethernet/sfc/ethtool_common.h b/drivers/net/ethernet/sfc/ethtool_common.h new file mode 100644 index 000000000000..fa624313f330 --- /dev/null +++ b/drivers/net/ethernet/sfc/ethtool_common.h @@ -0,0 +1,30 @@ 
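The software-stats loop in efx_ethtool_get_stats() is table-driven: each descriptor carries a source kind, a byte offset into the source structure, and a reader for the field at that offset. A self-contained sketch of the same mechanism with a made-up channel structure and two invented counters:

#include <stddef.h>
#include <stdio.h>

struct demo_channel {
        unsigned int n_rx_frm_trunc;
        unsigned int n_rx_eth_crc_err;
};

struct stat_desc {
        const char *name;
        size_t offset;                           /* field offset within the source */
        unsigned long (*get_stat)(void *field);  /* reader for that field */
};

static unsigned long get_uint_stat(void *field)
{
        return *(unsigned int *)field;
}

static const struct stat_desc demo_stats[] = {
        { "rx_frm_trunc",   offsetof(struct demo_channel, n_rx_frm_trunc),   get_uint_stat },
        { "rx_eth_crc_err", offsetof(struct demo_channel, n_rx_eth_crc_err), get_uint_stat },
};

int main(void)
{
        struct demo_channel ch = { .n_rx_frm_trunc = 3, .n_rx_eth_crc_err = 1 };
        size_t i;

        for (i = 0; i < sizeof(demo_stats) / sizeof(demo_stats[0]); i++)
                printf("%s = %lu\n", demo_stats[i].name,
                       demo_stats[i].get_stat((char *)&ch + demo_stats[i].offset));
        return 0;
}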
+/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2019 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_ETHTOOL_COMMON_H +#define EFX_ETHTOOL_COMMON_H + +void efx_ethtool_get_drvinfo(struct net_device *net_dev, + struct ethtool_drvinfo *info); +u32 efx_ethtool_get_msglevel(struct net_device *net_dev); +void efx_ethtool_set_msglevel(struct net_device *net_dev, u32 msg_enable); +void efx_ethtool_get_pauseparam(struct net_device *net_dev, + struct ethtool_pauseparam *pause); +int efx_ethtool_fill_self_tests(struct efx_nic *efx, + struct efx_self_tests *tests, + u8 *strings, u64 *data); +int efx_ethtool_get_sset_count(struct net_device *net_dev, int string_set); +void efx_ethtool_get_strings(struct net_device *net_dev, u32 string_set, + u8 *strings); +void efx_ethtool_get_stats(struct net_device *net_dev, + struct ethtool_stats *stats __attribute__ ((unused)), + u64 *data); + +#endif diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c index eecc348b1c32..42bcd34fc508 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -1265,7 +1265,7 @@ static int ef4_init_io(struct ef4_nic *efx) rc = -EIO; goto fail3; } - efx->membase = ioremap_nocache(efx->membase_phys, mem_map_size); + efx->membase = ioremap(efx->membase_phys, mem_map_size); if (!efx->membase) { netif_err(efx, probe, efx->net_dev, "could not map memory BAR at %llx+%x\n", @@ -2108,7 +2108,7 @@ static void ef4_net_stats(struct net_device *net_dev, } /* Context: netif_tx_lock held, BHs disabled. 
*/ -static void ef4_watchdog(struct net_device *net_dev) +static void ef4_watchdog(struct net_device *net_dev, unsigned int txqueue) { struct ef4_nic *efx = netdev_priv(net_dev); diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index eedd32e2bfcb..dbbb898adddb 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -15,6 +15,7 @@ #include "net_driver.h" #include "bitfield.h" #include "efx.h" +#include "rx_common.h" #include "nic.h" #include "farch_regs.h" #include "sriov.h" diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h index 9081f84a2604..54a45010b576 100644 --- a/drivers/net/ethernet/sfc/mcdi.h +++ b/drivers/net/ethernet/sfc/mcdi.h @@ -346,11 +346,8 @@ int efx_mcdi_flush_rxqs(struct efx_nic *efx); int efx_mcdi_port_probe(struct efx_nic *efx); void efx_mcdi_port_remove(struct efx_nic *efx); int efx_mcdi_port_reconfigure(struct efx_nic *efx); -int efx_mcdi_port_get_number(struct efx_nic *efx); u32 efx_mcdi_phy_get_caps(struct efx_nic *efx); void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev); -int efx_mcdi_set_mac(struct efx_nic *efx); -#define EFX_MC_STATS_GENERATION_INVALID ((__force __le64)(-1)) void efx_mcdi_mac_start_stats(struct efx_nic *efx); void efx_mcdi_mac_stop_stats(struct efx_nic *efx); void efx_mcdi_mac_pull_stats(struct efx_nic *efx); diff --git a/drivers/net/ethernet/sfc/mcdi_filters.c b/drivers/net/ethernet/sfc/mcdi_filters.c new file mode 100644 index 000000000000..4310ae5bd898 --- /dev/null +++ b/drivers/net/ethernet/sfc/mcdi_filters.c @@ -0,0 +1,2270 @@ +#include "mcdi_filters.h" +#include "mcdi.h" +#include "nic.h" +#include "rx_common.h" + +/* The maximum size of a shared RSS context */ +/* TODO: this should really be from the mcdi protocol export */ +#define EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE 64UL + +#define EFX_EF10_FILTER_ID_INVALID 0xffff + +/* An arbitrary search limit for the software hash table */ +#define EFX_EF10_FILTER_SEARCH_LIMIT 200 + +static struct efx_filter_spec * +efx_mcdi_filter_entry_spec(const struct efx_mcdi_filter_table *table, + unsigned int filter_idx) +{ + return (struct efx_filter_spec *)(table->entry[filter_idx].spec & + ~EFX_EF10_FILTER_FLAGS); +} + +static unsigned int +efx_mcdi_filter_entry_flags(const struct efx_mcdi_filter_table *table, + unsigned int filter_idx) +{ + return table->entry[filter_idx].spec & EFX_EF10_FILTER_FLAGS; +} + +static u32 efx_mcdi_filter_get_unsafe_id(u32 filter_id) +{ + WARN_ON_ONCE(filter_id == EFX_EF10_FILTER_ID_INVALID); + return filter_id & (EFX_MCDI_FILTER_TBL_ROWS - 1); +} + +static unsigned int efx_mcdi_filter_get_unsafe_pri(u32 filter_id) +{ + return filter_id / (EFX_MCDI_FILTER_TBL_ROWS * 2); +} + +static u32 efx_mcdi_filter_make_filter_id(unsigned int pri, u16 idx) +{ + return pri * EFX_MCDI_FILTER_TBL_ROWS * 2 + idx; +} + +/* + * Decide whether a filter should be exclusive or else should allow + * delivery to additional recipients. Currently we decide that + * filters for specific local unicast MAC and IP addresses are + * exclusive. 
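The ID helpers near the top of this file pack a match-priority and a table row into one value: efx_mcdi_filter_make_filter_id() multiplies the priority by twice the table size and adds the row; the row is recovered by masking (which relies on the table size being a power of two) and the priority by division. A worked userspace example, assuming a hypothetical 8192-row table:

#include <stdio.h>

#define TBL_ROWS 8192U   /* assumed power-of-two stand-in for EFX_MCDI_FILTER_TBL_ROWS */

static unsigned int make_filter_id(unsigned int pri, unsigned int idx)
{
        return pri * TBL_ROWS * 2 + idx;
}

int main(void)
{
        unsigned int id = make_filter_id(1, 5);

        printf("id=%u row=%u pri=%u\n",
               id,
               id & (TBL_ROWS - 1),   /* 5: row recovered by masking */
               id / (TBL_ROWS * 2));  /* 1: priority recovered by division */
        return 0;
}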
+ */ +static bool efx_mcdi_filter_is_exclusive(const struct efx_filter_spec *spec) +{ + if (spec->match_flags & EFX_FILTER_MATCH_LOC_MAC && + !is_multicast_ether_addr(spec->loc_mac)) + return true; + + if ((spec->match_flags & + (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) == + (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) { + if (spec->ether_type == htons(ETH_P_IP) && + !ipv4_is_multicast(spec->loc_host[0])) + return true; + if (spec->ether_type == htons(ETH_P_IPV6) && + ((const u8 *)spec->loc_host)[0] != 0xff) + return true; + } + + return false; +} + +static void +efx_mcdi_filter_set_entry(struct efx_mcdi_filter_table *table, + unsigned int filter_idx, + const struct efx_filter_spec *spec, + unsigned int flags) +{ + table->entry[filter_idx].spec = (unsigned long)spec | flags; +} + +static void +efx_mcdi_filter_push_prep_set_match_fields(struct efx_nic *efx, + const struct efx_filter_spec *spec, + efx_dword_t *inbuf) +{ + enum efx_encap_type encap_type = efx_filter_get_encap_type(spec); + u32 match_fields = 0, uc_match, mc_match; + + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, + efx_mcdi_filter_is_exclusive(spec) ? + MC_CMD_FILTER_OP_IN_OP_INSERT : + MC_CMD_FILTER_OP_IN_OP_SUBSCRIBE); + + /* + * Convert match flags and values. Unlike almost + * everything else in MCDI, these fields are in + * network byte order. + */ +#define COPY_VALUE(value, mcdi_field) \ + do { \ + match_fields |= \ + 1 << MC_CMD_FILTER_OP_IN_MATCH_ ## \ + mcdi_field ## _LBN; \ + BUILD_BUG_ON( \ + MC_CMD_FILTER_OP_IN_ ## mcdi_field ## _LEN < \ + sizeof(value)); \ + memcpy(MCDI_PTR(inbuf, FILTER_OP_IN_ ## mcdi_field), \ + &value, sizeof(value)); \ + } while (0) +#define COPY_FIELD(gen_flag, gen_field, mcdi_field) \ + if (spec->match_flags & EFX_FILTER_MATCH_ ## gen_flag) { \ + COPY_VALUE(spec->gen_field, mcdi_field); \ + } + /* + * Handle encap filters first. They will always be mismatch + * (unknown UC or MC) filters + */ + if (encap_type) { + /* + * ether_type and outer_ip_proto need to be variables + * because COPY_VALUE wants to memcpy them + */ + __be16 ether_type = + htons(encap_type & EFX_ENCAP_FLAG_IPV6 ? + ETH_P_IPV6 : ETH_P_IP); + u8 vni_type = MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_GENEVE; + u8 outer_ip_proto; + + switch (encap_type & EFX_ENCAP_TYPES_MASK) { + case EFX_ENCAP_TYPE_VXLAN: + vni_type = MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_VXLAN; + /* fallthrough */ + case EFX_ENCAP_TYPE_GENEVE: + COPY_VALUE(ether_type, ETHER_TYPE); + outer_ip_proto = IPPROTO_UDP; + COPY_VALUE(outer_ip_proto, IP_PROTO); + /* + * We always need to set the type field, even + * though we're not matching on the TNI. + */ + MCDI_POPULATE_DWORD_1(inbuf, + FILTER_OP_EXT_IN_VNI_OR_VSID, + FILTER_OP_EXT_IN_VNI_TYPE, + vni_type); + break; + case EFX_ENCAP_TYPE_NVGRE: + COPY_VALUE(ether_type, ETHER_TYPE); + outer_ip_proto = IPPROTO_GRE; + COPY_VALUE(outer_ip_proto, IP_PROTO); + break; + default: + WARN_ON(1); + } + + uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_UCAST_DST_LBN; + mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_MCAST_DST_LBN; + } else { + uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_UCAST_DST_LBN; + mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_MCAST_DST_LBN; + } + + if (spec->match_flags & EFX_FILTER_MATCH_LOC_MAC_IG) + match_fields |= + is_multicast_ether_addr(spec->loc_mac) ? 
+ 1 << mc_match : + 1 << uc_match; + COPY_FIELD(REM_HOST, rem_host, SRC_IP); + COPY_FIELD(LOC_HOST, loc_host, DST_IP); + COPY_FIELD(REM_MAC, rem_mac, SRC_MAC); + COPY_FIELD(REM_PORT, rem_port, SRC_PORT); + COPY_FIELD(LOC_MAC, loc_mac, DST_MAC); + COPY_FIELD(LOC_PORT, loc_port, DST_PORT); + COPY_FIELD(ETHER_TYPE, ether_type, ETHER_TYPE); + COPY_FIELD(INNER_VID, inner_vid, INNER_VLAN); + COPY_FIELD(OUTER_VID, outer_vid, OUTER_VLAN); + COPY_FIELD(IP_PROTO, ip_proto, IP_PROTO); +#undef COPY_FIELD +#undef COPY_VALUE + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_MATCH_FIELDS, + match_fields); +} + +static void efx_mcdi_filter_push_prep(struct efx_nic *efx, + const struct efx_filter_spec *spec, + efx_dword_t *inbuf, u64 handle, + struct efx_rss_context *ctx, + bool replacing) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + u32 flags = spec->flags; + + memset(inbuf, 0, MC_CMD_FILTER_OP_EXT_IN_LEN); + + /* If RSS filter, caller better have given us an RSS context */ + if (flags & EFX_FILTER_FLAG_RX_RSS) { + /* + * We don't have the ability to return an error, so we'll just + * log a warning and disable RSS for the filter. + */ + if (WARN_ON_ONCE(!ctx)) + flags &= ~EFX_FILTER_FLAG_RX_RSS; + else if (WARN_ON_ONCE(ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID)) + flags &= ~EFX_FILTER_FLAG_RX_RSS; + } + + if (replacing) { + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, + MC_CMD_FILTER_OP_IN_OP_REPLACE); + MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, handle); + } else { + efx_mcdi_filter_push_prep_set_match_fields(efx, spec, inbuf); + } + + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_PORT_ID, nic_data->vport_id); + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_DEST, + spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP ? + MC_CMD_FILTER_OP_IN_RX_DEST_DROP : + MC_CMD_FILTER_OP_IN_RX_DEST_HOST); + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_TX_DOMAIN, 0); + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_TX_DEST, + MC_CMD_FILTER_OP_IN_TX_DEST_DEFAULT); + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_QUEUE, + spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP ? + 0 : spec->dmaq_id); + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_MODE, + (flags & EFX_FILTER_FLAG_RX_RSS) ? 
+ MC_CMD_FILTER_OP_IN_RX_MODE_RSS : + MC_CMD_FILTER_OP_IN_RX_MODE_SIMPLE); + if (flags & EFX_FILTER_FLAG_RX_RSS) + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_CONTEXT, ctx->context_id); +} + +static int efx_mcdi_filter_push(struct efx_nic *efx, + const struct efx_filter_spec *spec, u64 *handle, + struct efx_rss_context *ctx, bool replacing) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_FILTER_OP_EXT_OUT_LEN); + size_t outlen; + int rc; + + efx_mcdi_filter_push_prep(efx, spec, inbuf, *handle, ctx, replacing); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc && spec->priority != EFX_FILTER_PRI_HINT) + efx_mcdi_display_error(efx, MC_CMD_FILTER_OP, sizeof(inbuf), + outbuf, outlen, rc); + if (rc == 0) + *handle = MCDI_QWORD(outbuf, FILTER_OP_OUT_HANDLE); + if (rc == -ENOSPC) + rc = -EBUSY; /* to match efx_farch_filter_insert() */ + return rc; +} + +static u32 efx_mcdi_filter_mcdi_flags_from_spec(const struct efx_filter_spec *spec) +{ + enum efx_encap_type encap_type = efx_filter_get_encap_type(spec); + unsigned int match_flags = spec->match_flags; + unsigned int uc_match, mc_match; + u32 mcdi_flags = 0; + +#define MAP_FILTER_TO_MCDI_FLAG(gen_flag, mcdi_field, encap) { \ + unsigned int old_match_flags = match_flags; \ + match_flags &= ~EFX_FILTER_MATCH_ ## gen_flag; \ + if (match_flags != old_match_flags) \ + mcdi_flags |= \ + (1 << ((encap) ? \ + MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_ ## \ + mcdi_field ## _LBN : \ + MC_CMD_FILTER_OP_EXT_IN_MATCH_ ##\ + mcdi_field ## _LBN)); \ + } + /* inner or outer based on encap type */ + MAP_FILTER_TO_MCDI_FLAG(REM_HOST, SRC_IP, encap_type); + MAP_FILTER_TO_MCDI_FLAG(LOC_HOST, DST_IP, encap_type); + MAP_FILTER_TO_MCDI_FLAG(REM_MAC, SRC_MAC, encap_type); + MAP_FILTER_TO_MCDI_FLAG(REM_PORT, SRC_PORT, encap_type); + MAP_FILTER_TO_MCDI_FLAG(LOC_MAC, DST_MAC, encap_type); + MAP_FILTER_TO_MCDI_FLAG(LOC_PORT, DST_PORT, encap_type); + MAP_FILTER_TO_MCDI_FLAG(ETHER_TYPE, ETHER_TYPE, encap_type); + MAP_FILTER_TO_MCDI_FLAG(IP_PROTO, IP_PROTO, encap_type); + /* always outer */ + MAP_FILTER_TO_MCDI_FLAG(INNER_VID, INNER_VLAN, false); + MAP_FILTER_TO_MCDI_FLAG(OUTER_VID, OUTER_VLAN, false); +#undef MAP_FILTER_TO_MCDI_FLAG + + /* special handling for encap type, and mismatch */ + if (encap_type) { + match_flags &= ~EFX_FILTER_MATCH_ENCAP_TYPE; + mcdi_flags |= + (1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_ETHER_TYPE_LBN); + mcdi_flags |= (1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_IP_PROTO_LBN); + + uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_UCAST_DST_LBN; + mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_MCAST_DST_LBN; + } else { + uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_UCAST_DST_LBN; + mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_MCAST_DST_LBN; + } + + if (match_flags & EFX_FILTER_MATCH_LOC_MAC_IG) { + match_flags &= ~EFX_FILTER_MATCH_LOC_MAC_IG; + mcdi_flags |= + is_multicast_ether_addr(spec->loc_mac) ? + 1 << mc_match : + 1 << uc_match; + } + + /* Did we map them all? 
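The translation block just above clears each generic match bit as it maps it, so anything still set in match_flags afterwards (checked by the WARN_ON_ONCE that follows) is a match type with no MCDI equivalent. A tiny sketch of that consume-and-check pattern, with made-up flag values rather than the driver's:

#include <stdio.h>

#define MATCH_LOC_MAC  0x1u   /* illustrative flag values, not the driver's */
#define MATCH_REM_HOST 0x2u
#define MATCH_UNKNOWN  0x4u

int main(void)
{
        unsigned int match_flags = MATCH_LOC_MAC | MATCH_UNKNOWN;
        unsigned int mcdi_flags = 0;

        if (match_flags & MATCH_LOC_MAC) {   /* translate and consume */
                match_flags &= ~MATCH_LOC_MAC;
                mcdi_flags |= 0x10;
        }
        if (match_flags & MATCH_REM_HOST) {
                match_flags &= ~MATCH_REM_HOST;
                mcdi_flags |= 0x20;
        }

        if (match_flags)                     /* leftover bit: unsupported match type */
                fprintf(stderr, "unmapped match flags: %#x\n", match_flags);

        printf("mcdi_flags=%#x\n", mcdi_flags);
        return 0;
}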
*/ + WARN_ON_ONCE(match_flags); + + return mcdi_flags; +} + +static int efx_mcdi_filter_pri(struct efx_mcdi_filter_table *table, + const struct efx_filter_spec *spec) +{ + u32 mcdi_flags = efx_mcdi_filter_mcdi_flags_from_spec(spec); + unsigned int match_pri; + + for (match_pri = 0; + match_pri < table->rx_match_count; + match_pri++) + if (table->rx_match_mcdi_flags[match_pri] == mcdi_flags) + return match_pri; + + return -EPROTONOSUPPORT; +} + +static s32 efx_mcdi_filter_insert_locked(struct efx_nic *efx, + struct efx_filter_spec *spec, + bool replace_equal) +{ + DECLARE_BITMAP(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + struct efx_mcdi_filter_table *table; + struct efx_filter_spec *saved_spec; + struct efx_rss_context *ctx = NULL; + unsigned int match_pri, hash; + unsigned int priv_flags; + bool rss_locked = false; + bool replacing = false; + unsigned int depth, i; + int ins_index = -1; + DEFINE_WAIT(wait); + bool is_mc_recip; + s32 rc; + + WARN_ON(!rwsem_is_locked(&efx->filter_sem)); + table = efx->filter_state; + down_write(&table->lock); + + /* For now, only support RX filters */ + if ((spec->flags & (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)) != + EFX_FILTER_FLAG_RX) { + rc = -EINVAL; + goto out_unlock; + } + + rc = efx_mcdi_filter_pri(table, spec); + if (rc < 0) + goto out_unlock; + match_pri = rc; + + hash = efx_filter_spec_hash(spec); + is_mc_recip = efx_filter_is_mc_recipient(spec); + if (is_mc_recip) + bitmap_zero(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT); + + if (spec->flags & EFX_FILTER_FLAG_RX_RSS) { + mutex_lock(&efx->rss_lock); + rss_locked = true; + if (spec->rss_context) + ctx = efx_find_rss_context_entry(efx, spec->rss_context); + else + ctx = &efx->rss_context; + if (!ctx) { + rc = -ENOENT; + goto out_unlock; + } + if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID) { + rc = -EOPNOTSUPP; + goto out_unlock; + } + } + + /* Find any existing filters with the same match tuple or + * else a free slot to insert at. + */ + for (depth = 1; depth < EFX_EF10_FILTER_SEARCH_LIMIT; depth++) { + i = (hash + depth) & (EFX_MCDI_FILTER_TBL_ROWS - 1); + saved_spec = efx_mcdi_filter_entry_spec(table, i); + + if (!saved_spec) { + if (ins_index < 0) + ins_index = i; + } else if (efx_filter_spec_equal(spec, saved_spec)) { + if (spec->priority < saved_spec->priority && + spec->priority != EFX_FILTER_PRI_AUTO) { + rc = -EPERM; + goto out_unlock; + } + if (!is_mc_recip) { + /* This is the only one */ + if (spec->priority == + saved_spec->priority && + !replace_equal) { + rc = -EEXIST; + goto out_unlock; + } + ins_index = i; + break; + } else if (spec->priority > + saved_spec->priority || + (spec->priority == + saved_spec->priority && + replace_equal)) { + if (ins_index < 0) + ins_index = i; + else + __set_bit(depth, mc_rem_map); + } + } + } + + /* Once we reach the maximum search depth, use the first suitable + * slot, or return -EBUSY if there was none + */ + if (ins_index < 0) { + rc = -EBUSY; + goto out_unlock; + } + + /* Create a software table entry if necessary. 
*/ + saved_spec = efx_mcdi_filter_entry_spec(table, ins_index); + if (saved_spec) { + if (spec->priority == EFX_FILTER_PRI_AUTO && + saved_spec->priority >= EFX_FILTER_PRI_AUTO) { + /* Just make sure it won't be removed */ + if (saved_spec->priority > EFX_FILTER_PRI_AUTO) + saved_spec->flags |= EFX_FILTER_FLAG_RX_OVER_AUTO; + table->entry[ins_index].spec &= + ~EFX_EF10_FILTER_FLAG_AUTO_OLD; + rc = ins_index; + goto out_unlock; + } + replacing = true; + priv_flags = efx_mcdi_filter_entry_flags(table, ins_index); + } else { + saved_spec = kmalloc(sizeof(*spec), GFP_ATOMIC); + if (!saved_spec) { + rc = -ENOMEM; + goto out_unlock; + } + *saved_spec = *spec; + priv_flags = 0; + } + efx_mcdi_filter_set_entry(table, ins_index, saved_spec, priv_flags); + + /* Actually insert the filter on the HW */ + rc = efx_mcdi_filter_push(efx, spec, &table->entry[ins_index].handle, + ctx, replacing); + + if (rc == -EINVAL && nic_data->must_realloc_vis) + /* The MC rebooted under us, causing it to reject our filter + * insertion as pointing to an invalid VI (spec->dmaq_id). + */ + rc = -EAGAIN; + + /* Finalise the software table entry */ + if (rc == 0) { + if (replacing) { + /* Update the fields that may differ */ + if (saved_spec->priority == EFX_FILTER_PRI_AUTO) + saved_spec->flags |= + EFX_FILTER_FLAG_RX_OVER_AUTO; + saved_spec->priority = spec->priority; + saved_spec->flags &= EFX_FILTER_FLAG_RX_OVER_AUTO; + saved_spec->flags |= spec->flags; + saved_spec->rss_context = spec->rss_context; + saved_spec->dmaq_id = spec->dmaq_id; + } + } else if (!replacing) { + kfree(saved_spec); + saved_spec = NULL; + } else { + /* We failed to replace, so the old filter is still present. + * Roll back the software table to reflect this. In fact the + * efx_mcdi_filter_set_entry() call below will do the right + * thing, so nothing extra is needed here. + */ + } + efx_mcdi_filter_set_entry(table, ins_index, saved_spec, priv_flags); + + /* Remove and finalise entries for lower-priority multicast + * recipients + */ + if (is_mc_recip) { + MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN); + unsigned int depth, i; + + memset(inbuf, 0, sizeof(inbuf)); + + for (depth = 0; depth < EFX_EF10_FILTER_SEARCH_LIMIT; depth++) { + if (!test_bit(depth, mc_rem_map)) + continue; + + i = (hash + depth) & (EFX_MCDI_FILTER_TBL_ROWS - 1); + saved_spec = efx_mcdi_filter_entry_spec(table, i); + priv_flags = efx_mcdi_filter_entry_flags(table, i); + + if (rc == 0) { + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, + MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE); + MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, + table->entry[i].handle); + rc = efx_mcdi_rpc(efx, MC_CMD_FILTER_OP, + inbuf, sizeof(inbuf), + NULL, 0, NULL); + } + + if (rc == 0) { + kfree(saved_spec); + saved_spec = NULL; + priv_flags = 0; + } + efx_mcdi_filter_set_entry(table, i, saved_spec, + priv_flags); + } + } + + /* If successful, return the inserted filter ID */ + if (rc == 0) + rc = efx_mcdi_filter_make_filter_id(match_pri, ins_index); + +out_unlock: + if (rss_locked) + mutex_unlock(&efx->rss_lock); + up_write(&table->lock); + return rc; +} + +s32 efx_mcdi_filter_insert(struct efx_nic *efx, struct efx_filter_spec *spec, + bool replace_equal) +{ + s32 ret; + + down_read(&efx->filter_sem); + ret = efx_mcdi_filter_insert_locked(efx, spec, replace_equal); + up_read(&efx->filter_sem); + + return ret; +} + +/* + * Remove a filter. + * If !by_index, remove by ID + * If by_index, remove by index + * Filter ID may come from userland and must be range-checked. 
+ * Caller must hold efx->filter_sem for read, and efx->filter_state->lock + * for write. + */ +static int efx_mcdi_filter_remove_internal(struct efx_nic *efx, + unsigned int priority_mask, + u32 filter_id, bool by_index) +{ + unsigned int filter_idx = efx_mcdi_filter_get_unsafe_id(filter_id); + struct efx_mcdi_filter_table *table = efx->filter_state; + MCDI_DECLARE_BUF(inbuf, + MC_CMD_FILTER_OP_IN_HANDLE_OFST + + MC_CMD_FILTER_OP_IN_HANDLE_LEN); + struct efx_filter_spec *spec; + DEFINE_WAIT(wait); + int rc; + + spec = efx_mcdi_filter_entry_spec(table, filter_idx); + if (!spec || + (!by_index && + efx_mcdi_filter_pri(table, spec) != + efx_mcdi_filter_get_unsafe_pri(filter_id))) + return -ENOENT; + + if (spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO && + priority_mask == (1U << EFX_FILTER_PRI_AUTO)) { + /* Just remove flags */ + spec->flags &= ~EFX_FILTER_FLAG_RX_OVER_AUTO; + table->entry[filter_idx].spec &= ~EFX_EF10_FILTER_FLAG_AUTO_OLD; + return 0; + } + + if (!(priority_mask & (1U << spec->priority))) + return -ENOENT; + + if (spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO) { + /* Reset to an automatic filter */ + + struct efx_filter_spec new_spec = *spec; + + new_spec.priority = EFX_FILTER_PRI_AUTO; + new_spec.flags = (EFX_FILTER_FLAG_RX | + (efx_rss_active(&efx->rss_context) ? + EFX_FILTER_FLAG_RX_RSS : 0)); + new_spec.dmaq_id = 0; + new_spec.rss_context = 0; + rc = efx_mcdi_filter_push(efx, &new_spec, + &table->entry[filter_idx].handle, + &efx->rss_context, + true); + + if (rc == 0) + *spec = new_spec; + } else { + /* Really remove the filter */ + + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, + efx_mcdi_filter_is_exclusive(spec) ? + MC_CMD_FILTER_OP_IN_OP_REMOVE : + MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE); + MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, + table->entry[filter_idx].handle); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP, + inbuf, sizeof(inbuf), NULL, 0, NULL); + + if ((rc == 0) || (rc == -ENOENT)) { + /* Filter removed OK or didn't actually exist */ + kfree(spec); + efx_mcdi_filter_set_entry(table, filter_idx, NULL, 0); + } else { + efx_mcdi_display_error(efx, MC_CMD_FILTER_OP, + MC_CMD_FILTER_OP_EXT_IN_LEN, + NULL, 0, rc); + } + } + + return rc; +} + +/* Remove filters that weren't renewed. 
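An illustrative caller-side sketch (not part of the patch itself): efx_mcdi_filter_insert() above and efx_mcdi_filter_remove_safe() below are the driver-facing pair used by paths such as ethtool RX NFC. The function name, RX queue 0 and the zero flags word are placeholders, and efx_filter_init_rx()/efx_filter_set_eth_local()/EFX_FILTER_PRI_MANUAL are assumed from the driver's existing filter.h:

	static int example_steer_mac(struct efx_nic *efx, const u8 *mac)
	{
		struct efx_filter_spec spec;
		s32 id;

		/* RX filter, manual priority, no RSS, deliver to RX queue 0 */
		efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL, 0, 0);
		efx_filter_set_eth_local(&spec, EFX_FILTER_VID_UNSPEC, mac);

		id = efx_mcdi_filter_insert(efx, &spec, false);
		if (id < 0)
			return id;	/* e.g. -EBUSY when the hash search limit is hit */

		/* ... later, remove the filter again by its returned ID ... */
		return efx_mcdi_filter_remove_safe(efx, EFX_FILTER_PRI_MANUAL, id);
	}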
*/ +static void efx_mcdi_filter_remove_old(struct efx_nic *efx) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + int remove_failed = 0; + int remove_noent = 0; + int rc; + int i; + + down_write(&table->lock); + for (i = 0; i < EFX_MCDI_FILTER_TBL_ROWS; i++) { + if (READ_ONCE(table->entry[i].spec) & + EFX_EF10_FILTER_FLAG_AUTO_OLD) { + rc = efx_mcdi_filter_remove_internal(efx, + 1U << EFX_FILTER_PRI_AUTO, i, true); + if (rc == -ENOENT) + remove_noent++; + else if (rc) + remove_failed++; + } + } + up_write(&table->lock); + + if (remove_failed) + netif_info(efx, drv, efx->net_dev, + "%s: failed to remove %d filters\n", + __func__, remove_failed); + if (remove_noent) + netif_info(efx, drv, efx->net_dev, + "%s: failed to remove %d non-existent filters\n", + __func__, remove_noent); +} + +int efx_mcdi_filter_remove_safe(struct efx_nic *efx, + enum efx_filter_priority priority, + u32 filter_id) +{ + struct efx_mcdi_filter_table *table; + int rc; + + down_read(&efx->filter_sem); + table = efx->filter_state; + down_write(&table->lock); + rc = efx_mcdi_filter_remove_internal(efx, 1U << priority, filter_id, + false); + up_write(&table->lock); + up_read(&efx->filter_sem); + return rc; +} + +/* Caller must hold efx->filter_sem for read */ +static void efx_mcdi_filter_remove_unsafe(struct efx_nic *efx, + enum efx_filter_priority priority, + u32 filter_id) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + + if (filter_id == EFX_EF10_FILTER_ID_INVALID) + return; + + down_write(&table->lock); + efx_mcdi_filter_remove_internal(efx, 1U << priority, filter_id, + true); + up_write(&table->lock); +} + +int efx_mcdi_filter_get_safe(struct efx_nic *efx, + enum efx_filter_priority priority, + u32 filter_id, struct efx_filter_spec *spec) +{ + unsigned int filter_idx = efx_mcdi_filter_get_unsafe_id(filter_id); + const struct efx_filter_spec *saved_spec; + struct efx_mcdi_filter_table *table; + int rc; + + down_read(&efx->filter_sem); + table = efx->filter_state; + down_read(&table->lock); + saved_spec = efx_mcdi_filter_entry_spec(table, filter_idx); + if (saved_spec && saved_spec->priority == priority && + efx_mcdi_filter_pri(table, saved_spec) == + efx_mcdi_filter_get_unsafe_pri(filter_id)) { + *spec = *saved_spec; + rc = 0; + } else { + rc = -ENOENT; + } + up_read(&table->lock); + up_read(&efx->filter_sem); + return rc; +} + +static int efx_mcdi_filter_insert_addr_list(struct efx_nic *efx, + struct efx_mcdi_filter_vlan *vlan, + bool multicast, bool rollback) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + struct efx_mcdi_dev_addr *addr_list; + enum efx_filter_flags filter_flags; + struct efx_filter_spec spec; + u8 baddr[ETH_ALEN]; + unsigned int i, j; + int addr_count; + u16 *ids; + int rc; + + if (multicast) { + addr_list = table->dev_mc_list; + addr_count = table->dev_mc_count; + ids = vlan->mc; + } else { + addr_list = table->dev_uc_list; + addr_count = table->dev_uc_count; + ids = vlan->uc; + } + + filter_flags = efx_rss_active(&efx->rss_context) ? 
EFX_FILTER_FLAG_RX_RSS : 0; + + /* Insert/renew filters */ + for (i = 0; i < addr_count; i++) { + EFX_WARN_ON_PARANOID(ids[i] != EFX_EF10_FILTER_ID_INVALID); + efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0); + efx_filter_set_eth_local(&spec, vlan->vid, addr_list[i].addr); + rc = efx_mcdi_filter_insert_locked(efx, &spec, true); + if (rc < 0) { + if (rollback) { + netif_info(efx, drv, efx->net_dev, + "efx_mcdi_filter_insert failed rc=%d\n", + rc); + /* Fall back to promiscuous */ + for (j = 0; j < i; j++) { + efx_mcdi_filter_remove_unsafe( + efx, EFX_FILTER_PRI_AUTO, + ids[j]); + ids[j] = EFX_EF10_FILTER_ID_INVALID; + } + return rc; + } else { + /* keep invalid ID, and carry on */ + } + } else { + ids[i] = efx_mcdi_filter_get_unsafe_id(rc); + } + } + + if (multicast && rollback) { + /* Also need an Ethernet broadcast filter */ + EFX_WARN_ON_PARANOID(vlan->default_filters[EFX_EF10_BCAST] != + EFX_EF10_FILTER_ID_INVALID); + efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0); + eth_broadcast_addr(baddr); + efx_filter_set_eth_local(&spec, vlan->vid, baddr); + rc = efx_mcdi_filter_insert_locked(efx, &spec, true); + if (rc < 0) { + netif_warn(efx, drv, efx->net_dev, + "Broadcast filter insert failed rc=%d\n", rc); + /* Fall back to promiscuous */ + for (j = 0; j < i; j++) { + efx_mcdi_filter_remove_unsafe( + efx, EFX_FILTER_PRI_AUTO, + ids[j]); + ids[j] = EFX_EF10_FILTER_ID_INVALID; + } + return rc; + } else { + vlan->default_filters[EFX_EF10_BCAST] = + efx_mcdi_filter_get_unsafe_id(rc); + } + } + + return 0; +} + +static int efx_mcdi_filter_insert_def(struct efx_nic *efx, + struct efx_mcdi_filter_vlan *vlan, + enum efx_encap_type encap_type, + bool multicast, bool rollback) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + enum efx_filter_flags filter_flags; + struct efx_filter_spec spec; + u8 baddr[ETH_ALEN]; + int rc; + u16 *id; + + filter_flags = efx_rss_active(&efx->rss_context) ? EFX_FILTER_FLAG_RX_RSS : 0; + + efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0); + + if (multicast) + efx_filter_set_mc_def(&spec); + else + efx_filter_set_uc_def(&spec); + + if (encap_type) { + if (nic_data->datapath_caps & + (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN)) + efx_filter_set_encap_type(&spec, encap_type); + else + /* + * don't insert encap filters on non-supporting + * platforms. ID will be left as INVALID. + */ + return 0; + } + + if (vlan->vid != EFX_FILTER_VID_UNSPEC) + efx_filter_set_eth_local(&spec, vlan->vid, NULL); + + rc = efx_mcdi_filter_insert_locked(efx, &spec, true); + if (rc < 0) { + const char *um = multicast ? "Multicast" : "Unicast"; + const char *encap_name = ""; + const char *encap_ipv = ""; + + if ((encap_type & EFX_ENCAP_TYPES_MASK) == + EFX_ENCAP_TYPE_VXLAN) + encap_name = "VXLAN "; + else if ((encap_type & EFX_ENCAP_TYPES_MASK) == + EFX_ENCAP_TYPE_NVGRE) + encap_name = "NVGRE "; + else if ((encap_type & EFX_ENCAP_TYPES_MASK) == + EFX_ENCAP_TYPE_GENEVE) + encap_name = "GENEVE "; + if (encap_type & EFX_ENCAP_FLAG_IPV6) + encap_ipv = "IPv6 "; + else if (encap_type) + encap_ipv = "IPv4 "; + + /* + * unprivileged functions can't insert mismatch filters + * for encapsulated or unicast traffic, so downgrade + * those warnings to debug. 
+ */ + netif_cond_dbg(efx, drv, efx->net_dev, + rc == -EPERM && (encap_type || !multicast), warn, + "%s%s%s mismatch filter insert failed rc=%d\n", + encap_name, encap_ipv, um, rc); + } else if (multicast) { + /* mapping from encap types to default filter IDs (multicast) */ + static enum efx_mcdi_filter_default_filters map[] = { + [EFX_ENCAP_TYPE_NONE] = EFX_EF10_MCDEF, + [EFX_ENCAP_TYPE_VXLAN] = EFX_EF10_VXLAN4_MCDEF, + [EFX_ENCAP_TYPE_NVGRE] = EFX_EF10_NVGRE4_MCDEF, + [EFX_ENCAP_TYPE_GENEVE] = EFX_EF10_GENEVE4_MCDEF, + [EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6] = + EFX_EF10_VXLAN6_MCDEF, + [EFX_ENCAP_TYPE_NVGRE | EFX_ENCAP_FLAG_IPV6] = + EFX_EF10_NVGRE6_MCDEF, + [EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6] = + EFX_EF10_GENEVE6_MCDEF, + }; + + /* quick bounds check (BCAST result impossible) */ + BUILD_BUG_ON(EFX_EF10_BCAST != 0); + if (encap_type >= ARRAY_SIZE(map) || map[encap_type] == 0) { + WARN_ON(1); + return -EINVAL; + } + /* then follow map */ + id = &vlan->default_filters[map[encap_type]]; + + EFX_WARN_ON_PARANOID(*id != EFX_EF10_FILTER_ID_INVALID); + *id = efx_mcdi_filter_get_unsafe_id(rc); + if (!nic_data->workaround_26807 && !encap_type) { + /* Also need an Ethernet broadcast filter */ + efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, + filter_flags, 0); + eth_broadcast_addr(baddr); + efx_filter_set_eth_local(&spec, vlan->vid, baddr); + rc = efx_mcdi_filter_insert_locked(efx, &spec, true); + if (rc < 0) { + netif_warn(efx, drv, efx->net_dev, + "Broadcast filter insert failed rc=%d\n", + rc); + if (rollback) { + /* Roll back the mc_def filter */ + efx_mcdi_filter_remove_unsafe( + efx, EFX_FILTER_PRI_AUTO, + *id); + *id = EFX_EF10_FILTER_ID_INVALID; + return rc; + } + } else { + EFX_WARN_ON_PARANOID( + vlan->default_filters[EFX_EF10_BCAST] != + EFX_EF10_FILTER_ID_INVALID); + vlan->default_filters[EFX_EF10_BCAST] = + efx_mcdi_filter_get_unsafe_id(rc); + } + } + rc = 0; + } else { + /* mapping from encap types to default filter IDs (unicast) */ + static enum efx_mcdi_filter_default_filters map[] = { + [EFX_ENCAP_TYPE_NONE] = EFX_EF10_UCDEF, + [EFX_ENCAP_TYPE_VXLAN] = EFX_EF10_VXLAN4_UCDEF, + [EFX_ENCAP_TYPE_NVGRE] = EFX_EF10_NVGRE4_UCDEF, + [EFX_ENCAP_TYPE_GENEVE] = EFX_EF10_GENEVE4_UCDEF, + [EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6] = + EFX_EF10_VXLAN6_UCDEF, + [EFX_ENCAP_TYPE_NVGRE | EFX_ENCAP_FLAG_IPV6] = + EFX_EF10_NVGRE6_UCDEF, + [EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6] = + EFX_EF10_GENEVE6_UCDEF, + }; + + /* quick bounds check (BCAST result impossible) */ + BUILD_BUG_ON(EFX_EF10_BCAST != 0); + if (encap_type >= ARRAY_SIZE(map) || map[encap_type] == 0) { + WARN_ON(1); + return -EINVAL; + } + /* then follow map */ + id = &vlan->default_filters[map[encap_type]]; + EFX_WARN_ON_PARANOID(*id != EFX_EF10_FILTER_ID_INVALID); + *id = rc; + rc = 0; + } + return rc; +} + +/* + * Caller must hold efx->filter_sem for read if race against + * efx_mcdi_filter_table_remove() is possible + */ +static void efx_mcdi_filter_vlan_sync_rx_mode(struct efx_nic *efx, + struct efx_mcdi_filter_vlan *vlan) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + /* + * Do not install unspecified VID if VLAN filtering is enabled. + * Do not install all specified VIDs if VLAN filtering is disabled. 
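The check that follows collapses those two rules into a single equality test, (vlan->vid == EFX_FILTER_VID_UNSPEC) == table->vlan_filter; an illustrative truth table (reading aid only, not patch text):

	vid == UNSPEC, vlan_filter on   ->  true == true    ->  return (skip install)
	vid == UNSPEC, vlan_filter off  ->  true == false   ->  install
	vid specific,  vlan_filter on   ->  false == true   ->  install
	vid specific,  vlan_filter off  ->  false == false  ->  return (skip install)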
+ */ + if ((vlan->vid == EFX_FILTER_VID_UNSPEC) == table->vlan_filter) + return; + + /* Insert/renew unicast filters */ + if (table->uc_promisc) { + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NONE, + false, false); + efx_mcdi_filter_insert_addr_list(efx, vlan, false, false); + } else { + /* + * If any of the filters failed to insert, fall back to + * promiscuous mode - add in the uc_def filter. But keep + * our individual unicast filters. + */ + if (efx_mcdi_filter_insert_addr_list(efx, vlan, false, false)) + efx_mcdi_filter_insert_def(efx, vlan, + EFX_ENCAP_TYPE_NONE, + false, false); + } + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN, + false, false); + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN | + EFX_ENCAP_FLAG_IPV6, + false, false); + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE, + false, false); + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE | + EFX_ENCAP_FLAG_IPV6, + false, false); + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE, + false, false); + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE | + EFX_ENCAP_FLAG_IPV6, + false, false); + + /* + * Insert/renew multicast filters + * + * If changing promiscuous state with cascaded multicast filters, remove + * old filters first, so that packets are dropped rather than duplicated + */ + if (nic_data->workaround_26807 && + table->mc_promisc_last != table->mc_promisc) + efx_mcdi_filter_remove_old(efx); + if (table->mc_promisc) { + if (nic_data->workaround_26807) { + /* + * If we failed to insert promiscuous filters, rollback + * and fall back to individual multicast filters + */ + if (efx_mcdi_filter_insert_def(efx, vlan, + EFX_ENCAP_TYPE_NONE, + true, true)) { + /* Changing promisc state, so remove old filters */ + efx_mcdi_filter_remove_old(efx); + efx_mcdi_filter_insert_addr_list(efx, vlan, + true, false); + } + } else { + /* + * If we failed to insert promiscuous filters, don't + * rollback. Regardless, also insert the mc_list, + * unless it's incomplete due to overflow + */ + efx_mcdi_filter_insert_def(efx, vlan, + EFX_ENCAP_TYPE_NONE, + true, false); + if (!table->mc_overflow) + efx_mcdi_filter_insert_addr_list(efx, vlan, + true, false); + } + } else { + /* + * If any filters failed to insert, rollback and fall back to + * promiscuous mode - mc_def filter and maybe broadcast. If + * that fails, roll back again and insert as many of our + * individual multicast filters as we can. 
+ */ + if (efx_mcdi_filter_insert_addr_list(efx, vlan, true, true)) { + /* Changing promisc state, so remove old filters */ + if (nic_data->workaround_26807) + efx_mcdi_filter_remove_old(efx); + if (efx_mcdi_filter_insert_def(efx, vlan, + EFX_ENCAP_TYPE_NONE, + true, true)) + efx_mcdi_filter_insert_addr_list(efx, vlan, + true, false); + } + } + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN, + true, false); + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN | + EFX_ENCAP_FLAG_IPV6, + true, false); + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE, + true, false); + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE | + EFX_ENCAP_FLAG_IPV6, + true, false); + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE, + true, false); + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE | + EFX_ENCAP_FLAG_IPV6, + true, false); +} + +int efx_mcdi_filter_clear_rx(struct efx_nic *efx, + enum efx_filter_priority priority) +{ + struct efx_mcdi_filter_table *table; + unsigned int priority_mask; + unsigned int i; + int rc; + + priority_mask = (((1U << (priority + 1)) - 1) & + ~(1U << EFX_FILTER_PRI_AUTO)); + + down_read(&efx->filter_sem); + table = efx->filter_state; + down_write(&table->lock); + for (i = 0; i < EFX_MCDI_FILTER_TBL_ROWS; i++) { + rc = efx_mcdi_filter_remove_internal(efx, priority_mask, + i, true); + if (rc && rc != -ENOENT) + break; + rc = 0; + } + + up_write(&table->lock); + up_read(&efx->filter_sem); + return rc; +} + +u32 efx_mcdi_filter_count_rx_used(struct efx_nic *efx, + enum efx_filter_priority priority) +{ + struct efx_mcdi_filter_table *table; + unsigned int filter_idx; + s32 count = 0; + + down_read(&efx->filter_sem); + table = efx->filter_state; + down_read(&table->lock); + for (filter_idx = 0; filter_idx < EFX_MCDI_FILTER_TBL_ROWS; filter_idx++) { + if (table->entry[filter_idx].spec && + efx_mcdi_filter_entry_spec(table, filter_idx)->priority == + priority) + ++count; + } + up_read(&table->lock); + up_read(&efx->filter_sem); + return count; +} + +u32 efx_mcdi_filter_get_rx_id_limit(struct efx_nic *efx) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + + return table->rx_match_count * EFX_MCDI_FILTER_TBL_ROWS * 2; +} + +s32 efx_mcdi_filter_get_rx_ids(struct efx_nic *efx, + enum efx_filter_priority priority, + u32 *buf, u32 size) +{ + struct efx_mcdi_filter_table *table; + struct efx_filter_spec *spec; + unsigned int filter_idx; + s32 count = 0; + + down_read(&efx->filter_sem); + table = efx->filter_state; + down_read(&table->lock); + + for (filter_idx = 0; filter_idx < EFX_MCDI_FILTER_TBL_ROWS; filter_idx++) { + spec = efx_mcdi_filter_entry_spec(table, filter_idx); + if (spec && spec->priority == priority) { + if (count == size) { + count = -EMSGSIZE; + break; + } + buf[count++] = + efx_mcdi_filter_make_filter_id( + efx_mcdi_filter_pri(table, spec), + filter_idx); + } + } + up_read(&table->lock); + up_read(&efx->filter_sem); + return count; +} + +static int efx_mcdi_filter_match_flags_from_mcdi(bool encap, u32 mcdi_flags) +{ + int match_flags = 0; + +#define MAP_FLAG(gen_flag, mcdi_field) do { \ + u32 old_mcdi_flags = mcdi_flags; \ + mcdi_flags &= ~(1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_ ## \ + mcdi_field ## _LBN); \ + if (mcdi_flags != old_mcdi_flags) \ + match_flags |= EFX_FILTER_MATCH_ ## gen_flag; \ + } while (0) + + if (encap) { + /* encap filters must specify encap type */ + match_flags |= EFX_FILTER_MATCH_ENCAP_TYPE; + /* and imply ethertype and ip proto */ + mcdi_flags &= + ~(1 << 
MC_CMD_FILTER_OP_EXT_IN_MATCH_IP_PROTO_LBN); + mcdi_flags &= + ~(1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_ETHER_TYPE_LBN); + /* VLAN tags refer to the outer packet */ + MAP_FLAG(INNER_VID, INNER_VLAN); + MAP_FLAG(OUTER_VID, OUTER_VLAN); + /* everything else refers to the inner packet */ + MAP_FLAG(LOC_MAC_IG, IFRM_UNKNOWN_UCAST_DST); + MAP_FLAG(LOC_MAC_IG, IFRM_UNKNOWN_MCAST_DST); + MAP_FLAG(REM_HOST, IFRM_SRC_IP); + MAP_FLAG(LOC_HOST, IFRM_DST_IP); + MAP_FLAG(REM_MAC, IFRM_SRC_MAC); + MAP_FLAG(REM_PORT, IFRM_SRC_PORT); + MAP_FLAG(LOC_MAC, IFRM_DST_MAC); + MAP_FLAG(LOC_PORT, IFRM_DST_PORT); + MAP_FLAG(ETHER_TYPE, IFRM_ETHER_TYPE); + MAP_FLAG(IP_PROTO, IFRM_IP_PROTO); + } else { + MAP_FLAG(LOC_MAC_IG, UNKNOWN_UCAST_DST); + MAP_FLAG(LOC_MAC_IG, UNKNOWN_MCAST_DST); + MAP_FLAG(REM_HOST, SRC_IP); + MAP_FLAG(LOC_HOST, DST_IP); + MAP_FLAG(REM_MAC, SRC_MAC); + MAP_FLAG(REM_PORT, SRC_PORT); + MAP_FLAG(LOC_MAC, DST_MAC); + MAP_FLAG(LOC_PORT, DST_PORT); + MAP_FLAG(ETHER_TYPE, ETHER_TYPE); + MAP_FLAG(INNER_VID, INNER_VLAN); + MAP_FLAG(OUTER_VID, OUTER_VLAN); + MAP_FLAG(IP_PROTO, IP_PROTO); + } +#undef MAP_FLAG + + /* Did we map them all? */ + if (mcdi_flags) + return -EINVAL; + + return match_flags; +} + +bool efx_mcdi_filter_match_supported(struct efx_mcdi_filter_table *table, + bool encap, + enum efx_filter_match_flags match_flags) +{ + unsigned int match_pri; + int mf; + + for (match_pri = 0; + match_pri < table->rx_match_count; + match_pri++) { + mf = efx_mcdi_filter_match_flags_from_mcdi(encap, + table->rx_match_mcdi_flags[match_pri]); + if (mf == match_flags) + return true; + } + + return false; +} + +static int +efx_mcdi_filter_table_probe_matches(struct efx_nic *efx, + struct efx_mcdi_filter_table *table, + bool encap) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_GET_PARSER_DISP_INFO_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PARSER_DISP_INFO_OUT_LENMAX); + unsigned int pd_match_pri, pd_match_count; + size_t outlen; + int rc; + + /* Find out which RX filter types are supported, and their priorities */ + MCDI_SET_DWORD(inbuf, GET_PARSER_DISP_INFO_IN_OP, + encap ? 
+ MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_ENCAP_RX_MATCHES : + MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_RX_MATCHES); + rc = efx_mcdi_rpc(efx, MC_CMD_GET_PARSER_DISP_INFO, + inbuf, sizeof(inbuf), outbuf, sizeof(outbuf), + &outlen); + if (rc) + return rc; + + pd_match_count = MCDI_VAR_ARRAY_LEN( + outlen, GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES); + + for (pd_match_pri = 0; pd_match_pri < pd_match_count; pd_match_pri++) { + u32 mcdi_flags = + MCDI_ARRAY_DWORD( + outbuf, + GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES, + pd_match_pri); + rc = efx_mcdi_filter_match_flags_from_mcdi(encap, mcdi_flags); + if (rc < 0) { + netif_dbg(efx, probe, efx->net_dev, + "%s: fw flags %#x pri %u not supported in driver\n", + __func__, mcdi_flags, pd_match_pri); + } else { + netif_dbg(efx, probe, efx->net_dev, + "%s: fw flags %#x pri %u supported as driver flags %#x pri %u\n", + __func__, mcdi_flags, pd_match_pri, + rc, table->rx_match_count); + table->rx_match_mcdi_flags[table->rx_match_count] = mcdi_flags; + table->rx_match_count++; + } + } + + return 0; +} + +int efx_mcdi_filter_table_probe(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + struct net_device *net_dev = efx->net_dev; + struct efx_mcdi_filter_table *table; + struct efx_mcdi_filter_vlan *vlan; + int rc; + + if (!efx_rwsem_assert_write_locked(&efx->filter_sem)) + return -EINVAL; + + if (efx->filter_state) /* already probed */ + return 0; + + table = kzalloc(sizeof(*table), GFP_KERNEL); + if (!table) + return -ENOMEM; + + table->rx_match_count = 0; + rc = efx_mcdi_filter_table_probe_matches(efx, table, false); + if (rc) + goto fail; + if (nic_data->datapath_caps & + (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN)) + rc = efx_mcdi_filter_table_probe_matches(efx, table, true); + if (rc) + goto fail; + if ((efx_supported_features(efx) & NETIF_F_HW_VLAN_CTAG_FILTER) && + !(efx_mcdi_filter_match_supported(table, false, + (EFX_FILTER_MATCH_OUTER_VID | EFX_FILTER_MATCH_LOC_MAC)) && + efx_mcdi_filter_match_supported(table, false, + (EFX_FILTER_MATCH_OUTER_VID | EFX_FILTER_MATCH_LOC_MAC_IG)))) { + netif_info(efx, probe, net_dev, + "VLAN filters are not supported in this firmware variant\n"); + net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; + efx->fixed_features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; + net_dev->hw_features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; + } + + table->entry = vzalloc(array_size(EFX_MCDI_FILTER_TBL_ROWS, + sizeof(*table->entry))); + if (!table->entry) { + rc = -ENOMEM; + goto fail; + } + + table->mc_promisc_last = false; + table->vlan_filter = + !!(efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_FILTER); + INIT_LIST_HEAD(&table->vlan_list); + init_rwsem(&table->lock); + + efx->filter_state = table; + + list_for_each_entry(vlan, &nic_data->vlan_list, list) { + rc = efx_mcdi_filter_add_vlan(efx, vlan->vid); + if (rc) + goto fail_add_vlan; + } + + return 0; + +fail_add_vlan: + efx_mcdi_filter_cleanup_vlans(efx); + efx->filter_state = NULL; +fail: + kfree(table); + return rc; +} + +/* + * Caller must hold efx->filter_sem for read if race against + * efx_mcdi_filter_table_remove() is possible + */ +void efx_mcdi_filter_table_restore(struct efx_nic *efx) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + struct efx_ef10_nic_data *nic_data = efx->nic_data; + unsigned int invalid_filters = 0, failed = 0; + struct efx_mcdi_filter_vlan *vlan; + struct efx_filter_spec *spec; + struct efx_rss_context *ctx; + unsigned int filter_idx; + u32 mcdi_flags; + int match_pri; + int rc, i; + + 
WARN_ON(!rwsem_is_locked(&efx->filter_sem)); + + if (!nic_data->must_restore_filters) + return; + + if (!table) + return; + + down_write(&table->lock); + mutex_lock(&efx->rss_lock); + + for (filter_idx = 0; filter_idx < EFX_MCDI_FILTER_TBL_ROWS; filter_idx++) { + spec = efx_mcdi_filter_entry_spec(table, filter_idx); + if (!spec) + continue; + + mcdi_flags = efx_mcdi_filter_mcdi_flags_from_spec(spec); + match_pri = 0; + while (match_pri < table->rx_match_count && + table->rx_match_mcdi_flags[match_pri] != mcdi_flags) + ++match_pri; + if (match_pri >= table->rx_match_count) { + invalid_filters++; + goto not_restored; + } + if (spec->rss_context) + ctx = efx_find_rss_context_entry(efx, spec->rss_context); + else + ctx = &efx->rss_context; + if (spec->flags & EFX_FILTER_FLAG_RX_RSS) { + if (!ctx) { + netif_warn(efx, drv, efx->net_dev, + "Warning: unable to restore a filter with nonexistent RSS context %u.\n", + spec->rss_context); + invalid_filters++; + goto not_restored; + } + if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID) { + netif_warn(efx, drv, efx->net_dev, + "Warning: unable to restore a filter with RSS context %u as it was not created.\n", + spec->rss_context); + invalid_filters++; + goto not_restored; + } + } + + rc = efx_mcdi_filter_push(efx, spec, + &table->entry[filter_idx].handle, + ctx, false); + if (rc) + failed++; + + if (rc) { +not_restored: + list_for_each_entry(vlan, &table->vlan_list, list) + for (i = 0; i < EFX_EF10_NUM_DEFAULT_FILTERS; ++i) + if (vlan->default_filters[i] == filter_idx) + vlan->default_filters[i] = + EFX_EF10_FILTER_ID_INVALID; + + kfree(spec); + efx_mcdi_filter_set_entry(table, filter_idx, NULL, 0); + } + } + + mutex_unlock(&efx->rss_lock); + up_write(&table->lock); + + /* + * This can happen validly if the MC's capabilities have changed, so + * is not an error. + */ + if (invalid_filters) + netif_dbg(efx, drv, efx->net_dev, + "Did not restore %u filters that are now unsupported.\n", + invalid_filters); + + if (failed) + netif_err(efx, hw, efx->net_dev, + "unable to restore %u filters\n", failed); + else + nic_data->must_restore_filters = false; +} + +void efx_mcdi_filter_table_remove(struct efx_nic *efx) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN); + struct efx_filter_spec *spec; + unsigned int filter_idx; + int rc; + + efx_mcdi_filter_cleanup_vlans(efx); + efx->filter_state = NULL; + /* + * If we were called without locking, then it's not safe to free + * the table as others might be using it. So we just WARN, leak + * the memory, and potentially get an inconsistent filter table + * state. + * This should never actually happen. + */ + if (!efx_rwsem_assert_write_locked(&efx->filter_sem)) + return; + + if (!table) + return; + + for (filter_idx = 0; filter_idx < EFX_MCDI_FILTER_TBL_ROWS; filter_idx++) { + spec = efx_mcdi_filter_entry_spec(table, filter_idx); + if (!spec) + continue; + + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, + efx_mcdi_filter_is_exclusive(spec) ? 
+ MC_CMD_FILTER_OP_IN_OP_REMOVE : + MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE); + MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, + table->entry[filter_idx].handle); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP, inbuf, + sizeof(inbuf), NULL, 0, NULL); + if (rc) + netif_info(efx, drv, efx->net_dev, + "%s: filter %04x remove failed\n", + __func__, filter_idx); + kfree(spec); + } + + vfree(table->entry); + kfree(table); +} + +static void efx_mcdi_filter_mark_one_old(struct efx_nic *efx, uint16_t *id) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + unsigned int filter_idx; + + efx_rwsem_assert_write_locked(&table->lock); + + if (*id != EFX_EF10_FILTER_ID_INVALID) { + filter_idx = efx_mcdi_filter_get_unsafe_id(*id); + if (!table->entry[filter_idx].spec) + netif_dbg(efx, drv, efx->net_dev, + "marked null spec old %04x:%04x\n", *id, + filter_idx); + table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_AUTO_OLD; + *id = EFX_EF10_FILTER_ID_INVALID; + } +} + +/* Mark old per-VLAN filters that may need to be removed */ +static void _efx_mcdi_filter_vlan_mark_old(struct efx_nic *efx, + struct efx_mcdi_filter_vlan *vlan) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + unsigned int i; + + for (i = 0; i < table->dev_uc_count; i++) + efx_mcdi_filter_mark_one_old(efx, &vlan->uc[i]); + for (i = 0; i < table->dev_mc_count; i++) + efx_mcdi_filter_mark_one_old(efx, &vlan->mc[i]); + for (i = 0; i < EFX_EF10_NUM_DEFAULT_FILTERS; i++) + efx_mcdi_filter_mark_one_old(efx, &vlan->default_filters[i]); +} + +/* + * Mark old filters that may need to be removed. + * Caller must hold efx->filter_sem for read if race against + * efx_mcdi_filter_table_remove() is possible + */ +static void efx_mcdi_filter_mark_old(struct efx_nic *efx) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + struct efx_mcdi_filter_vlan *vlan; + + down_write(&table->lock); + list_for_each_entry(vlan, &table->vlan_list, list) + _efx_mcdi_filter_vlan_mark_old(efx, vlan); + up_write(&table->lock); +} + +int efx_mcdi_filter_add_vlan(struct efx_nic *efx, u16 vid) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + struct efx_mcdi_filter_vlan *vlan; + unsigned int i; + + if (!efx_rwsem_assert_write_locked(&efx->filter_sem)) + return -EINVAL; + + vlan = efx_mcdi_filter_find_vlan(efx, vid); + if (WARN_ON(vlan)) { + netif_err(efx, drv, efx->net_dev, + "VLAN %u already added\n", vid); + return -EALREADY; + } + + vlan = kzalloc(sizeof(*vlan), GFP_KERNEL); + if (!vlan) + return -ENOMEM; + + vlan->vid = vid; + + for (i = 0; i < ARRAY_SIZE(vlan->uc); i++) + vlan->uc[i] = EFX_EF10_FILTER_ID_INVALID; + for (i = 0; i < ARRAY_SIZE(vlan->mc); i++) + vlan->mc[i] = EFX_EF10_FILTER_ID_INVALID; + for (i = 0; i < EFX_EF10_NUM_DEFAULT_FILTERS; i++) + vlan->default_filters[i] = EFX_EF10_FILTER_ID_INVALID; + + list_add_tail(&vlan->list, &table->vlan_list); + + if (efx_dev_registered(efx)) + efx_mcdi_filter_vlan_sync_rx_mode(efx, vlan); + + return 0; +} + +static void efx_mcdi_filter_del_vlan_internal(struct efx_nic *efx, + struct efx_mcdi_filter_vlan *vlan) +{ + unsigned int i; + + /* See comment in efx_mcdi_filter_table_remove() */ + if (!efx_rwsem_assert_write_locked(&efx->filter_sem)) + return; + + list_del(&vlan->list); + + for (i = 0; i < ARRAY_SIZE(vlan->uc); i++) + efx_mcdi_filter_remove_unsafe(efx, EFX_FILTER_PRI_AUTO, + vlan->uc[i]); + for (i = 0; i < ARRAY_SIZE(vlan->mc); i++) + efx_mcdi_filter_remove_unsafe(efx, EFX_FILTER_PRI_AUTO, + vlan->mc[i]); + for (i = 0; i < EFX_EF10_NUM_DEFAULT_FILTERS; i++) + if 
(vlan->default_filters[i] != EFX_EF10_FILTER_ID_INVALID) + efx_mcdi_filter_remove_unsafe(efx, EFX_FILTER_PRI_AUTO, + vlan->default_filters[i]); + + kfree(vlan); +} + +void efx_mcdi_filter_del_vlan(struct efx_nic *efx, u16 vid) +{ + struct efx_mcdi_filter_vlan *vlan; + + /* See comment in efx_mcdi_filter_table_remove() */ + if (!efx_rwsem_assert_write_locked(&efx->filter_sem)) + return; + + vlan = efx_mcdi_filter_find_vlan(efx, vid); + if (!vlan) { + netif_err(efx, drv, efx->net_dev, + "VLAN %u not found in filter state\n", vid); + return; + } + + efx_mcdi_filter_del_vlan_internal(efx, vlan); +} + +struct efx_mcdi_filter_vlan *efx_mcdi_filter_find_vlan(struct efx_nic *efx, + u16 vid) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + struct efx_mcdi_filter_vlan *vlan; + + WARN_ON(!rwsem_is_locked(&efx->filter_sem)); + + list_for_each_entry(vlan, &table->vlan_list, list) { + if (vlan->vid == vid) + return vlan; + } + + return NULL; +} + +void efx_mcdi_filter_cleanup_vlans(struct efx_nic *efx) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + struct efx_mcdi_filter_vlan *vlan, *next_vlan; + + /* See comment in efx_mcdi_filter_table_remove() */ + if (!efx_rwsem_assert_write_locked(&efx->filter_sem)) + return; + + if (!table) + return; + + list_for_each_entry_safe(vlan, next_vlan, &table->vlan_list, list) + efx_mcdi_filter_del_vlan_internal(efx, vlan); +} + +static void efx_mcdi_filter_uc_addr_list(struct efx_nic *efx) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + struct net_device *net_dev = efx->net_dev; + struct netdev_hw_addr *uc; + unsigned int i; + + table->uc_promisc = !!(net_dev->flags & IFF_PROMISC); + ether_addr_copy(table->dev_uc_list[0].addr, net_dev->dev_addr); + i = 1; + netdev_for_each_uc_addr(uc, net_dev) { + if (i >= EFX_EF10_FILTER_DEV_UC_MAX) { + table->uc_promisc = true; + break; + } + ether_addr_copy(table->dev_uc_list[i].addr, uc->addr); + i++; + } + + table->dev_uc_count = i; +} + +static void efx_mcdi_filter_mc_addr_list(struct efx_nic *efx) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + struct net_device *net_dev = efx->net_dev; + struct netdev_hw_addr *mc; + unsigned int i; + + table->mc_overflow = false; + table->mc_promisc = !!(net_dev->flags & (IFF_PROMISC | IFF_ALLMULTI)); + + i = 0; + netdev_for_each_mc_addr(mc, net_dev) { + if (i >= EFX_EF10_FILTER_DEV_MC_MAX) { + table->mc_promisc = true; + table->mc_overflow = true; + break; + } + ether_addr_copy(table->dev_mc_list[i].addr, mc->addr); + i++; + } + + table->dev_mc_count = i; +} + +/* + * Caller must hold efx->filter_sem for read if race against + * efx_mcdi_filter_table_remove() is possible + */ +void efx_mcdi_filter_sync_rx_mode(struct efx_nic *efx) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + struct net_device *net_dev = efx->net_dev; + struct efx_mcdi_filter_vlan *vlan; + bool vlan_filter; + + if (!efx_dev_registered(efx)) + return; + + if (!table) + return; + + efx_mcdi_filter_mark_old(efx); + + /* + * Copy/convert the address lists; add the primary station + * address and broadcast address + */ + netif_addr_lock_bh(net_dev); + efx_mcdi_filter_uc_addr_list(efx); + efx_mcdi_filter_mc_addr_list(efx); + netif_addr_unlock_bh(net_dev); + + /* + * If VLAN filtering changes, all old filters are finally removed. + * Do it in advance to avoid conflicts for unicast untagged and + * VLAN 0 tagged filters. 
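Stepping back, efx_mcdi_filter_sync_rx_mode() is a mark-and-sweep pass; a condensed sketch of the sequence (illustrative summary, not patch text):

	efx_mcdi_filter_mark_old(efx);      /* tag every AUTO filter AUTO_OLD (done above) */
	/* re-insert the current uc/mc lists per VLAN; renewing an entry
	 * clears its AUTO_OLD tag again in efx_mcdi_filter_insert_locked()
	 */
	efx_mcdi_filter_remove_old(efx);    /* finally sweep the entries still tagged */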
+ */ + vlan_filter = !!(net_dev->features & NETIF_F_HW_VLAN_CTAG_FILTER); + if (table->vlan_filter != vlan_filter) { + table->vlan_filter = vlan_filter; + efx_mcdi_filter_remove_old(efx); + } + + list_for_each_entry(vlan, &table->vlan_list, list) + efx_mcdi_filter_vlan_sync_rx_mode(efx, vlan); + + efx_mcdi_filter_remove_old(efx); + table->mc_promisc_last = table->mc_promisc; +} + +#ifdef CONFIG_RFS_ACCEL + +bool efx_mcdi_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id, + unsigned int filter_idx) +{ + struct efx_filter_spec *spec, saved_spec; + struct efx_mcdi_filter_table *table; + struct efx_arfs_rule *rule = NULL; + bool ret = true, force = false; + u16 arfs_id; + + down_read(&efx->filter_sem); + table = efx->filter_state; + down_write(&table->lock); + spec = efx_mcdi_filter_entry_spec(table, filter_idx); + + if (!spec || spec->priority != EFX_FILTER_PRI_HINT) + goto out_unlock; + + spin_lock_bh(&efx->rps_hash_lock); + if (!efx->rps_hash_table) { + /* In the absence of the table, we always return 0 to ARFS. */ + arfs_id = 0; + } else { + rule = efx_rps_hash_find(efx, spec); + if (!rule) + /* ARFS table doesn't know of this filter, so remove it */ + goto expire; + arfs_id = rule->arfs_id; + ret = efx_rps_check_rule(rule, filter_idx, &force); + if (force) + goto expire; + if (!ret) { + spin_unlock_bh(&efx->rps_hash_lock); + goto out_unlock; + } + } + if (!rps_may_expire_flow(efx->net_dev, spec->dmaq_id, flow_id, arfs_id)) + ret = false; + else if (rule) + rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING; +expire: + saved_spec = *spec; /* remove operation will kfree spec */ + spin_unlock_bh(&efx->rps_hash_lock); + /* + * At this point (since we dropped the lock), another thread might queue + * up a fresh insertion request (but the actual insertion will be held + * up by our possession of the filter table lock). In that case, it + * will set rule->filter_id to EFX_ARFS_FILTER_ID_PENDING, meaning that + * the rule is not removed by efx_rps_hash_del() below. + */ + if (ret) + ret = efx_mcdi_filter_remove_internal(efx, 1U << spec->priority, + filter_idx, true) == 0; + /* + * While we can't safely dereference rule (we dropped the lock), we can + * still test it for NULL. 
+ */ + if (ret && rule) { + /* Expiring, so remove entry from ARFS table */ + spin_lock_bh(&efx->rps_hash_lock); + efx_rps_hash_del(efx, &saved_spec); + spin_unlock_bh(&efx->rps_hash_lock); + } +out_unlock: + up_write(&table->lock); + up_read(&efx->filter_sem); + return ret; +} + +#endif /* CONFIG_RFS_ACCEL */ + +#define RSS_MODE_HASH_ADDRS (1 << RSS_MODE_HASH_SRC_ADDR_LBN |\ + 1 << RSS_MODE_HASH_DST_ADDR_LBN) +#define RSS_MODE_HASH_PORTS (1 << RSS_MODE_HASH_SRC_PORT_LBN |\ + 1 << RSS_MODE_HASH_DST_PORT_LBN) +#define RSS_CONTEXT_FLAGS_DEFAULT (1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_IPV4_EN_LBN |\ + 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_TCPV4_EN_LBN |\ + 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_IPV6_EN_LBN |\ + 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_TCPV6_EN_LBN |\ + (RSS_MODE_HASH_ADDRS | RSS_MODE_HASH_PORTS) << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TCP_IPV4_RSS_MODE_LBN |\ + RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN |\ + RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV4_RSS_MODE_LBN |\ + (RSS_MODE_HASH_ADDRS | RSS_MODE_HASH_PORTS) << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TCP_IPV6_RSS_MODE_LBN |\ + RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN |\ + RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV6_RSS_MODE_LBN) + +int efx_mcdi_get_rss_context_flags(struct efx_nic *efx, u32 context, u32 *flags) +{ + /* + * Firmware had a bug (sfc bug 61952) where it would not actually + * fill in the flags field in the response to MC_CMD_RSS_CONTEXT_GET_FLAGS. + * This meant that it would always contain whatever was previously + * in the MCDI buffer. Fortunately, all firmware versions with + * this bug have the same default flags value for a newly-allocated + * RSS context, and the only time we want to get the flags is just + * after allocating. Moreover, the response has a 32-bit hole + * where the context ID would be in the request, so we can use an + * overlength buffer in the request and pre-fill the flags field + * with what we believe the default to be. Thus if the firmware + * has the bug, it will leave our pre-filled value in the flags + * field of the response, and we will get the right answer. + * + * However, this does mean that this function should NOT be used if + * the RSS context flags might not be their defaults - it is ONLY + * reliably correct for a newly-allocated RSS context. + */ + MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN); + size_t outlen; + int rc; + + /* Check we have a hole for the context ID */ + BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_FLAGS_IN_LEN != MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_FLAGS_OFST); + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_FLAGS_IN_RSS_CONTEXT_ID, context); + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_FLAGS_OUT_FLAGS, + RSS_CONTEXT_FLAGS_DEFAULT); + rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_FLAGS, inbuf, + sizeof(inbuf), outbuf, sizeof(outbuf), &outlen); + if (rc == 0) { + if (outlen < MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN) + rc = -EIO; + else + *flags = MCDI_DWORD(outbuf, RSS_CONTEXT_GET_FLAGS_OUT_FLAGS); + } + return rc; +} + +/* + * Attempt to enable 4-tuple UDP hashing on the specified RSS context. + * If we fail, we just leave the RSS context at its default hash settings, + * which is safe but may slightly reduce performance. + * Defaults are 4-tuple for TCP and 2-tuple for UDP and other-IP, so we + * just need to set the UDP ports flags (for both IP versions). 
+ */ +void efx_mcdi_set_rss_context_flags(struct efx_nic *efx, + struct efx_rss_context *ctx) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_SET_FLAGS_IN_LEN); + u32 flags; + + BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_SET_FLAGS_OUT_LEN != 0); + + if (efx_mcdi_get_rss_context_flags(efx, ctx->context_id, &flags) != 0) + return; + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_RSS_CONTEXT_ID, + ctx->context_id); + flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN; + flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN; + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_FLAGS, flags); + if (!efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_FLAGS, inbuf, sizeof(inbuf), + NULL, 0, NULL)) + /* Succeeded, so UDP 4-tuple is now enabled */ + ctx->rx_hash_udp_4tuple = true; +} + +static int efx_mcdi_filter_alloc_rss_context(struct efx_nic *efx, bool exclusive, + struct efx_rss_context *ctx, + unsigned *context_size) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_ALLOC_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + size_t outlen; + int rc; + u32 alloc_type = exclusive ? + MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_EXCLUSIVE : + MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_SHARED; + unsigned rss_spread = exclusive ? + efx->rss_spread : + min(rounddown_pow_of_two(efx->rss_spread), + EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE); + + if (!exclusive && rss_spread == 1) { + ctx->context_id = EFX_MCDI_RSS_CONTEXT_INVALID; + if (context_size) + *context_size = 1; + return 0; + } + + if (nic_data->datapath_caps & + 1 << MC_CMD_GET_CAPABILITIES_OUT_RX_RSS_LIMITED_LBN) + return -EOPNOTSUPP; + + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_UPSTREAM_PORT_ID, + nic_data->vport_id); + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_TYPE, alloc_type); + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_NUM_QUEUES, rss_spread); + + rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_ALLOC, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc != 0) + return rc; + + if (outlen < MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN) + return -EIO; + + ctx->context_id = MCDI_DWORD(outbuf, RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID); + + if (context_size) + *context_size = rss_spread; + + if (nic_data->datapath_caps & + 1 << MC_CMD_GET_CAPABILITIES_OUT_ADDITIONAL_RSS_MODES_LBN) + efx_mcdi_set_rss_context_flags(efx, ctx); + + return 0; +} + +static int efx_mcdi_filter_free_rss_context(struct efx_nic *efx, u32 context) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_FREE_IN_LEN); + + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_FREE_IN_RSS_CONTEXT_ID, + context); + return efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_FREE, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +static int efx_mcdi_filter_populate_rss_table(struct efx_nic *efx, u32 context, + const u32 *rx_indir_table, const u8 *key) +{ + MCDI_DECLARE_BUF(tablebuf, MC_CMD_RSS_CONTEXT_SET_TABLE_IN_LEN); + MCDI_DECLARE_BUF(keybuf, MC_CMD_RSS_CONTEXT_SET_KEY_IN_LEN); + int i, rc; + + MCDI_SET_DWORD(tablebuf, RSS_CONTEXT_SET_TABLE_IN_RSS_CONTEXT_ID, + context); + BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_indir_table) != + MC_CMD_RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE_LEN); + + /* This iterates over the length of efx->rss_context.rx_indir_table, but + * copies bytes from rx_indir_table. That's because the latter is a + * pointer rather than an array, but should have the same length. + * The efx->rss_context.rx_hash_key loop below is similar. 
+ */ + for (i = 0; i < ARRAY_SIZE(efx->rss_context.rx_indir_table); ++i) + MCDI_PTR(tablebuf, + RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE)[i] = + (u8) rx_indir_table[i]; + + rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_TABLE, tablebuf, + sizeof(tablebuf), NULL, 0, NULL); + if (rc != 0) + return rc; + + MCDI_SET_DWORD(keybuf, RSS_CONTEXT_SET_KEY_IN_RSS_CONTEXT_ID, + context); + BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_hash_key) != + MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN); + for (i = 0; i < ARRAY_SIZE(efx->rss_context.rx_hash_key); ++i) + MCDI_PTR(keybuf, RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY)[i] = key[i]; + + return efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_KEY, keybuf, + sizeof(keybuf), NULL, 0, NULL); +} + +void efx_mcdi_rx_free_indir_table(struct efx_nic *efx) +{ + int rc; + + if (efx->rss_context.context_id != EFX_MCDI_RSS_CONTEXT_INVALID) { + rc = efx_mcdi_filter_free_rss_context(efx, efx->rss_context.context_id); + WARN_ON(rc != 0); + } + efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID; +} + +static int efx_mcdi_filter_rx_push_shared_rss_config(struct efx_nic *efx, + unsigned *context_size) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + int rc = efx_mcdi_filter_alloc_rss_context(efx, false, &efx->rss_context, + context_size); + + if (rc != 0) + return rc; + + nic_data->rx_rss_context_exclusive = false; + efx_set_default_rx_indir_table(efx, &efx->rss_context); + return 0; +} + +static int efx_mcdi_filter_rx_push_exclusive_rss_config(struct efx_nic *efx, + const u32 *rx_indir_table, + const u8 *key) +{ + u32 old_rx_rss_context = efx->rss_context.context_id; + struct efx_ef10_nic_data *nic_data = efx->nic_data; + int rc; + + if (efx->rss_context.context_id == EFX_MCDI_RSS_CONTEXT_INVALID || + !nic_data->rx_rss_context_exclusive) { + rc = efx_mcdi_filter_alloc_rss_context(efx, true, &efx->rss_context, + NULL); + if (rc == -EOPNOTSUPP) + return rc; + else if (rc != 0) + goto fail1; + } + + rc = efx_mcdi_filter_populate_rss_table(efx, efx->rss_context.context_id, + rx_indir_table, key); + if (rc != 0) + goto fail2; + + if (efx->rss_context.context_id != old_rx_rss_context && + old_rx_rss_context != EFX_MCDI_RSS_CONTEXT_INVALID) + WARN_ON(efx_mcdi_filter_free_rss_context(efx, old_rx_rss_context) != 0); + nic_data->rx_rss_context_exclusive = true; + if (rx_indir_table != efx->rss_context.rx_indir_table) + memcpy(efx->rss_context.rx_indir_table, rx_indir_table, + sizeof(efx->rss_context.rx_indir_table)); + if (key != efx->rss_context.rx_hash_key) + memcpy(efx->rss_context.rx_hash_key, key, + efx->type->rx_hash_key_size); + + return 0; + +fail2: + if (old_rx_rss_context != efx->rss_context.context_id) { + WARN_ON(efx_mcdi_filter_free_rss_context(efx, efx->rss_context.context_id) != 0); + efx->rss_context.context_id = old_rx_rss_context; + } +fail1: + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); + return rc; +} + +int efx_mcdi_rx_push_rss_context_config(struct efx_nic *efx, + struct efx_rss_context *ctx, + const u32 *rx_indir_table, + const u8 *key) +{ + int rc; + + WARN_ON(!mutex_is_locked(&efx->rss_lock)); + + if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID) { + rc = efx_mcdi_filter_alloc_rss_context(efx, true, ctx, NULL); + if (rc) + return rc; + } + + if (!rx_indir_table) /* Delete this context */ + return efx_mcdi_filter_free_rss_context(efx, ctx->context_id); + + rc = efx_mcdi_filter_populate_rss_table(efx, ctx->context_id, + rx_indir_table, key); + if (rc) + return rc; + + memcpy(ctx->rx_indir_table, rx_indir_table, + 
sizeof(efx->rss_context.rx_indir_table)); + memcpy(ctx->rx_hash_key, key, efx->type->rx_hash_key_size); + + return 0; +} + +int efx_mcdi_rx_pull_rss_context_config(struct efx_nic *efx, + struct efx_rss_context *ctx) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN); + MCDI_DECLARE_BUF(tablebuf, MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_LEN); + MCDI_DECLARE_BUF(keybuf, MC_CMD_RSS_CONTEXT_GET_KEY_OUT_LEN); + size_t outlen; + int rc, i; + + WARN_ON(!mutex_is_locked(&efx->rss_lock)); + + BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN != + MC_CMD_RSS_CONTEXT_GET_KEY_IN_LEN); + + if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID) + return -ENOENT; + + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_TABLE_IN_RSS_CONTEXT_ID, + ctx->context_id); + BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_indir_table) != + MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE_LEN); + rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_TABLE, inbuf, sizeof(inbuf), + tablebuf, sizeof(tablebuf), &outlen); + if (rc != 0) + return rc; + + if (WARN_ON(outlen != MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_LEN)) + return -EIO; + + for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++) + ctx->rx_indir_table[i] = MCDI_PTR(tablebuf, + RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE)[i]; + + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_KEY_IN_RSS_CONTEXT_ID, + ctx->context_id); + BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_hash_key) != + MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN); + rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_KEY, inbuf, sizeof(inbuf), + keybuf, sizeof(keybuf), &outlen); + if (rc != 0) + return rc; + + if (WARN_ON(outlen != MC_CMD_RSS_CONTEXT_GET_KEY_OUT_LEN)) + return -EIO; + + for (i = 0; i < ARRAY_SIZE(ctx->rx_hash_key); ++i) + ctx->rx_hash_key[i] = MCDI_PTR( + keybuf, RSS_CONTEXT_GET_KEY_OUT_TOEPLITZ_KEY)[i]; + + return 0; +} + +int efx_mcdi_rx_pull_rss_config(struct efx_nic *efx) +{ + int rc; + + mutex_lock(&efx->rss_lock); + rc = efx_mcdi_rx_pull_rss_context_config(efx, &efx->rss_context); + mutex_unlock(&efx->rss_lock); + return rc; +} + +void efx_mcdi_rx_restore_rss_contexts(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + struct efx_rss_context *ctx; + int rc; + + WARN_ON(!mutex_is_locked(&efx->rss_lock)); + + if (!nic_data->must_restore_rss_contexts) + return; + + list_for_each_entry(ctx, &efx->rss_context.list, list) { + /* previous NIC RSS context is gone */ + ctx->context_id = EFX_MCDI_RSS_CONTEXT_INVALID; + /* so try to allocate a new one */ + rc = efx_mcdi_rx_push_rss_context_config(efx, ctx, + ctx->rx_indir_table, + ctx->rx_hash_key); + if (rc) + netif_warn(efx, probe, efx->net_dev, + "failed to restore RSS context %u, rc=%d" + "; RSS filters may fail to be applied\n", + ctx->user_id, rc); + } + nic_data->must_restore_rss_contexts = false; +} + +int efx_mcdi_pf_rx_push_rss_config(struct efx_nic *efx, bool user, + const u32 *rx_indir_table, + const u8 *key) +{ + int rc; + + if (efx->rss_spread == 1) + return 0; + + if (!key) + key = efx->rss_context.rx_hash_key; + + rc = efx_mcdi_filter_rx_push_exclusive_rss_config(efx, rx_indir_table, key); + + if (rc == -ENOBUFS && !user) { + unsigned context_size; + bool mismatch = false; + size_t i; + + for (i = 0; + i < ARRAY_SIZE(efx->rss_context.rx_indir_table) && !mismatch; + i++) + mismatch = rx_indir_table[i] != + ethtool_rxfh_indir_default(i, efx->rss_spread); + + rc = efx_mcdi_filter_rx_push_shared_rss_config(efx, &context_size); + if (rc == 0) { + if (context_size != efx->rss_spread) + netif_warn(efx, probe, efx->net_dev, + "Could not allocate an exclusive RSS" + 
" context; allocated a shared one of" + " different size." + " Wanted %u, got %u.\n", + efx->rss_spread, context_size); + else if (mismatch) + netif_warn(efx, probe, efx->net_dev, + "Could not allocate an exclusive RSS" + " context; allocated a shared one but" + " could not apply custom" + " indirection.\n"); + else + netif_info(efx, probe, efx->net_dev, + "Could not allocate an exclusive RSS" + " context; allocated a shared one.\n"); + } + } + return rc; +} + +int efx_mcdi_vf_rx_push_rss_config(struct efx_nic *efx, bool user, + const u32 *rx_indir_table + __attribute__ ((unused)), + const u8 *key + __attribute__ ((unused))) +{ + if (user) + return -EOPNOTSUPP; + if (efx->rss_context.context_id != EFX_MCDI_RSS_CONTEXT_INVALID) + return 0; + return efx_mcdi_filter_rx_push_shared_rss_config(efx, NULL); +} diff --git a/drivers/net/ethernet/sfc/mcdi_filters.h b/drivers/net/ethernet/sfc/mcdi_filters.h new file mode 100644 index 000000000000..1837f4f5d661 --- /dev/null +++ b/drivers/net/ethernet/sfc/mcdi_filters.h @@ -0,0 +1,159 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2019 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ +#ifndef EFX_MCDI_FILTERS_H +#define EFX_MCDI_FILTERS_H + +#include "net_driver.h" +#include "filter.h" +#include "mcdi_pcol.h" + +#define EFX_EF10_FILTER_DEV_UC_MAX 32 +#define EFX_EF10_FILTER_DEV_MC_MAX 256 + +enum efx_mcdi_filter_default_filters { + EFX_EF10_BCAST, + EFX_EF10_UCDEF, + EFX_EF10_MCDEF, + EFX_EF10_VXLAN4_UCDEF, + EFX_EF10_VXLAN4_MCDEF, + EFX_EF10_VXLAN6_UCDEF, + EFX_EF10_VXLAN6_MCDEF, + EFX_EF10_NVGRE4_UCDEF, + EFX_EF10_NVGRE4_MCDEF, + EFX_EF10_NVGRE6_UCDEF, + EFX_EF10_NVGRE6_MCDEF, + EFX_EF10_GENEVE4_UCDEF, + EFX_EF10_GENEVE4_MCDEF, + EFX_EF10_GENEVE6_UCDEF, + EFX_EF10_GENEVE6_MCDEF, + + EFX_EF10_NUM_DEFAULT_FILTERS +}; + +/* Per-VLAN filters information */ +struct efx_mcdi_filter_vlan { + struct list_head list; + u16 vid; + u16 uc[EFX_EF10_FILTER_DEV_UC_MAX]; + u16 mc[EFX_EF10_FILTER_DEV_MC_MAX]; + u16 default_filters[EFX_EF10_NUM_DEFAULT_FILTERS]; +}; + +struct efx_mcdi_dev_addr { + u8 addr[ETH_ALEN]; +}; + +struct efx_mcdi_filter_table { +/* The MCDI match masks supported by this fw & hw, in order of priority */ + u32 rx_match_mcdi_flags[ + MC_CMD_GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES_MAXNUM * 2]; + unsigned int rx_match_count; + + struct rw_semaphore lock; /* Protects entries */ + struct { + unsigned long spec; /* pointer to spec plus flag bits */ +/* AUTO_OLD is used to mark and sweep MAC filters for the device address lists. 
*/ +/* unused flag 1UL */ +#define EFX_EF10_FILTER_FLAG_AUTO_OLD 2UL +#define EFX_EF10_FILTER_FLAGS 3UL + u64 handle; /* firmware handle */ + } *entry; +/* Shadow of net_device address lists, guarded by mac_lock */ + struct efx_mcdi_dev_addr dev_uc_list[EFX_EF10_FILTER_DEV_UC_MAX]; + struct efx_mcdi_dev_addr dev_mc_list[EFX_EF10_FILTER_DEV_MC_MAX]; + int dev_uc_count; + int dev_mc_count; + bool uc_promisc; + bool mc_promisc; +/* Whether in multicast promiscuous mode when last changed */ + bool mc_promisc_last; + bool mc_overflow; /* Too many MC addrs; should always imply mc_promisc */ + bool vlan_filter; + struct list_head vlan_list; +}; + +int efx_mcdi_filter_table_probe(struct efx_nic *efx); +void efx_mcdi_filter_table_remove(struct efx_nic *efx); +void efx_mcdi_filter_table_restore(struct efx_nic *efx); + +/* + * The filter table(s) are managed by firmware and we have write-only + * access. When removing filters we must identify them to the + * firmware by a 64-bit handle, but this is too wide for Linux kernel + * interfaces (32-bit for RX NFC, 16-bit for RFS). Also, we need to + * be able to tell in advance whether a requested insertion will + * replace an existing filter. Therefore we maintain a software hash + * table, which should be at least as large as the hardware hash + * table. + * + * Huntington has a single 8K filter table shared between all filter + * types and both ports. + */ +#define EFX_MCDI_FILTER_TBL_ROWS 8192 + +bool efx_mcdi_filter_match_supported(struct efx_mcdi_filter_table *table, + bool encap, + enum efx_filter_match_flags match_flags); + +void efx_mcdi_filter_sync_rx_mode(struct efx_nic *efx); +s32 efx_mcdi_filter_insert(struct efx_nic *efx, struct efx_filter_spec *spec, + bool replace_equal); +int efx_mcdi_filter_remove_safe(struct efx_nic *efx, + enum efx_filter_priority priority, + u32 filter_id); +int efx_mcdi_filter_get_safe(struct efx_nic *efx, + enum efx_filter_priority priority, + u32 filter_id, struct efx_filter_spec *spec); + +u32 efx_mcdi_filter_count_rx_used(struct efx_nic *efx, + enum efx_filter_priority priority); +int efx_mcdi_filter_clear_rx(struct efx_nic *efx, + enum efx_filter_priority priority); +u32 efx_mcdi_filter_get_rx_id_limit(struct efx_nic *efx); +s32 efx_mcdi_filter_get_rx_ids(struct efx_nic *efx, + enum efx_filter_priority priority, + u32 *buf, u32 size); + +void efx_mcdi_filter_cleanup_vlans(struct efx_nic *efx); +int efx_mcdi_filter_add_vlan(struct efx_nic *efx, u16 vid); +struct efx_mcdi_filter_vlan *efx_mcdi_filter_find_vlan(struct efx_nic *efx, u16 vid); +void efx_mcdi_filter_del_vlan(struct efx_nic *efx, u16 vid); + +void efx_mcdi_rx_free_indir_table(struct efx_nic *efx); +int efx_mcdi_rx_push_rss_context_config(struct efx_nic *efx, + struct efx_rss_context *ctx, + const u32 *rx_indir_table, + const u8 *key); +int efx_mcdi_pf_rx_push_rss_config(struct efx_nic *efx, bool user, + const u32 *rx_indir_table, + const u8 *key); +int efx_mcdi_vf_rx_push_rss_config(struct efx_nic *efx, bool user, + const u32 *rx_indir_table + __attribute__ ((unused)), + const u8 *key + __attribute__ ((unused))); +int efx_mcdi_rx_pull_rss_config(struct efx_nic *efx); +int efx_mcdi_rx_pull_rss_context_config(struct efx_nic *efx, + struct efx_rss_context *ctx); +int efx_mcdi_get_rss_context_flags(struct efx_nic *efx, u32 context, + u32 *flags); +void efx_mcdi_set_rss_context_flags(struct efx_nic *efx, + struct efx_rss_context *ctx); +void efx_mcdi_rx_restore_rss_contexts(struct efx_nic *efx); + +static inline void efx_mcdi_update_rx_scatter(struct efx_nic 
*efx) +{ + /* no need to do anything here */ +} + +bool efx_mcdi_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id, + unsigned int filter_idx); + +#endif diff --git a/drivers/net/ethernet/sfc/mcdi_functions.c b/drivers/net/ethernet/sfc/mcdi_functions.c new file mode 100644 index 000000000000..dcfe78b0fa5a --- /dev/null +++ b/drivers/net/ethernet/sfc/mcdi_functions.c @@ -0,0 +1,386 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2019 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include "net_driver.h" +#include "efx.h" +#include "nic.h" +#include "mcdi_functions.h" +#include "mcdi.h" +#include "mcdi_pcol.h" + +int efx_mcdi_free_vis(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF_ERR(outbuf); + size_t outlen; + int rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FREE_VIS, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + + /* -EALREADY means nothing to free, so ignore */ + if (rc == -EALREADY) + rc = 0; + if (rc) + efx_mcdi_display_error(efx, MC_CMD_FREE_VIS, 0, outbuf, outlen, + rc); + return rc; +} + +int efx_mcdi_alloc_vis(struct efx_nic *efx, unsigned int min_vis, + unsigned int max_vis, unsigned int *vi_base, + unsigned int *allocated_vis) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_ALLOC_VIS_OUT_LEN); + MCDI_DECLARE_BUF(inbuf, MC_CMD_ALLOC_VIS_IN_LEN); + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MIN_VI_COUNT, min_vis); + MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MAX_VI_COUNT, max_vis); + rc = efx_mcdi_rpc(efx, MC_CMD_ALLOC_VIS, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc != 0) + return rc; + + if (outlen < MC_CMD_ALLOC_VIS_OUT_LEN) + return -EIO; + + netif_dbg(efx, drv, efx->net_dev, "base VI is A0x%03x\n", + MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE)); + + if (vi_base) + *vi_base = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE); + if (allocated_vis) + *allocated_vis = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_COUNT); + return 0; +} + +int efx_mcdi_ev_probe(struct efx_channel *channel) +{ + return efx_nic_alloc_buffer(channel->efx, &channel->eventq.buf, + (channel->eventq_mask + 1) * + sizeof(efx_qword_t), + GFP_KERNEL); +} + +int efx_mcdi_ev_init(struct efx_channel *channel, bool v1_cut_thru, bool v2) +{ + MCDI_DECLARE_BUF(inbuf, + MC_CMD_INIT_EVQ_V2_IN_LEN(EFX_MAX_EVQ_SIZE * 8 / + EFX_BUF_SIZE)); + MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_EVQ_V2_OUT_LEN); + size_t entries = channel->eventq.buf.len / EFX_BUF_SIZE; + struct efx_nic *efx = channel->efx; + size_t inlen, outlen; + dma_addr_t dma_addr; + int rc, i; + + /* Fill event queue with all ones (i.e. 
empty events) */ + memset(channel->eventq.buf.addr, 0xff, channel->eventq.buf.len); + + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_SIZE, channel->eventq_mask + 1); + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_INSTANCE, channel->channel); + /* INIT_EVQ expects index in vector table, not absolute */ + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_IRQ_NUM, channel->channel); + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_MODE, + MC_CMD_INIT_EVQ_IN_TMR_MODE_DIS); + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_LOAD, 0); + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_RELOAD, 0); + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_MODE, + MC_CMD_INIT_EVQ_IN_COUNT_MODE_DIS); + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_THRSHLD, 0); + + if (v2) { + /* Use the new generic approach to specifying event queue + * configuration, requesting lower latency or higher throughput. + * The options that actually get used appear in the output. + */ + MCDI_POPULATE_DWORD_2(inbuf, INIT_EVQ_V2_IN_FLAGS, + INIT_EVQ_V2_IN_FLAG_INTERRUPTING, 1, + INIT_EVQ_V2_IN_FLAG_TYPE, + MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_AUTO); + } else { + MCDI_POPULATE_DWORD_4(inbuf, INIT_EVQ_IN_FLAGS, + INIT_EVQ_IN_FLAG_INTERRUPTING, 1, + INIT_EVQ_IN_FLAG_RX_MERGE, 1, + INIT_EVQ_IN_FLAG_TX_MERGE, 1, + INIT_EVQ_IN_FLAG_CUT_THRU, v1_cut_thru); + } + + dma_addr = channel->eventq.buf.dma_addr; + for (i = 0; i < entries; ++i) { + MCDI_SET_ARRAY_QWORD(inbuf, INIT_EVQ_IN_DMA_ADDR, i, dma_addr); + dma_addr += EFX_BUF_SIZE; + } + + inlen = MC_CMD_INIT_EVQ_IN_LEN(entries); + + rc = efx_mcdi_rpc(efx, MC_CMD_INIT_EVQ, inbuf, inlen, + outbuf, sizeof(outbuf), &outlen); + + if (outlen >= MC_CMD_INIT_EVQ_V2_OUT_LEN) + netif_dbg(efx, drv, efx->net_dev, + "Channel %d using event queue flags %08x\n", + channel->channel, + MCDI_DWORD(outbuf, INIT_EVQ_V2_OUT_FLAGS)); + + return rc; +} + +void efx_mcdi_ev_remove(struct efx_channel *channel) +{ + efx_nic_free_buffer(channel->efx, &channel->eventq.buf); +} + +void efx_mcdi_ev_fini(struct efx_channel *channel) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_EVQ_IN_LEN); + MCDI_DECLARE_BUF_ERR(outbuf); + struct efx_nic *efx = channel->efx; + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, FINI_EVQ_IN_INSTANCE, channel->channel); + + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_EVQ, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + + if (rc && rc != -EALREADY) + goto fail; + + return; + +fail: + efx_mcdi_display_error(efx, MC_CMD_FINI_EVQ, MC_CMD_FINI_EVQ_IN_LEN, + outbuf, outlen, rc); +} + +int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue, bool tso_v2) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 / + EFX_BUF_SIZE)); + bool csum_offload = tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD; + size_t entries = tx_queue->txd.buf.len / EFX_BUF_SIZE; + struct efx_channel *channel = tx_queue->channel; + struct efx_nic *efx = tx_queue->efx; + struct efx_ef10_nic_data *nic_data; + dma_addr_t dma_addr; + size_t inlen; + int rc, i; + + BUILD_BUG_ON(MC_CMD_INIT_TXQ_OUT_LEN != 0); + + nic_data = efx->nic_data; + + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_SIZE, tx_queue->ptr_mask + 1); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_TARGET_EVQ, channel->channel); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_LABEL, tx_queue->queue); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_INSTANCE, tx_queue->queue); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_OWNER_ID, 0); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, nic_data->vport_id); + + dma_addr = tx_queue->txd.buf.dma_addr; + + netif_dbg(efx, hw, efx->net_dev, "pushing TXQ %d. 
%zu entries (%llx)\n", + tx_queue->queue, entries, (u64)dma_addr); + + for (i = 0; i < entries; ++i) { + MCDI_SET_ARRAY_QWORD(inbuf, INIT_TXQ_IN_DMA_ADDR, i, dma_addr); + dma_addr += EFX_BUF_SIZE; + } + + inlen = MC_CMD_INIT_TXQ_IN_LEN(entries); + + do { + MCDI_POPULATE_DWORD_4(inbuf, INIT_TXQ_IN_FLAGS, + /* This flag was removed from mcdi_pcol.h for + * the non-_EXT version of INIT_TXQ. However, + * firmware still honours it. + */ + INIT_TXQ_EXT_IN_FLAG_TSOV2_EN, tso_v2, + INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload, + INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload, + INIT_TXQ_EXT_IN_FLAG_TIMESTAMP, + tx_queue->timestamping); + + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_INIT_TXQ, inbuf, inlen, + NULL, 0, NULL); + if (rc == -ENOSPC && tso_v2) { + /* Retry without TSOv2 if we're short on contexts. */ + tso_v2 = false; + netif_warn(efx, probe, efx->net_dev, + "TSOv2 context not available to segment in " + "hardware. TCP performance may be reduced.\n" + ); + } else if (rc) { + efx_mcdi_display_error(efx, MC_CMD_INIT_TXQ, + MC_CMD_INIT_TXQ_EXT_IN_LEN, + NULL, 0, rc); + goto fail; + } + } while (rc); + + return 0; + +fail: + return rc; +} + +void efx_mcdi_tx_remove(struct efx_tx_queue *tx_queue) +{ + efx_nic_free_buffer(tx_queue->efx, &tx_queue->txd.buf); +} + +void efx_mcdi_tx_fini(struct efx_tx_queue *tx_queue) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_TXQ_IN_LEN); + MCDI_DECLARE_BUF_ERR(outbuf); + struct efx_nic *efx = tx_queue->efx; + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, FINI_TXQ_IN_INSTANCE, + tx_queue->queue); + + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_TXQ, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + + if (rc && rc != -EALREADY) + goto fail; + + return; + +fail: + efx_mcdi_display_error(efx, MC_CMD_FINI_TXQ, MC_CMD_FINI_TXQ_IN_LEN, + outbuf, outlen, rc); +} + +int efx_mcdi_rx_probe(struct efx_rx_queue *rx_queue) +{ + return efx_nic_alloc_buffer(rx_queue->efx, &rx_queue->rxd.buf, + (rx_queue->ptr_mask + 1) * + sizeof(efx_qword_t), + GFP_KERNEL); +} + +void efx_mcdi_rx_init(struct efx_rx_queue *rx_queue) +{ + MCDI_DECLARE_BUF(inbuf, + MC_CMD_INIT_RXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 / + EFX_BUF_SIZE)); + struct efx_channel *channel = efx_rx_queue_channel(rx_queue); + size_t entries = rx_queue->rxd.buf.len / EFX_BUF_SIZE; + struct efx_nic *efx = rx_queue->efx; + struct efx_ef10_nic_data *nic_data = efx->nic_data; + dma_addr_t dma_addr; + size_t inlen; + int rc; + int i; + BUILD_BUG_ON(MC_CMD_INIT_RXQ_OUT_LEN != 0); + + rx_queue->scatter_n = 0; + rx_queue->scatter_len = 0; + + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_SIZE, rx_queue->ptr_mask + 1); + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_TARGET_EVQ, channel->channel); + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_LABEL, efx_rx_queue_index(rx_queue)); + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_INSTANCE, + efx_rx_queue_index(rx_queue)); + MCDI_POPULATE_DWORD_2(inbuf, INIT_RXQ_IN_FLAGS, + INIT_RXQ_IN_FLAG_PREFIX, 1, + INIT_RXQ_IN_FLAG_TIMESTAMP, 1); + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_OWNER_ID, 0); + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_PORT_ID, nic_data->vport_id); + + dma_addr = rx_queue->rxd.buf.dma_addr; + + netif_dbg(efx, hw, efx->net_dev, "pushing RXQ %d. 
%zu entries (%llx)\n", + efx_rx_queue_index(rx_queue), entries, (u64)dma_addr); + + for (i = 0; i < entries; ++i) { + MCDI_SET_ARRAY_QWORD(inbuf, INIT_RXQ_IN_DMA_ADDR, i, dma_addr); + dma_addr += EFX_BUF_SIZE; + } + + inlen = MC_CMD_INIT_RXQ_IN_LEN(entries); + + rc = efx_mcdi_rpc(efx, MC_CMD_INIT_RXQ, inbuf, inlen, + NULL, 0, NULL); + if (rc) + netdev_WARN(efx->net_dev, "failed to initialise RXQ %d\n", + efx_rx_queue_index(rx_queue)); +} + +void efx_mcdi_rx_remove(struct efx_rx_queue *rx_queue) +{ + efx_nic_free_buffer(rx_queue->efx, &rx_queue->rxd.buf); +} + +void efx_mcdi_rx_fini(struct efx_rx_queue *rx_queue) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_RXQ_IN_LEN); + MCDI_DECLARE_BUF_ERR(outbuf); + struct efx_nic *efx = rx_queue->efx; + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, FINI_RXQ_IN_INSTANCE, + efx_rx_queue_index(rx_queue)); + + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_RXQ, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + + if (rc && rc != -EALREADY) + goto fail; + + return; + +fail: + efx_mcdi_display_error(efx, MC_CMD_FINI_RXQ, MC_CMD_FINI_RXQ_IN_LEN, + outbuf, outlen, rc); +} + +int efx_mcdi_window_mode_to_stride(struct efx_nic *efx, u8 vi_window_mode) +{ + switch (vi_window_mode) { + case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_8K: + efx->vi_stride = 8192; + break; + case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_16K: + efx->vi_stride = 16384; + break; + case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_64K: + efx->vi_stride = 65536; + break; + default: + netif_err(efx, probe, efx->net_dev, + "Unrecognised VI window mode %d\n", + vi_window_mode); + return -EIO; + } + netif_dbg(efx, probe, efx->net_dev, "vi_stride = %u\n", + efx->vi_stride); + return 0; +} + +int efx_get_pf_index(struct efx_nic *efx, unsigned int *pf_index) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_FUNCTION_INFO_OUT_LEN); + size_t outlen; + int rc; + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_FUNCTION_INFO, NULL, 0, outbuf, + sizeof(outbuf), &outlen); + if (rc) + return rc; + if (outlen < sizeof(outbuf)) + return -EIO; + + *pf_index = MCDI_DWORD(outbuf, GET_FUNCTION_INFO_OUT_PF); + return 0; +} diff --git a/drivers/net/ethernet/sfc/mcdi_functions.h b/drivers/net/ethernet/sfc/mcdi_functions.h new file mode 100644 index 000000000000..ca4a5ac1a66b --- /dev/null +++ b/drivers/net/ethernet/sfc/mcdi_functions.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. 
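The header being added here collects the split-out MCDI queue helpers. A minimal, illustrative bring-up sequence for one event queue using them, with error handling trimmed (example_ev_bringup is not part of the patch):

	static int example_ev_bringup(struct efx_channel *channel)
	{
		int rc;

		rc = efx_mcdi_ev_probe(channel);	/* allocate the EVQ DMA buffer */
		if (rc)
			return rc;

		rc = efx_mcdi_ev_init(channel, false, true);	/* MC_CMD_INIT_EVQ, v2 format */
		if (rc)
			efx_mcdi_ev_remove(channel);	/* free the buffer on failure */
		return rc;
	}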
+ */ +#ifndef EFX_MCDI_FUNCTIONS_H +#define EFX_MCDI_FUNCTIONS_H + +int efx_mcdi_alloc_vis(struct efx_nic *efx, unsigned int min_vis, + unsigned int max_vis, unsigned int *vi_base, + unsigned int *allocated_vis); +int efx_mcdi_free_vis(struct efx_nic *efx); + +int efx_mcdi_ev_probe(struct efx_channel *channel); +int efx_mcdi_ev_init(struct efx_channel *channel, bool v1_cut_thru, bool v2); +void efx_mcdi_ev_remove(struct efx_channel *channel); +void efx_mcdi_ev_fini(struct efx_channel *channel); +int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue, bool tso_v2); +void efx_mcdi_tx_remove(struct efx_tx_queue *tx_queue); +void efx_mcdi_tx_fini(struct efx_tx_queue *tx_queue); +int efx_mcdi_rx_probe(struct efx_rx_queue *rx_queue); +void efx_mcdi_rx_init(struct efx_rx_queue *rx_queue); +void efx_mcdi_rx_remove(struct efx_rx_queue *rx_queue); +void efx_mcdi_rx_fini(struct efx_rx_queue *rx_queue); +int efx_mcdi_window_mode_to_stride(struct efx_nic *efx, u8 vi_window_mode); +int efx_get_pf_index(struct efx_nic *efx, unsigned int *pf_index); + +#endif diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c index fb7cde4980ed..ab5227b13ae6 100644 --- a/drivers/net/ethernet/sfc/mcdi_port.c +++ b/drivers/net/ethernet/sfc/mcdi_port.c @@ -14,106 +14,7 @@ #include "mcdi_pcol.h" #include "nic.h" #include "selftest.h" - -struct efx_mcdi_phy_data { - u32 flags; - u32 type; - u32 supported_cap; - u32 channel; - u32 port; - u32 stats_mask; - u8 name[20]; - u32 media; - u32 mmd_mask; - u8 revision[20]; - u32 forced_cap; -}; - -static int -efx_mcdi_get_phy_cfg(struct efx_nic *efx, struct efx_mcdi_phy_data *cfg) -{ - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_CFG_OUT_LEN); - size_t outlen; - int rc; - - BUILD_BUG_ON(MC_CMD_GET_PHY_CFG_IN_LEN != 0); - BUILD_BUG_ON(MC_CMD_GET_PHY_CFG_OUT_NAME_LEN != sizeof(cfg->name)); - - rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_CFG, NULL, 0, - outbuf, sizeof(outbuf), &outlen); - if (rc) - goto fail; - - if (outlen < MC_CMD_GET_PHY_CFG_OUT_LEN) { - rc = -EIO; - goto fail; - } - - cfg->flags = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_FLAGS); - cfg->type = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_TYPE); - cfg->supported_cap = - MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_SUPPORTED_CAP); - cfg->channel = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_CHANNEL); - cfg->port = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_PRT); - cfg->stats_mask = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_STATS_MASK); - memcpy(cfg->name, MCDI_PTR(outbuf, GET_PHY_CFG_OUT_NAME), - sizeof(cfg->name)); - cfg->media = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_MEDIA_TYPE); - cfg->mmd_mask = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_MMD_MASK); - memcpy(cfg->revision, MCDI_PTR(outbuf, GET_PHY_CFG_OUT_REVISION), - sizeof(cfg->revision)); - - return 0; - -fail: - netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); - return rc; -} - -static int efx_mcdi_set_link(struct efx_nic *efx, u32 capabilities, - u32 flags, u32 loopback_mode, - u32 loopback_speed) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_LINK_IN_LEN); - int rc; - - BUILD_BUG_ON(MC_CMD_SET_LINK_OUT_LEN != 0); - - MCDI_SET_DWORD(inbuf, SET_LINK_IN_CAP, capabilities); - MCDI_SET_DWORD(inbuf, SET_LINK_IN_FLAGS, flags); - MCDI_SET_DWORD(inbuf, SET_LINK_IN_LOOPBACK_MODE, loopback_mode); - MCDI_SET_DWORD(inbuf, SET_LINK_IN_LOOPBACK_SPEED, loopback_speed); - - rc = efx_mcdi_rpc(efx, MC_CMD_SET_LINK, inbuf, sizeof(inbuf), - NULL, 0, NULL); - return rc; -} - -static int efx_mcdi_loopback_modes(struct efx_nic *efx, u64 *loopback_modes) -{ - MCDI_DECLARE_BUF(outbuf, 
MC_CMD_GET_LOOPBACK_MODES_OUT_LEN); - size_t outlen; - int rc; - - rc = efx_mcdi_rpc(efx, MC_CMD_GET_LOOPBACK_MODES, NULL, 0, - outbuf, sizeof(outbuf), &outlen); - if (rc) - goto fail; - - if (outlen < (MC_CMD_GET_LOOPBACK_MODES_OUT_SUGGESTED_OFST + - MC_CMD_GET_LOOPBACK_MODES_OUT_SUGGESTED_LEN)) { - rc = -EIO; - goto fail; - } - - *loopback_modes = MCDI_QWORD(outbuf, GET_LOOPBACK_MODES_OUT_SUGGESTED); - - return 0; - -fail: - netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); - return rc; -} +#include "mcdi_port_common.h" static int efx_mcdi_mdio_read(struct net_device *net_dev, int prtad, int devad, u16 addr) @@ -168,246 +69,6 @@ static int efx_mcdi_mdio_write(struct net_device *net_dev, return 0; } -static void mcdi_to_ethtool_linkset(u32 media, u32 cap, unsigned long *linkset) -{ - #define SET_BIT(name) __set_bit(ETHTOOL_LINK_MODE_ ## name ## _BIT, \ - linkset) - - bitmap_zero(linkset, __ETHTOOL_LINK_MODE_MASK_NBITS); - switch (media) { - case MC_CMD_MEDIA_KX4: - SET_BIT(Backplane); - if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) - SET_BIT(1000baseKX_Full); - if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) - SET_BIT(10000baseKX4_Full); - if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) - SET_BIT(40000baseKR4_Full); - break; - - case MC_CMD_MEDIA_XFP: - case MC_CMD_MEDIA_SFP_PLUS: - case MC_CMD_MEDIA_QSFP_PLUS: - SET_BIT(FIBRE); - if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) - SET_BIT(1000baseT_Full); - if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) - SET_BIT(10000baseT_Full); - if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) - SET_BIT(40000baseCR4_Full); - if (cap & (1 << MC_CMD_PHY_CAP_100000FDX_LBN)) - SET_BIT(100000baseCR4_Full); - if (cap & (1 << MC_CMD_PHY_CAP_25000FDX_LBN)) - SET_BIT(25000baseCR_Full); - if (cap & (1 << MC_CMD_PHY_CAP_50000FDX_LBN)) - SET_BIT(50000baseCR2_Full); - break; - - case MC_CMD_MEDIA_BASE_T: - SET_BIT(TP); - if (cap & (1 << MC_CMD_PHY_CAP_10HDX_LBN)) - SET_BIT(10baseT_Half); - if (cap & (1 << MC_CMD_PHY_CAP_10FDX_LBN)) - SET_BIT(10baseT_Full); - if (cap & (1 << MC_CMD_PHY_CAP_100HDX_LBN)) - SET_BIT(100baseT_Half); - if (cap & (1 << MC_CMD_PHY_CAP_100FDX_LBN)) - SET_BIT(100baseT_Full); - if (cap & (1 << MC_CMD_PHY_CAP_1000HDX_LBN)) - SET_BIT(1000baseT_Half); - if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) - SET_BIT(1000baseT_Full); - if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) - SET_BIT(10000baseT_Full); - break; - } - - if (cap & (1 << MC_CMD_PHY_CAP_PAUSE_LBN)) - SET_BIT(Pause); - if (cap & (1 << MC_CMD_PHY_CAP_ASYM_LBN)) - SET_BIT(Asym_Pause); - if (cap & (1 << MC_CMD_PHY_CAP_AN_LBN)) - SET_BIT(Autoneg); - - #undef SET_BIT -} - -static u32 ethtool_linkset_to_mcdi_cap(const unsigned long *linkset) -{ - u32 result = 0; - - #define TEST_BIT(name) test_bit(ETHTOOL_LINK_MODE_ ## name ## _BIT, \ - linkset) - - if (TEST_BIT(10baseT_Half)) - result |= (1 << MC_CMD_PHY_CAP_10HDX_LBN); - if (TEST_BIT(10baseT_Full)) - result |= (1 << MC_CMD_PHY_CAP_10FDX_LBN); - if (TEST_BIT(100baseT_Half)) - result |= (1 << MC_CMD_PHY_CAP_100HDX_LBN); - if (TEST_BIT(100baseT_Full)) - result |= (1 << MC_CMD_PHY_CAP_100FDX_LBN); - if (TEST_BIT(1000baseT_Half)) - result |= (1 << MC_CMD_PHY_CAP_1000HDX_LBN); - if (TEST_BIT(1000baseT_Full) || TEST_BIT(1000baseKX_Full)) - result |= (1 << MC_CMD_PHY_CAP_1000FDX_LBN); - if (TEST_BIT(10000baseT_Full) || TEST_BIT(10000baseKX4_Full)) - result |= (1 << MC_CMD_PHY_CAP_10000FDX_LBN); - if (TEST_BIT(40000baseCR4_Full) || TEST_BIT(40000baseKR4_Full)) - result |= (1 << MC_CMD_PHY_CAP_40000FDX_LBN); - if (TEST_BIT(100000baseCR4_Full)) 
- result |= (1 << MC_CMD_PHY_CAP_100000FDX_LBN); - if (TEST_BIT(25000baseCR_Full)) - result |= (1 << MC_CMD_PHY_CAP_25000FDX_LBN); - if (TEST_BIT(50000baseCR2_Full)) - result |= (1 << MC_CMD_PHY_CAP_50000FDX_LBN); - if (TEST_BIT(Pause)) - result |= (1 << MC_CMD_PHY_CAP_PAUSE_LBN); - if (TEST_BIT(Asym_Pause)) - result |= (1 << MC_CMD_PHY_CAP_ASYM_LBN); - if (TEST_BIT(Autoneg)) - result |= (1 << MC_CMD_PHY_CAP_AN_LBN); - - #undef TEST_BIT - - return result; -} - -static u32 efx_get_mcdi_phy_flags(struct efx_nic *efx) -{ - struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; - enum efx_phy_mode mode, supported; - u32 flags; - - /* TODO: Advertise the capabilities supported by this PHY */ - supported = 0; - if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_TXDIS_LBN)) - supported |= PHY_MODE_TX_DISABLED; - if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_LOWPOWER_LBN)) - supported |= PHY_MODE_LOW_POWER; - if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_POWEROFF_LBN)) - supported |= PHY_MODE_OFF; - - mode = efx->phy_mode & supported; - - flags = 0; - if (mode & PHY_MODE_TX_DISABLED) - flags |= (1 << MC_CMD_SET_LINK_IN_TXDIS_LBN); - if (mode & PHY_MODE_LOW_POWER) - flags |= (1 << MC_CMD_SET_LINK_IN_LOWPOWER_LBN); - if (mode & PHY_MODE_OFF) - flags |= (1 << MC_CMD_SET_LINK_IN_POWEROFF_LBN); - - return flags; -} - -static u8 mcdi_to_ethtool_media(u32 media) -{ - switch (media) { - case MC_CMD_MEDIA_XAUI: - case MC_CMD_MEDIA_CX4: - case MC_CMD_MEDIA_KX4: - return PORT_OTHER; - - case MC_CMD_MEDIA_XFP: - case MC_CMD_MEDIA_SFP_PLUS: - case MC_CMD_MEDIA_QSFP_PLUS: - return PORT_FIBRE; - - case MC_CMD_MEDIA_BASE_T: - return PORT_TP; - - default: - return PORT_OTHER; - } -} - -static void efx_mcdi_phy_decode_link(struct efx_nic *efx, - struct efx_link_state *link_state, - u32 speed, u32 flags, u32 fcntl) -{ - switch (fcntl) { - case MC_CMD_FCNTL_AUTO: - WARN_ON(1); /* This is not a link mode */ - link_state->fc = EFX_FC_AUTO | EFX_FC_TX | EFX_FC_RX; - break; - case MC_CMD_FCNTL_BIDIR: - link_state->fc = EFX_FC_TX | EFX_FC_RX; - break; - case MC_CMD_FCNTL_RESPOND: - link_state->fc = EFX_FC_RX; - break; - default: - WARN_ON(1); - /* Fall through */ - case MC_CMD_FCNTL_OFF: - link_state->fc = 0; - break; - } - - link_state->up = !!(flags & (1 << MC_CMD_GET_LINK_OUT_LINK_UP_LBN)); - link_state->fd = !!(flags & (1 << MC_CMD_GET_LINK_OUT_FULL_DUPLEX_LBN)); - link_state->speed = speed; -} - -/* The semantics of the ethtool FEC mode bitmask are not well defined, - * particularly the meaning of combinations of bits. Which means we get to - * define our own semantics, as follows: - * OFF overrides any other bits, and means "disable all FEC" (with the - * exception of 25G KR4/CR4, where it is not possible to reject it if AN - * partner requests it). - * AUTO on its own means use cable requirements and link partner autoneg with - * fw-default preferences for the cable type. - * AUTO and either RS or BASER means use the specified FEC type if cable and - * link partner support it, otherwise autoneg/fw-default. - * RS or BASER alone means use the specified FEC type if cable and link partner - * support it and either requests it, otherwise no FEC. - * Both RS and BASER (whether AUTO or not) means use FEC if cable and link - * partner support it, preferring RS to BASER. 
- */ -static u32 ethtool_fec_caps_to_mcdi(u32 ethtool_cap) -{ - u32 ret = 0; - - if (ethtool_cap & ETHTOOL_FEC_OFF) - return 0; - - if (ethtool_cap & ETHTOOL_FEC_AUTO) - ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) | - (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) | - (1 << MC_CMD_PHY_CAP_RS_FEC_LBN); - if (ethtool_cap & ETHTOOL_FEC_RS) - ret |= (1 << MC_CMD_PHY_CAP_RS_FEC_LBN) | - (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN); - if (ethtool_cap & ETHTOOL_FEC_BASER) - ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) | - (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) | - (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN) | - (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN); - return ret; -} - -/* Invert ethtool_fec_caps_to_mcdi. There are two combinations that function - * can never produce, (baser xor rs) and neither req; the implementation below - * maps both of those to AUTO. This should never matter, and it's not clear - * what a better mapping would be anyway. - */ -static u32 mcdi_fec_caps_to_ethtool(u32 caps, bool is_25g) -{ - bool rs = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_LBN), - rs_req = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN), - baser = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) - : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN), - baser_req = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN) - : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN); - - if (!baser && !rs) - return ETHTOOL_FEC_OFF; - return (rs_req ? ETHTOOL_FEC_RS : 0) | - (baser_req ? ETHTOOL_FEC_BASER : 0) | - (baser == baser_req && rs == rs_req ? 0 : ETHTOOL_FEC_AUTO); -} - static int efx_mcdi_phy_probe(struct efx_nic *efx) { struct efx_mcdi_phy_data *phy_data; @@ -527,58 +188,6 @@ int efx_mcdi_port_reconfigure(struct efx_nic *efx) efx->loopback_mode, 0); } -/* Verify that the forced flow control settings (!EFX_FC_AUTO) are - * supported by the link partner. 
Warn the user if this isn't the case - */ -static void efx_mcdi_phy_check_fcntl(struct efx_nic *efx, u32 lpa) -{ - struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; - u32 rmtadv; - - /* The link partner capabilities are only relevant if the - * link supports flow control autonegotiation */ - if (~phy_cfg->supported_cap & (1 << MC_CMD_PHY_CAP_AN_LBN)) - return; - - /* If flow control autoneg is supported and enabled, then fine */ - if (efx->wanted_fc & EFX_FC_AUTO) - return; - - rmtadv = 0; - if (lpa & (1 << MC_CMD_PHY_CAP_PAUSE_LBN)) - rmtadv |= ADVERTISED_Pause; - if (lpa & (1 << MC_CMD_PHY_CAP_ASYM_LBN)) - rmtadv |= ADVERTISED_Asym_Pause; - - if ((efx->wanted_fc & EFX_FC_TX) && rmtadv == ADVERTISED_Asym_Pause) - netif_err(efx, link, efx->net_dev, - "warning: link partner doesn't support pause frames"); -} - -static bool efx_mcdi_phy_poll(struct efx_nic *efx) -{ - struct efx_link_state old_state = efx->link_state; - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN); - int rc; - - WARN_ON(!mutex_is_locked(&efx->mac_lock)); - - BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0); - - rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0, - outbuf, sizeof(outbuf), NULL); - if (rc) - efx->link_state.up = false; - else - efx_mcdi_phy_decode_link( - efx, &efx->link_state, - MCDI_DWORD(outbuf, GET_LINK_OUT_LINK_SPEED), - MCDI_DWORD(outbuf, GET_LINK_OUT_FLAGS), - MCDI_DWORD(outbuf, GET_LINK_OUT_FCNTL)); - - return !efx_link_state_equal(&efx->link_state, &old_state); -} - static void efx_mcdi_phy_remove(struct efx_nic *efx) { struct efx_mcdi_phy_data *phy_data = efx->phy_data; @@ -666,58 +275,6 @@ efx_mcdi_phy_set_link_ksettings(struct efx_nic *efx, return 0; } -static int efx_mcdi_phy_get_fecparam(struct efx_nic *efx, - struct ethtool_fecparam *fec) -{ - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_V2_LEN); - u32 caps, active, speed; /* MCDI format */ - bool is_25g = false; - size_t outlen; - int rc; - - BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0); - rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0, - outbuf, sizeof(outbuf), &outlen); - if (rc) - return rc; - if (outlen < MC_CMD_GET_LINK_OUT_V2_LEN) - return -EOPNOTSUPP; - - /* behaviour for 25G/50G links depends on 25G BASER bit */ - speed = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_LINK_SPEED); - is_25g = speed == 25000 || speed == 50000; - - caps = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_CAP); - fec->fec = mcdi_fec_caps_to_ethtool(caps, is_25g); - /* BASER is never supported on 100G */ - if (speed == 100000) - fec->fec &= ~ETHTOOL_FEC_BASER; - - active = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_FEC_TYPE); - switch (active) { - case MC_CMD_FEC_NONE: - fec->active_fec = ETHTOOL_FEC_OFF; - break; - case MC_CMD_FEC_BASER: - fec->active_fec = ETHTOOL_FEC_BASER; - break; - case MC_CMD_FEC_RS: - fec->active_fec = ETHTOOL_FEC_RS; - break; - default: - netif_warn(efx, hw, efx->net_dev, - "Firmware reports unrecognised FEC_TYPE %u\n", - active); - /* We don't know what firmware has picked. AUTO is as good a - * "can't happen" value as any other. 
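efx_mcdi_phy_check_fcntl(), moved to mcdi_port_common.c below, only warns for one specific combination: the driver forces TX pause while the link partner advertises asymmetric pause alone. A hedged sketch of that condition, building rmtadv the same way the function does (pause_config_conflicts is an illustrative name only):

	static bool pause_config_conflicts(u8 wanted_fc, u32 lpa)
	{
		u32 rmtadv = 0;

		if (lpa & (1 << MC_CMD_PHY_CAP_PAUSE_LBN))
			rmtadv |= ADVERTISED_Pause;
		if (lpa & (1 << MC_CMD_PHY_CAP_ASYM_LBN))
			rmtadv |= ADVERTISED_Asym_Pause;

		/* forcing TX pause against a partner that only handles asym pause */
		return (wanted_fc & EFX_FC_TX) && rmtadv == ADVERTISED_Asym_Pause;
	}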
- */ - fec->active_fec = ETHTOOL_FEC_AUTO; - break; - } - - return 0; -} - static int efx_mcdi_phy_set_fecparam(struct efx_nic *efx, const struct ethtool_fecparam *fec) { @@ -745,27 +302,6 @@ static int efx_mcdi_phy_set_fecparam(struct efx_nic *efx, return 0; } -static int efx_mcdi_phy_test_alive(struct efx_nic *efx) -{ - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_STATE_OUT_LEN); - size_t outlen; - int rc; - - BUILD_BUG_ON(MC_CMD_GET_PHY_STATE_IN_LEN != 0); - - rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_STATE, NULL, 0, - outbuf, sizeof(outbuf), &outlen); - if (rc) - return rc; - - if (outlen < MC_CMD_GET_PHY_STATE_OUT_LEN) - return -EIO; - if (MCDI_DWORD(outbuf, GET_PHY_STATE_OUT_STATE) != MC_CMD_PHY_STATE_OK) - return -EINVAL; - - return 0; -} - static const char *const mcdi_sft9001_cable_diag_names[] = { "cable.pairA.length", "cable.pairB.length", @@ -1139,84 +675,6 @@ u32 efx_mcdi_phy_get_caps(struct efx_nic *efx) return phy_data->supported_cap; } -static unsigned int efx_mcdi_event_link_speed[] = { - [MCDI_EVENT_LINKCHANGE_SPEED_100M] = 100, - [MCDI_EVENT_LINKCHANGE_SPEED_1G] = 1000, - [MCDI_EVENT_LINKCHANGE_SPEED_10G] = 10000, - [MCDI_EVENT_LINKCHANGE_SPEED_40G] = 40000, - [MCDI_EVENT_LINKCHANGE_SPEED_25G] = 25000, - [MCDI_EVENT_LINKCHANGE_SPEED_50G] = 50000, - [MCDI_EVENT_LINKCHANGE_SPEED_100G] = 100000, -}; - -void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev) -{ - u32 flags, fcntl, speed, lpa; - - speed = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_SPEED); - EFX_WARN_ON_PARANOID(speed >= ARRAY_SIZE(efx_mcdi_event_link_speed)); - speed = efx_mcdi_event_link_speed[speed]; - - flags = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_LINK_FLAGS); - fcntl = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_FCNTL); - lpa = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_LP_CAP); - - /* efx->link_state is only modified by efx_mcdi_phy_get_link(), - * which is only run after flushing the event queues. Therefore, it - * is safe to modify the link state outside of the mac_lock here. 
- */ - efx_mcdi_phy_decode_link(efx, &efx->link_state, speed, flags, fcntl); - - efx_mcdi_phy_check_fcntl(efx, lpa); - - efx_link_status_changed(efx); -} - -int efx_mcdi_set_mac(struct efx_nic *efx) -{ - u32 fcntl; - MCDI_DECLARE_BUF(cmdbytes, MC_CMD_SET_MAC_IN_LEN); - - BUILD_BUG_ON(MC_CMD_SET_MAC_OUT_LEN != 0); - - /* This has no effect on EF10 */ - ether_addr_copy(MCDI_PTR(cmdbytes, SET_MAC_IN_ADDR), - efx->net_dev->dev_addr); - - MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_MTU, - EFX_MAX_FRAME_LEN(efx->net_dev->mtu)); - MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_DRAIN, 0); - - /* Set simple MAC filter for Siena */ - MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_REJECT, - SET_MAC_IN_REJECT_UNCST, efx->unicast_filter); - - MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_FLAGS, - SET_MAC_IN_FLAG_INCLUDE_FCS, - !!(efx->net_dev->features & NETIF_F_RXFCS)); - - switch (efx->wanted_fc) { - case EFX_FC_RX | EFX_FC_TX: - fcntl = MC_CMD_FCNTL_BIDIR; - break; - case EFX_FC_RX: - fcntl = MC_CMD_FCNTL_RESPOND; - break; - default: - fcntl = MC_CMD_FCNTL_OFF; - break; - } - if (efx->wanted_fc & EFX_FC_AUTO) - fcntl = MC_CMD_FCNTL_AUTO; - if (efx->fc_disable) - fcntl = MC_CMD_FCNTL_OFF; - - MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_FCNTL, fcntl); - - return efx_mcdi_rpc(efx, MC_CMD_SET_MAC, cmdbytes, sizeof(cmdbytes), - NULL, 0, NULL); -} - bool efx_mcdi_mac_check_fault(struct efx_nic *efx) { MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN); @@ -1348,17 +806,3 @@ void efx_mcdi_port_remove(struct efx_nic *efx) efx->phy_op->remove(efx); efx_nic_free_buffer(efx, &efx->stats_buffer); } - -/* Get physical port number (EF10 only; on Siena it is same as PF number) */ -int efx_mcdi_port_get_number(struct efx_nic *efx) -{ - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PORT_ASSIGNMENT_OUT_LEN); - int rc; - - rc = efx_mcdi_rpc(efx, MC_CMD_GET_PORT_ASSIGNMENT, NULL, 0, - outbuf, sizeof(outbuf), NULL); - if (rc) - return rc; - - return MCDI_DWORD(outbuf, GET_PORT_ASSIGNMENT_OUT_PORT); -} diff --git a/drivers/net/ethernet/sfc/mcdi_port_common.c b/drivers/net/ethernet/sfc/mcdi_port_common.c new file mode 100644 index 000000000000..a6a072ba46d3 --- /dev/null +++ b/drivers/net/ethernet/sfc/mcdi_port_common.c @@ -0,0 +1,568 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. 
+ */ + +#include "mcdi_port_common.h" +#include "efx_common.h" + +int efx_mcdi_get_phy_cfg(struct efx_nic *efx, struct efx_mcdi_phy_data *cfg) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_CFG_OUT_LEN); + size_t outlen; + int rc; + + BUILD_BUG_ON(MC_CMD_GET_PHY_CFG_IN_LEN != 0); + BUILD_BUG_ON(MC_CMD_GET_PHY_CFG_OUT_NAME_LEN != sizeof(cfg->name)); + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_CFG, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + goto fail; + + if (outlen < MC_CMD_GET_PHY_CFG_OUT_LEN) { + rc = -EIO; + goto fail; + } + + cfg->flags = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_FLAGS); + cfg->type = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_TYPE); + cfg->supported_cap = + MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_SUPPORTED_CAP); + cfg->channel = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_CHANNEL); + cfg->port = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_PRT); + cfg->stats_mask = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_STATS_MASK); + memcpy(cfg->name, MCDI_PTR(outbuf, GET_PHY_CFG_OUT_NAME), + sizeof(cfg->name)); + cfg->media = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_MEDIA_TYPE); + cfg->mmd_mask = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_MMD_MASK); + memcpy(cfg->revision, MCDI_PTR(outbuf, GET_PHY_CFG_OUT_REVISION), + sizeof(cfg->revision)); + + return 0; + +fail: + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); + return rc; +} + +void efx_link_set_advertising(struct efx_nic *efx, + const unsigned long *advertising) +{ + memcpy(efx->link_advertising, advertising, + sizeof(__ETHTOOL_DECLARE_LINK_MODE_MASK())); + + efx->link_advertising[0] |= ADVERTISED_Autoneg; + if (advertising[0] & ADVERTISED_Pause) + efx->wanted_fc |= (EFX_FC_TX | EFX_FC_RX); + else + efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX); + if (advertising[0] & ADVERTISED_Asym_Pause) + efx->wanted_fc ^= EFX_FC_TX; +} + +int efx_mcdi_set_link(struct efx_nic *efx, u32 capabilities, + u32 flags, u32 loopback_mode, u32 loopback_speed) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_LINK_IN_LEN); + int rc; + + BUILD_BUG_ON(MC_CMD_SET_LINK_OUT_LEN != 0); + + MCDI_SET_DWORD(inbuf, SET_LINK_IN_CAP, capabilities); + MCDI_SET_DWORD(inbuf, SET_LINK_IN_FLAGS, flags); + MCDI_SET_DWORD(inbuf, SET_LINK_IN_LOOPBACK_MODE, loopback_mode); + MCDI_SET_DWORD(inbuf, SET_LINK_IN_LOOPBACK_SPEED, loopback_speed); + + rc = efx_mcdi_rpc(efx, MC_CMD_SET_LINK, inbuf, sizeof(inbuf), + NULL, 0, NULL); + return rc; +} + +int efx_mcdi_loopback_modes(struct efx_nic *efx, u64 *loopback_modes) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LOOPBACK_MODES_OUT_LEN); + size_t outlen; + int rc; + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_LOOPBACK_MODES, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + goto fail; + + if (outlen < (MC_CMD_GET_LOOPBACK_MODES_OUT_SUGGESTED_OFST + + MC_CMD_GET_LOOPBACK_MODES_OUT_SUGGESTED_LEN)) { + rc = -EIO; + goto fail; + } + + *loopback_modes = MCDI_QWORD(outbuf, GET_LOOPBACK_MODES_OUT_SUGGESTED); + + return 0; + +fail: + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); + return rc; +} + +void mcdi_to_ethtool_linkset(u32 media, u32 cap, unsigned long *linkset) +{ + #define SET_BIT(name) __set_bit(ETHTOOL_LINK_MODE_ ## name ## _BIT, \ + linkset) + + bitmap_zero(linkset, __ETHTOOL_LINK_MODE_MASK_NBITS); + switch (media) { + case MC_CMD_MEDIA_KX4: + SET_BIT(Backplane); + if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) + SET_BIT(1000baseKX_Full); + if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) + SET_BIT(10000baseKX4_Full); + if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) + SET_BIT(40000baseKR4_Full); + break; + + case MC_CMD_MEDIA_XFP: + case 
MC_CMD_MEDIA_SFP_PLUS: + case MC_CMD_MEDIA_QSFP_PLUS: + SET_BIT(FIBRE); + if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) + SET_BIT(1000baseT_Full); + if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) + SET_BIT(10000baseT_Full); + if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) + SET_BIT(40000baseCR4_Full); + if (cap & (1 << MC_CMD_PHY_CAP_100000FDX_LBN)) + SET_BIT(100000baseCR4_Full); + if (cap & (1 << MC_CMD_PHY_CAP_25000FDX_LBN)) + SET_BIT(25000baseCR_Full); + if (cap & (1 << MC_CMD_PHY_CAP_50000FDX_LBN)) + SET_BIT(50000baseCR2_Full); + break; + + case MC_CMD_MEDIA_BASE_T: + SET_BIT(TP); + if (cap & (1 << MC_CMD_PHY_CAP_10HDX_LBN)) + SET_BIT(10baseT_Half); + if (cap & (1 << MC_CMD_PHY_CAP_10FDX_LBN)) + SET_BIT(10baseT_Full); + if (cap & (1 << MC_CMD_PHY_CAP_100HDX_LBN)) + SET_BIT(100baseT_Half); + if (cap & (1 << MC_CMD_PHY_CAP_100FDX_LBN)) + SET_BIT(100baseT_Full); + if (cap & (1 << MC_CMD_PHY_CAP_1000HDX_LBN)) + SET_BIT(1000baseT_Half); + if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) + SET_BIT(1000baseT_Full); + if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) + SET_BIT(10000baseT_Full); + break; + } + + if (cap & (1 << MC_CMD_PHY_CAP_PAUSE_LBN)) + SET_BIT(Pause); + if (cap & (1 << MC_CMD_PHY_CAP_ASYM_LBN)) + SET_BIT(Asym_Pause); + if (cap & (1 << MC_CMD_PHY_CAP_AN_LBN)) + SET_BIT(Autoneg); + + #undef SET_BIT +} + +u32 ethtool_linkset_to_mcdi_cap(const unsigned long *linkset) +{ + u32 result = 0; + + #define TEST_BIT(name) test_bit(ETHTOOL_LINK_MODE_ ## name ## _BIT, \ + linkset) + + if (TEST_BIT(10baseT_Half)) + result |= (1 << MC_CMD_PHY_CAP_10HDX_LBN); + if (TEST_BIT(10baseT_Full)) + result |= (1 << MC_CMD_PHY_CAP_10FDX_LBN); + if (TEST_BIT(100baseT_Half)) + result |= (1 << MC_CMD_PHY_CAP_100HDX_LBN); + if (TEST_BIT(100baseT_Full)) + result |= (1 << MC_CMD_PHY_CAP_100FDX_LBN); + if (TEST_BIT(1000baseT_Half)) + result |= (1 << MC_CMD_PHY_CAP_1000HDX_LBN); + if (TEST_BIT(1000baseT_Full) || TEST_BIT(1000baseKX_Full)) + result |= (1 << MC_CMD_PHY_CAP_1000FDX_LBN); + if (TEST_BIT(10000baseT_Full) || TEST_BIT(10000baseKX4_Full)) + result |= (1 << MC_CMD_PHY_CAP_10000FDX_LBN); + if (TEST_BIT(40000baseCR4_Full) || TEST_BIT(40000baseKR4_Full)) + result |= (1 << MC_CMD_PHY_CAP_40000FDX_LBN); + if (TEST_BIT(100000baseCR4_Full)) + result |= (1 << MC_CMD_PHY_CAP_100000FDX_LBN); + if (TEST_BIT(25000baseCR_Full)) + result |= (1 << MC_CMD_PHY_CAP_25000FDX_LBN); + if (TEST_BIT(50000baseCR2_Full)) + result |= (1 << MC_CMD_PHY_CAP_50000FDX_LBN); + if (TEST_BIT(Pause)) + result |= (1 << MC_CMD_PHY_CAP_PAUSE_LBN); + if (TEST_BIT(Asym_Pause)) + result |= (1 << MC_CMD_PHY_CAP_ASYM_LBN); + if (TEST_BIT(Autoneg)) + result |= (1 << MC_CMD_PHY_CAP_AN_LBN); + + #undef TEST_BIT + + return result; +} + +u32 efx_get_mcdi_phy_flags(struct efx_nic *efx) +{ + struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; + enum efx_phy_mode mode, supported; + u32 flags; + + /* TODO: Advertise the capabilities supported by this PHY */ + supported = 0; + if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_TXDIS_LBN)) + supported |= PHY_MODE_TX_DISABLED; + if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_LOWPOWER_LBN)) + supported |= PHY_MODE_LOW_POWER; + if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_POWEROFF_LBN)) + supported |= PHY_MODE_OFF; + + mode = efx->phy_mode & supported; + + flags = 0; + if (mode & PHY_MODE_TX_DISABLED) + flags |= (1 << MC_CMD_SET_LINK_IN_TXDIS_LBN); + if (mode & PHY_MODE_LOW_POWER) + flags |= (1 << MC_CMD_SET_LINK_IN_LOWPOWER_LBN); + if (mode & PHY_MODE_OFF) + flags |= (1 << MC_CMD_SET_LINK_IN_POWEROFF_LBN); + + 
return flags; +} + +u8 mcdi_to_ethtool_media(u32 media) +{ + switch (media) { + case MC_CMD_MEDIA_XAUI: + case MC_CMD_MEDIA_CX4: + case MC_CMD_MEDIA_KX4: + return PORT_OTHER; + + case MC_CMD_MEDIA_XFP: + case MC_CMD_MEDIA_SFP_PLUS: + case MC_CMD_MEDIA_QSFP_PLUS: + return PORT_FIBRE; + + case MC_CMD_MEDIA_BASE_T: + return PORT_TP; + + default: + return PORT_OTHER; + } +} + +void efx_mcdi_phy_decode_link(struct efx_nic *efx, + struct efx_link_state *link_state, + u32 speed, u32 flags, u32 fcntl) +{ + switch (fcntl) { + case MC_CMD_FCNTL_AUTO: + WARN_ON(1); /* This is not a link mode */ + link_state->fc = EFX_FC_AUTO | EFX_FC_TX | EFX_FC_RX; + break; + case MC_CMD_FCNTL_BIDIR: + link_state->fc = EFX_FC_TX | EFX_FC_RX; + break; + case MC_CMD_FCNTL_RESPOND: + link_state->fc = EFX_FC_RX; + break; + default: + WARN_ON(1); + /* Fall through */ + case MC_CMD_FCNTL_OFF: + link_state->fc = 0; + break; + } + + link_state->up = !!(flags & (1 << MC_CMD_GET_LINK_OUT_LINK_UP_LBN)); + link_state->fd = !!(flags & (1 << MC_CMD_GET_LINK_OUT_FULL_DUPLEX_LBN)); + link_state->speed = speed; +} + +/* The semantics of the ethtool FEC mode bitmask are not well defined, + * particularly the meaning of combinations of bits. Which means we get to + * define our own semantics, as follows: + * OFF overrides any other bits, and means "disable all FEC" (with the + * exception of 25G KR4/CR4, where it is not possible to reject it if AN + * partner requests it). + * AUTO on its own means use cable requirements and link partner autoneg with + * fw-default preferences for the cable type. + * AUTO and either RS or BASER means use the specified FEC type if cable and + * link partner support it, otherwise autoneg/fw-default. + * RS or BASER alone means use the specified FEC type if cable and link partner + * support it and either requests it, otherwise no FEC. + * Both RS and BASER (whether AUTO or not) means use FEC if cable and link + * partner support it, preferring RS to BASER. + */ +u32 ethtool_fec_caps_to_mcdi(u32 ethtool_cap) +{ + u32 ret = 0; + + if (ethtool_cap & ETHTOOL_FEC_OFF) + return 0; + + if (ethtool_cap & ETHTOOL_FEC_AUTO) + ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) | + (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) | + (1 << MC_CMD_PHY_CAP_RS_FEC_LBN); + if (ethtool_cap & ETHTOOL_FEC_RS) + ret |= (1 << MC_CMD_PHY_CAP_RS_FEC_LBN) | + (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN); + if (ethtool_cap & ETHTOOL_FEC_BASER) + ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) | + (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) | + (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN) | + (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN); + return ret; +} + +/* Invert ethtool_fec_caps_to_mcdi. There are two combinations that function + * can never produce, (baser xor rs) and neither req; the implementation below + * maps both of those to AUTO. This should never matter, and it's not clear + * what a better mapping would be anyway. + */ +u32 mcdi_fec_caps_to_ethtool(u32 caps, bool is_25g) +{ + bool rs = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_LBN), + rs_req = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN), + baser = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) + : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN), + baser_req = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN) + : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN); + + if (!baser && !rs) + return ETHTOOL_FEC_OFF; + return (rs_req ? ETHTOOL_FEC_RS : 0) | + (baser_req ? ETHTOOL_FEC_BASER : 0) | + (baser == baser_req && rs == rs_req ? 
0 : ETHTOOL_FEC_AUTO); +} + +/* Verify that the forced flow control settings (!EFX_FC_AUTO) are + * supported by the link partner. Warn the user if this isn't the case + */ +void efx_mcdi_phy_check_fcntl(struct efx_nic *efx, u32 lpa) +{ + struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; + u32 rmtadv; + + /* The link partner capabilities are only relevant if the + * link supports flow control autonegotiation + */ + if (~phy_cfg->supported_cap & (1 << MC_CMD_PHY_CAP_AN_LBN)) + return; + + /* If flow control autoneg is supported and enabled, then fine */ + if (efx->wanted_fc & EFX_FC_AUTO) + return; + + rmtadv = 0; + if (lpa & (1 << MC_CMD_PHY_CAP_PAUSE_LBN)) + rmtadv |= ADVERTISED_Pause; + if (lpa & (1 << MC_CMD_PHY_CAP_ASYM_LBN)) + rmtadv |= ADVERTISED_Asym_Pause; + + if ((efx->wanted_fc & EFX_FC_TX) && rmtadv == ADVERTISED_Asym_Pause) + netif_err(efx, link, efx->net_dev, + "warning: link partner doesn't support pause frames"); +} + +bool efx_mcdi_phy_poll(struct efx_nic *efx) +{ + struct efx_link_state old_state = efx->link_state; + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN); + int rc; + + WARN_ON(!mutex_is_locked(&efx->mac_lock)); + + BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0); + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0, + outbuf, sizeof(outbuf), NULL); + if (rc) + efx->link_state.up = false; + else + efx_mcdi_phy_decode_link( + efx, &efx->link_state, + MCDI_DWORD(outbuf, GET_LINK_OUT_LINK_SPEED), + MCDI_DWORD(outbuf, GET_LINK_OUT_FLAGS), + MCDI_DWORD(outbuf, GET_LINK_OUT_FCNTL)); + + return !efx_link_state_equal(&efx->link_state, &old_state); +} + +int efx_mcdi_phy_get_fecparam(struct efx_nic *efx, struct ethtool_fecparam *fec) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_V2_LEN); + u32 caps, active, speed; /* MCDI format */ + bool is_25g = false; + size_t outlen; + int rc; + + BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0); + rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + if (outlen < MC_CMD_GET_LINK_OUT_V2_LEN) + return -EOPNOTSUPP; + + /* behaviour for 25G/50G links depends on 25G BASER bit */ + speed = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_LINK_SPEED); + is_25g = speed == 25000 || speed == 50000; + + caps = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_CAP); + fec->fec = mcdi_fec_caps_to_ethtool(caps, is_25g); + /* BASER is never supported on 100G */ + if (speed == 100000) + fec->fec &= ~ETHTOOL_FEC_BASER; + + active = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_FEC_TYPE); + switch (active) { + case MC_CMD_FEC_NONE: + fec->active_fec = ETHTOOL_FEC_OFF; + break; + case MC_CMD_FEC_BASER: + fec->active_fec = ETHTOOL_FEC_BASER; + break; + case MC_CMD_FEC_RS: + fec->active_fec = ETHTOOL_FEC_RS; + break; + default: + netif_warn(efx, hw, efx->net_dev, + "Firmware reports unrecognised FEC_TYPE %u\n", + active); + /* We don't know what firmware has picked. AUTO is as good a + * "can't happen" value as any other. 
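The two FEC helpers above are intended to round-trip for the combinations user space can request. A hypothetical self-check, assuming a 25G link so is_25g is true (not part of the patch):

	u32 caps = ethtool_fec_caps_to_mcdi(ETHTOOL_FEC_RS);
	/* caps now has RS_FEC and RS_FEC_REQUESTED set and nothing else */
	WARN_ON(mcdi_fec_caps_to_ethtool(caps, true) != ETHTOOL_FEC_RS);

	caps = ethtool_fec_caps_to_mcdi(ETHTOOL_FEC_AUTO);
	/* only the non-REQUESTED bits are set, which decodes back to AUTO */
	WARN_ON(mcdi_fec_caps_to_ethtool(caps, true) != ETHTOOL_FEC_AUTO);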
+ */ + fec->active_fec = ETHTOOL_FEC_AUTO; + break; + } + + return 0; +} + +int efx_mcdi_phy_test_alive(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_STATE_OUT_LEN); + size_t outlen; + int rc; + + BUILD_BUG_ON(MC_CMD_GET_PHY_STATE_IN_LEN != 0); + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_STATE, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + + if (outlen < MC_CMD_GET_PHY_STATE_OUT_LEN) + return -EIO; + if (MCDI_DWORD(outbuf, GET_PHY_STATE_OUT_STATE) != MC_CMD_PHY_STATE_OK) + return -EINVAL; + + return 0; +} + +int efx_mcdi_set_mac(struct efx_nic *efx) +{ + u32 fcntl; + MCDI_DECLARE_BUF(cmdbytes, MC_CMD_SET_MAC_IN_LEN); + + BUILD_BUG_ON(MC_CMD_SET_MAC_OUT_LEN != 0); + + /* This has no effect on EF10 */ + ether_addr_copy(MCDI_PTR(cmdbytes, SET_MAC_IN_ADDR), + efx->net_dev->dev_addr); + + MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_MTU, + EFX_MAX_FRAME_LEN(efx->net_dev->mtu)); + MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_DRAIN, 0); + + /* Set simple MAC filter for Siena */ + MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_REJECT, + SET_MAC_IN_REJECT_UNCST, efx->unicast_filter); + + MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_FLAGS, + SET_MAC_IN_FLAG_INCLUDE_FCS, + !!(efx->net_dev->features & NETIF_F_RXFCS)); + + switch (efx->wanted_fc) { + case EFX_FC_RX | EFX_FC_TX: + fcntl = MC_CMD_FCNTL_BIDIR; + break; + case EFX_FC_RX: + fcntl = MC_CMD_FCNTL_RESPOND; + break; + default: + fcntl = MC_CMD_FCNTL_OFF; + break; + } + if (efx->wanted_fc & EFX_FC_AUTO) + fcntl = MC_CMD_FCNTL_AUTO; + if (efx->fc_disable) + fcntl = MC_CMD_FCNTL_OFF; + + MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_FCNTL, fcntl); + + return efx_mcdi_rpc(efx, MC_CMD_SET_MAC, cmdbytes, sizeof(cmdbytes), + NULL, 0, NULL); +} + +/* Get physical port number (EF10 only; on Siena it is same as PF number) */ +int efx_mcdi_port_get_number(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PORT_ASSIGNMENT_OUT_LEN); + int rc; + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_PORT_ASSIGNMENT, NULL, 0, + outbuf, sizeof(outbuf), NULL); + if (rc) + return rc; + + return MCDI_DWORD(outbuf, GET_PORT_ASSIGNMENT_OUT_PORT); +} + +static unsigned int efx_mcdi_event_link_speed[] = { + [MCDI_EVENT_LINKCHANGE_SPEED_100M] = 100, + [MCDI_EVENT_LINKCHANGE_SPEED_1G] = 1000, + [MCDI_EVENT_LINKCHANGE_SPEED_10G] = 10000, + [MCDI_EVENT_LINKCHANGE_SPEED_40G] = 40000, + [MCDI_EVENT_LINKCHANGE_SPEED_25G] = 25000, + [MCDI_EVENT_LINKCHANGE_SPEED_50G] = 50000, + [MCDI_EVENT_LINKCHANGE_SPEED_100G] = 100000, +}; + +void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev) +{ + u32 flags, fcntl, speed, lpa; + + speed = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_SPEED); + EFX_WARN_ON_PARANOID(speed >= ARRAY_SIZE(efx_mcdi_event_link_speed)); + speed = efx_mcdi_event_link_speed[speed]; + + flags = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_LINK_FLAGS); + fcntl = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_FCNTL); + lpa = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_LP_CAP); + + /* efx->link_state is only modified by efx_mcdi_phy_get_link(), + * which is only run after flushing the event queues. Therefore, it + * is safe to modify the link state outside of the mac_lock here. 
+ */ + efx_mcdi_phy_decode_link(efx, &efx->link_state, speed, flags, fcntl); + + efx_mcdi_phy_check_fcntl(efx, lpa); + + efx_link_status_changed(efx); +} diff --git a/drivers/net/ethernet/sfc/mcdi_port_common.h b/drivers/net/ethernet/sfc/mcdi_port_common.h new file mode 100644 index 000000000000..b16f11265269 --- /dev/null +++ b/drivers/net/ethernet/sfc/mcdi_port_common.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ +#ifndef EFX_MCDI_PORT_COMMON_H +#define EFX_MCDI_PORT_COMMON_H + +#include "net_driver.h" +#include "mcdi.h" +#include "mcdi_pcol.h" + +struct efx_mcdi_phy_data { + u32 flags; + u32 type; + u32 supported_cap; + u32 channel; + u32 port; + u32 stats_mask; + u8 name[20]; + u32 media; + u32 mmd_mask; + u8 revision[20]; + u32 forced_cap; +}; + +#define EFX_MC_STATS_GENERATION_INVALID ((__force __le64)(-1)) + +int efx_mcdi_get_phy_cfg(struct efx_nic *efx, struct efx_mcdi_phy_data *cfg); +void efx_link_set_advertising(struct efx_nic *efx, + const unsigned long *advertising); +int efx_mcdi_set_link(struct efx_nic *efx, u32 capabilities, + u32 flags, u32 loopback_mode, u32 loopback_speed); +int efx_mcdi_loopback_modes(struct efx_nic *efx, u64 *loopback_modes); +void mcdi_to_ethtool_linkset(u32 media, u32 cap, unsigned long *linkset); +u32 ethtool_linkset_to_mcdi_cap(const unsigned long *linkset); +u32 efx_get_mcdi_phy_flags(struct efx_nic *efx); +u8 mcdi_to_ethtool_media(u32 media); +void efx_mcdi_phy_decode_link(struct efx_nic *efx, + struct efx_link_state *link_state, + u32 speed, u32 flags, u32 fcntl); +u32 ethtool_fec_caps_to_mcdi(u32 ethtool_cap); +u32 mcdi_fec_caps_to_ethtool(u32 caps, bool is_25g); +void efx_mcdi_phy_check_fcntl(struct efx_nic *efx, u32 lpa); +bool efx_mcdi_phy_poll(struct efx_nic *efx); +int efx_mcdi_phy_get_fecparam(struct efx_nic *efx, + struct ethtool_fecparam *fec); +int efx_mcdi_phy_test_alive(struct efx_nic *efx); +int efx_mcdi_set_mac(struct efx_nic *efx); +int efx_mcdi_port_get_number(struct efx_nic *efx); +void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev); + +#endif diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index dfd5182d9e47..9f9886f222c8 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -24,7 +24,6 @@ #include <linux/mutex.h> #include <linux/rwsem.h> #include <linux/vmalloc.h> -#include <linux/i2c.h> #include <linux/mtd/mtd.h> #include <net/busy_poll.h> #include <net/xdp.h> @@ -139,6 +138,8 @@ struct efx_special_buffer { * freed when descriptor completes * @xdpf: When @flags & %EFX_TX_BUF_XDP, the XDP frame information; its @data * member is the associated buffer to drop a page reference on. + * @option: When @flags & %EFX_TX_BUF_OPTION, an EF10-specific option + * descriptor. * @dma_addr: DMA address of the fragment. * @flags: Flags for allocation and DMA mapping type * @len: Length of this fragment. 
@@ -153,7 +154,7 @@ struct efx_tx_buffer { struct xdp_frame *xdpf; }; union { - efx_qword_t option; + efx_qword_t option; /* EF10 */ dma_addr_t dma_addr; }; unsigned short flags; @@ -743,13 +744,13 @@ union efx_multicast_hash { struct vfdi_status; /* The reserved RSS context value */ -#define EFX_EF10_RSS_CONTEXT_INVALID 0xffffffff +#define EFX_MCDI_RSS_CONTEXT_INVALID 0xffffffff /** * struct efx_rss_context - A user-defined RSS context for filtering * @list: node of linked list on which this struct is stored * @context_id: the RSS_CONTEXT_ID returned by MC firmware, or - * %EFX_EF10_RSS_CONTEXT_INVALID if this context is not present on the NIC. - * For Siena, 0 if RSS is active, else %EFX_EF10_RSS_CONTEXT_INVALID. + * %EFX_MCDI_RSS_CONTEXT_INVALID if this context is not present on the NIC. + * For Siena, 0 if RSS is active, else %EFX_MCDI_RSS_CONTEXT_INVALID. * @user_id: the rss_context ID exposed to userspace over ethtool. * @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled * @rx_hash_key: Toeplitz hash key for this RSS context @@ -1611,6 +1612,15 @@ static inline struct efx_rx_buffer *efx_rx_buffer(struct efx_rx_queue *rx_queue, return &rx_queue->buffer[index]; } +static inline struct efx_rx_buffer * +efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf) +{ + if (unlikely(rx_buf == efx_rx_buffer(rx_queue, rx_queue->ptr_mask))) + return efx_rx_buffer(rx_queue, 0); + else + return rx_buf + 1; +} + /** * EFX_MAX_FRAME_LEN - calculate maximum frame length * diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index 1f7c5717de75..6670fda8f35a 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -9,9 +9,9 @@ #define EFX_NIC_H #include <linux/net_tstamp.h> -#include <linux/i2c-algo-bit.h> #include "net_driver.h" #include "efx.h" +#include "efx_common.h" #include "mcdi.h" enum { @@ -506,6 +506,9 @@ static inline void efx_nic_push_buffers(struct efx_tx_queue *tx_queue) tx_queue->efx->type->tx_write(tx_queue); } +int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb, + bool *data_mapped); + /* RX data path */ static inline int efx_nic_probe_rx(struct efx_rx_queue *rx_queue) { @@ -554,6 +557,7 @@ static inline void efx_nic_eventq_read_ack(struct efx_channel *channel) { channel->efx->type->ev_read_ack(channel); } + void efx_nic_event_test_start(struct efx_channel *channel); /* Falcon/Siena queue operations */ @@ -671,6 +675,7 @@ struct efx_farch_register_test { unsigned address; efx_oword_t mask; }; + int efx_farch_test_registers(struct efx_nic *efx, const struct efx_farch_register_test *regs, size_t n_regs); diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index c29bf862a94c..a2042f16babc 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -21,6 +21,7 @@ #include <linux/bpf_trace.h> #include "net_driver.h" #include "efx.h" +#include "rx_common.h" #include "filter.h" #include "nic.h" #include "selftest.h" @@ -32,60 +33,13 @@ /* Maximum rx prefix used by any architecture. */ #define EFX_MAX_RX_PREFIX_SIZE 16 -/* Number of RX buffers to recycle pages for. When creating the RX page recycle - * ring, this number is divided by the number of buffers per page to calculate - * the number of pages to store in the RX page recycle ring. - */ -#define EFX_RECYCLE_RING_SIZE_IOMMU 4096 -#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_PREFERRED_BATCH) - /* Size of buffer allocated for skb header area. 
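The efx_rx_buf_next() helper added to net_driver.h above wraps the descriptor-ring index at ptr_mask, so multi-fragment packets can be walked without open-coding the wrap. An illustrative walk over one packet's fragments (n_frags, index and the processing step are placeholders):

	struct efx_rx_buffer *rx_buf = efx_rx_buffer(rx_queue, index);
	unsigned int i;

	for (i = 0; i < n_frags; i++) {
		/* ... handle this fragment ... */
		rx_buf = efx_rx_buf_next(rx_queue, rx_buf);	/* wraps at ptr_mask */
	}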
*/ #define EFX_SKB_HEADERS 128u -/* This is the percentage fill level below which new RX descriptors - * will be added to the RX descriptor ring. - */ -static unsigned int rx_refill_threshold; - /* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */ #define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \ EFX_RX_USR_BUF_SIZE) -/* - * RX maximum head room required. - * - * This must be at least 1 to prevent overflow, plus one packet-worth - * to allow pipelined receives. - */ -#define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS) - -static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf) -{ - return page_address(buf->page) + buf->page_offset; -} - -static inline u32 efx_rx_buf_hash(struct efx_nic *efx, const u8 *eh) -{ -#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) - return __le32_to_cpup((const __le32 *)(eh + efx->rx_packet_hash_offset)); -#else - const u8 *data = eh + efx->rx_packet_hash_offset; - return (u32)data[0] | - (u32)data[1] << 8 | - (u32)data[2] << 16 | - (u32)data[3] << 24; -#endif -} - -static inline struct efx_rx_buffer * -efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf) -{ - if (unlikely(rx_buf == efx_rx_buffer(rx_queue, rx_queue->ptr_mask))) - return efx_rx_buffer(rx_queue, 0); - else - return rx_buf + 1; -} - static inline void efx_sync_rx_buffer(struct efx_nic *efx, struct efx_rx_buffer *rx_buf, unsigned int len) @@ -94,301 +48,6 @@ static inline void efx_sync_rx_buffer(struct efx_nic *efx, DMA_FROM_DEVICE); } -void efx_rx_config_page_split(struct efx_nic *efx) -{ - efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align + - XDP_PACKET_HEADROOM, - EFX_RX_BUF_ALIGNMENT); - efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 : - ((PAGE_SIZE - sizeof(struct efx_rx_page_state)) / - efx->rx_page_buf_step); - efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) / - efx->rx_bufs_per_page; - efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH, - efx->rx_bufs_per_page); -} - -/* Check the RX page recycle ring for a page that can be reused. */ -static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue) -{ - struct efx_nic *efx = rx_queue->efx; - struct page *page; - struct efx_rx_page_state *state; - unsigned index; - - index = rx_queue->page_remove & rx_queue->page_ptr_mask; - page = rx_queue->page_ring[index]; - if (page == NULL) - return NULL; - - rx_queue->page_ring[index] = NULL; - /* page_remove cannot exceed page_add. */ - if (rx_queue->page_remove != rx_queue->page_add) - ++rx_queue->page_remove; - - /* If page_count is 1 then we hold the only reference to this page. */ - if (page_count(page) == 1) { - ++rx_queue->page_recycle_count; - return page; - } else { - state = page_address(page); - dma_unmap_page(&efx->pci_dev->dev, state->dma_addr, - PAGE_SIZE << efx->rx_buffer_order, - DMA_FROM_DEVICE); - put_page(page); - ++rx_queue->page_recycle_failed; - } - - return NULL; -} - -/** - * efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers - * - * @rx_queue: Efx RX queue - * - * This allocates a batch of pages, maps them for DMA, and populates - * struct efx_rx_buffers for each one. Return a negative error code or - * 0 on success. If a single page can be used for multiple buffers, - * then the page will either be inserted fully, or not at all. 
- */ -static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic) -{ - struct efx_nic *efx = rx_queue->efx; - struct efx_rx_buffer *rx_buf; - struct page *page; - unsigned int page_offset; - struct efx_rx_page_state *state; - dma_addr_t dma_addr; - unsigned index, count; - - count = 0; - do { - page = efx_reuse_page(rx_queue); - if (page == NULL) { - page = alloc_pages(__GFP_COMP | - (atomic ? GFP_ATOMIC : GFP_KERNEL), - efx->rx_buffer_order); - if (unlikely(page == NULL)) - return -ENOMEM; - dma_addr = - dma_map_page(&efx->pci_dev->dev, page, 0, - PAGE_SIZE << efx->rx_buffer_order, - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(&efx->pci_dev->dev, - dma_addr))) { - __free_pages(page, efx->rx_buffer_order); - return -EIO; - } - state = page_address(page); - state->dma_addr = dma_addr; - } else { - state = page_address(page); - dma_addr = state->dma_addr; - } - - dma_addr += sizeof(struct efx_rx_page_state); - page_offset = sizeof(struct efx_rx_page_state); - - do { - index = rx_queue->added_count & rx_queue->ptr_mask; - rx_buf = efx_rx_buffer(rx_queue, index); - rx_buf->dma_addr = dma_addr + efx->rx_ip_align + - XDP_PACKET_HEADROOM; - rx_buf->page = page; - rx_buf->page_offset = page_offset + efx->rx_ip_align + - XDP_PACKET_HEADROOM; - rx_buf->len = efx->rx_dma_len; - rx_buf->flags = 0; - ++rx_queue->added_count; - get_page(page); - dma_addr += efx->rx_page_buf_step; - page_offset += efx->rx_page_buf_step; - } while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE); - - rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE; - } while (++count < efx->rx_pages_per_batch); - - return 0; -} - -/* Unmap a DMA-mapped page. This function is only called for the final RX - * buffer in a page. - */ -static void efx_unmap_rx_buffer(struct efx_nic *efx, - struct efx_rx_buffer *rx_buf) -{ - struct page *page = rx_buf->page; - - if (page) { - struct efx_rx_page_state *state = page_address(page); - dma_unmap_page(&efx->pci_dev->dev, - state->dma_addr, - PAGE_SIZE << efx->rx_buffer_order, - DMA_FROM_DEVICE); - } -} - -static void efx_free_rx_buffers(struct efx_rx_queue *rx_queue, - struct efx_rx_buffer *rx_buf, - unsigned int num_bufs) -{ - do { - if (rx_buf->page) { - put_page(rx_buf->page); - rx_buf->page = NULL; - } - rx_buf = efx_rx_buf_next(rx_queue, rx_buf); - } while (--num_bufs); -} - -/* Attempt to recycle the page if there is an RX recycle ring; the page can - * only be added if this is the final RX buffer, to prevent pages being used in - * the descriptor ring and appearing in the recycle ring simultaneously. - */ -static void efx_recycle_rx_page(struct efx_channel *channel, - struct efx_rx_buffer *rx_buf) -{ - struct page *page = rx_buf->page; - struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); - struct efx_nic *efx = rx_queue->efx; - unsigned index; - - /* Only recycle the page after processing the final buffer. */ - if (!(rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE)) - return; - - index = rx_queue->page_add & rx_queue->page_ptr_mask; - if (rx_queue->page_ring[index] == NULL) { - unsigned read_index = rx_queue->page_remove & - rx_queue->page_ptr_mask; - - /* The next slot in the recycle ring is available, but - * increment page_remove if the read pointer currently - * points here. 
- */ - if (read_index == index) - ++rx_queue->page_remove; - rx_queue->page_ring[index] = page; - ++rx_queue->page_add; - return; - } - ++rx_queue->page_recycle_full; - efx_unmap_rx_buffer(efx, rx_buf); - put_page(rx_buf->page); -} - -static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue, - struct efx_rx_buffer *rx_buf) -{ - /* Release the page reference we hold for the buffer. */ - if (rx_buf->page) - put_page(rx_buf->page); - - /* If this is the last buffer in a page, unmap and free it. */ - if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) { - efx_unmap_rx_buffer(rx_queue->efx, rx_buf); - efx_free_rx_buffers(rx_queue, rx_buf, 1); - } - rx_buf->page = NULL; -} - -/* Recycle the pages that are used by buffers that have just been received. */ -static void efx_recycle_rx_pages(struct efx_channel *channel, - struct efx_rx_buffer *rx_buf, - unsigned int n_frags) -{ - struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); - - do { - efx_recycle_rx_page(channel, rx_buf); - rx_buf = efx_rx_buf_next(rx_queue, rx_buf); - } while (--n_frags); -} - -static void efx_discard_rx_packet(struct efx_channel *channel, - struct efx_rx_buffer *rx_buf, - unsigned int n_frags) -{ - struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); - - efx_recycle_rx_pages(channel, rx_buf, n_frags); - - efx_free_rx_buffers(rx_queue, rx_buf, n_frags); -} - -/** - * efx_fast_push_rx_descriptors - push new RX descriptors quickly - * @rx_queue: RX descriptor queue - * - * This will aim to fill the RX descriptor queue up to - * @rx_queue->@max_fill. If there is insufficient atomic - * memory to do so, a slow fill will be scheduled. - * - * The caller must provide serialisation (none is used here). In practise, - * this means this function must run from the NAPI handler, or be called - * when NAPI is disabled. 
- */ -void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic) -{ - struct efx_nic *efx = rx_queue->efx; - unsigned int fill_level, batch_size; - int space, rc = 0; - - if (!rx_queue->refill_enabled) - return; - - /* Calculate current fill level, and exit if we don't need to fill */ - fill_level = (rx_queue->added_count - rx_queue->removed_count); - EFX_WARN_ON_ONCE_PARANOID(fill_level > rx_queue->efx->rxq_entries); - if (fill_level >= rx_queue->fast_fill_trigger) - goto out; - - /* Record minimum fill level */ - if (unlikely(fill_level < rx_queue->min_fill)) { - if (fill_level) - rx_queue->min_fill = fill_level; - } - - batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page; - space = rx_queue->max_fill - fill_level; - EFX_WARN_ON_ONCE_PARANOID(space < batch_size); - - netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev, - "RX queue %d fast-filling descriptor ring from" - " level %d to level %d\n", - efx_rx_queue_index(rx_queue), fill_level, - rx_queue->max_fill); - - - do { - rc = efx_init_rx_buffers(rx_queue, atomic); - if (unlikely(rc)) { - /* Ensure that we don't leave the rx queue empty */ - efx_schedule_slow_fill(rx_queue); - goto out; - } - } while ((space -= batch_size) >= batch_size); - - netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev, - "RX queue %d fast-filled descriptor ring " - "to level %d\n", efx_rx_queue_index(rx_queue), - rx_queue->added_count - rx_queue->removed_count); - - out: - if (rx_queue->notified_count != rx_queue->added_count) - efx_nic_notify_rx_desc(rx_queue); -} - -void efx_rx_slow_fill(struct timer_list *t) -{ - struct efx_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill); - - /* Post an event to cause NAPI to run and refill the queue */ - efx_nic_generate_fill_event(rx_queue); - ++rx_queue->slow_fill_count; -} - static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf, int len) @@ -412,53 +71,6 @@ static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue, efx_rx_queue_channel(rx_queue)->n_rx_overlength++; } -/* Pass a received packet up through GRO. GRO can handle pages - * regardless of checksum state and skbs with a good checksum. - */ -static void -efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, - unsigned int n_frags, u8 *eh) -{ - struct napi_struct *napi = &channel->napi_str; - struct efx_nic *efx = channel->efx; - struct sk_buff *skb; - - skb = napi_get_frags(napi); - if (unlikely(!skb)) { - struct efx_rx_queue *rx_queue; - - rx_queue = efx_channel_get_rx_queue(channel); - efx_free_rx_buffers(rx_queue, rx_buf, n_frags); - return; - } - - if (efx->net_dev->features & NETIF_F_RXHASH) - skb_set_hash(skb, efx_rx_buf_hash(efx, eh), - PKT_HASH_TYPE_L3); - skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ? 
- CHECKSUM_UNNECESSARY : CHECKSUM_NONE); - skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL); - - for (;;) { - skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, - rx_buf->page, rx_buf->page_offset, - rx_buf->len); - rx_buf->page = NULL; - skb->len += rx_buf->len; - if (skb_shinfo(skb)->nr_frags == n_frags) - break; - - rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf); - } - - skb->data_len = skb->len; - skb->truesize += n_frags * efx->rx_buffer_truesize; - - skb_record_rx_queue(skb, channel->rx_queue.core_index); - - napi_gro_frags(napi); -} - /* Allocate and construct an SKB around page fragments */ static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, @@ -805,174 +417,6 @@ out: channel->rx_pkt_n_frags = 0; } -int efx_probe_rx_queue(struct efx_rx_queue *rx_queue) -{ - struct efx_nic *efx = rx_queue->efx; - unsigned int entries; - int rc; - - /* Create the smallest power-of-two aligned ring */ - entries = max(roundup_pow_of_two(efx->rxq_entries), EFX_MIN_DMAQ_SIZE); - EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE); - rx_queue->ptr_mask = entries - 1; - - netif_dbg(efx, probe, efx->net_dev, - "creating RX queue %d size %#x mask %#x\n", - efx_rx_queue_index(rx_queue), efx->rxq_entries, - rx_queue->ptr_mask); - - /* Allocate RX buffers */ - rx_queue->buffer = kcalloc(entries, sizeof(*rx_queue->buffer), - GFP_KERNEL); - if (!rx_queue->buffer) - return -ENOMEM; - - rc = efx_nic_probe_rx(rx_queue); - if (rc) { - kfree(rx_queue->buffer); - rx_queue->buffer = NULL; - } - - return rc; -} - -static void efx_init_rx_recycle_ring(struct efx_nic *efx, - struct efx_rx_queue *rx_queue) -{ - unsigned int bufs_in_recycle_ring, page_ring_size; - - /* Set the RX recycle ring size */ -#ifdef CONFIG_PPC64 - bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU; -#else - if (iommu_present(&pci_bus_type)) - bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU; - else - bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_NOIOMMU; -#endif /* CONFIG_PPC64 */ - - page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring / - efx->rx_bufs_per_page); - rx_queue->page_ring = kcalloc(page_ring_size, - sizeof(*rx_queue->page_ring), GFP_KERNEL); - rx_queue->page_ptr_mask = page_ring_size - 1; -} - -void efx_init_rx_queue(struct efx_rx_queue *rx_queue) -{ - struct efx_nic *efx = rx_queue->efx; - unsigned int max_fill, trigger, max_trigger; - int rc = 0; - - netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, - "initialising RX queue %d\n", efx_rx_queue_index(rx_queue)); - - /* Initialise ptr fields */ - rx_queue->added_count = 0; - rx_queue->notified_count = 0; - rx_queue->removed_count = 0; - rx_queue->min_fill = -1U; - efx_init_rx_recycle_ring(efx, rx_queue); - - rx_queue->page_remove = 0; - rx_queue->page_add = rx_queue->page_ptr_mask + 1; - rx_queue->page_recycle_count = 0; - rx_queue->page_recycle_failed = 0; - rx_queue->page_recycle_full = 0; - - /* Initialise limit fields */ - max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM; - max_trigger = - max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page; - if (rx_refill_threshold != 0) { - trigger = max_fill * min(rx_refill_threshold, 100U) / 100U; - if (trigger > max_trigger) - trigger = max_trigger; - } else { - trigger = max_trigger; - } - - rx_queue->max_fill = max_fill; - rx_queue->fast_fill_trigger = trigger; - rx_queue->refill_enabled = true; - - /* Initialise XDP queue information */ - rc = xdp_rxq_info_reg(&rx_queue->xdp_rxq_info, efx->net_dev, - rx_queue->core_index); - - if (rc) { - netif_err(efx, rx_err, 
efx->net_dev, - "Failure to initialise XDP queue information rc=%d\n", - rc); - efx->xdp_rxq_info_failed = true; - } else { - rx_queue->xdp_rxq_info_valid = true; - } - - /* Set up RX descriptor ring */ - efx_nic_init_rx(rx_queue); -} - -void efx_fini_rx_queue(struct efx_rx_queue *rx_queue) -{ - int i; - struct efx_nic *efx = rx_queue->efx; - struct efx_rx_buffer *rx_buf; - - netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, - "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue)); - - del_timer_sync(&rx_queue->slow_fill); - - /* Release RX buffers from the current read ptr to the write ptr */ - if (rx_queue->buffer) { - for (i = rx_queue->removed_count; i < rx_queue->added_count; - i++) { - unsigned index = i & rx_queue->ptr_mask; - rx_buf = efx_rx_buffer(rx_queue, index); - efx_fini_rx_buffer(rx_queue, rx_buf); - } - } - - /* Unmap and release the pages in the recycle ring. Remove the ring. */ - for (i = 0; i <= rx_queue->page_ptr_mask; i++) { - struct page *page = rx_queue->page_ring[i]; - struct efx_rx_page_state *state; - - if (page == NULL) - continue; - - state = page_address(page); - dma_unmap_page(&efx->pci_dev->dev, state->dma_addr, - PAGE_SIZE << efx->rx_buffer_order, - DMA_FROM_DEVICE); - put_page(page); - } - kfree(rx_queue->page_ring); - rx_queue->page_ring = NULL; - - if (rx_queue->xdp_rxq_info_valid) - xdp_rxq_info_unreg(&rx_queue->xdp_rxq_info); - - rx_queue->xdp_rxq_info_valid = false; -} - -void efx_remove_rx_queue(struct efx_rx_queue *rx_queue) -{ - netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, - "destroying RX queue %d\n", efx_rx_queue_index(rx_queue)); - - efx_nic_remove_rx(rx_queue); - - kfree(rx_queue->buffer); - rx_queue->buffer = NULL; -} - - -module_param(rx_refill_threshold, uint, 0444); -MODULE_PARM_DESC(rx_refill_threshold, - "RX descriptor ring refill threshold (%)"); - #ifdef CONFIG_RFS_ACCEL static void efx_filter_rfs_work(struct work_struct *data) @@ -1206,37 +650,3 @@ bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota) } #endif /* CONFIG_RFS_ACCEL */ - -/** - * efx_filter_is_mc_recipient - test whether spec is a multicast recipient - * @spec: Specification to test - * - * Return: %true if the specification is a non-drop RX filter that - * matches a local MAC address I/G bit value of 1 or matches a local - * IPv4 or IPv6 address value in the respective multicast address - * range. Otherwise %false. 
- */ -bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec) -{ - if (!(spec->flags & EFX_FILTER_FLAG_RX) || - spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP) - return false; - - if (spec->match_flags & - (EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_LOC_MAC_IG) && - is_multicast_ether_addr(spec->loc_mac)) - return true; - - if ((spec->match_flags & - (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) == - (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) { - if (spec->ether_type == htons(ETH_P_IP) && - ipv4_is_multicast(spec->loc_host[0])) - return true; - if (spec->ether_type == htons(ETH_P_IPV6) && - ((const u8 *)spec->loc_host)[0] == 0xff) - return true; - } - - return false; -} diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c new file mode 100644 index 000000000000..ee8beb87bdc1 --- /dev/null +++ b/drivers/net/ethernet/sfc/rx_common.c @@ -0,0 +1,851 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include "net_driver.h" +#include <linux/module.h> +#include <linux/iommu.h> +#include "efx.h" +#include "nic.h" +#include "rx_common.h" + +/* This is the percentage fill level below which new RX descriptors + * will be added to the RX descriptor ring. + */ +static unsigned int rx_refill_threshold; +module_param(rx_refill_threshold, uint, 0444); +MODULE_PARM_DESC(rx_refill_threshold, + "RX descriptor ring refill threshold (%)"); + +/* Number of RX buffers to recycle pages for. When creating the RX page recycle + * ring, this number is divided by the number of buffers per page to calculate + * the number of pages to store in the RX page recycle ring. + */ +#define EFX_RECYCLE_RING_SIZE_IOMMU 4096 +#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_PREFERRED_BATCH) + +/* RX maximum head room required. + * + * This must be at least 1 to prevent overflow, plus one packet-worth + * to allow pipelined receives. + */ +#define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS) + +/* Check the RX page recycle ring for a page that can be reused. */ +static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue) +{ + struct efx_nic *efx = rx_queue->efx; + struct efx_rx_page_state *state; + unsigned int index; + struct page *page; + + index = rx_queue->page_remove & rx_queue->page_ptr_mask; + page = rx_queue->page_ring[index]; + if (page == NULL) + return NULL; + + rx_queue->page_ring[index] = NULL; + /* page_remove cannot exceed page_add. */ + if (rx_queue->page_remove != rx_queue->page_add) + ++rx_queue->page_remove; + + /* If page_count is 1 then we hold the only reference to this page. */ + if (page_count(page) == 1) { + ++rx_queue->page_recycle_count; + return page; + } else { + state = page_address(page); + dma_unmap_page(&efx->pci_dev->dev, state->dma_addr, + PAGE_SIZE << efx->rx_buffer_order, + DMA_FROM_DEVICE); + put_page(page); + ++rx_queue->page_recycle_failed; + } + + return NULL; +} + +/* Attempt to recycle the page if there is an RX recycle ring; the page can + * only be added if this is the final RX buffer, to prevent pages being used in + * the descriptor ring and appearing in the recycle ring simultaneously. 
+ */ +static void efx_recycle_rx_page(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf) +{ + struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); + struct efx_nic *efx = rx_queue->efx; + struct page *page = rx_buf->page; + unsigned int index; + + /* Only recycle the page after processing the final buffer. */ + if (!(rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE)) + return; + + index = rx_queue->page_add & rx_queue->page_ptr_mask; + if (rx_queue->page_ring[index] == NULL) { + unsigned int read_index = rx_queue->page_remove & + rx_queue->page_ptr_mask; + + /* The next slot in the recycle ring is available, but + * increment page_remove if the read pointer currently + * points here. + */ + if (read_index == index) + ++rx_queue->page_remove; + rx_queue->page_ring[index] = page; + ++rx_queue->page_add; + return; + } + ++rx_queue->page_recycle_full; + efx_unmap_rx_buffer(efx, rx_buf); + put_page(rx_buf->page); +} + +/* Recycle the pages that are used by buffers that have just been received. */ +void efx_recycle_rx_pages(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags) +{ + struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); + + do { + efx_recycle_rx_page(channel, rx_buf); + rx_buf = efx_rx_buf_next(rx_queue, rx_buf); + } while (--n_frags); +} + +void efx_discard_rx_packet(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags) +{ + struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); + + efx_recycle_rx_pages(channel, rx_buf, n_frags); + + efx_free_rx_buffers(rx_queue, rx_buf, n_frags); +} + +static void efx_init_rx_recycle_ring(struct efx_rx_queue *rx_queue) +{ + unsigned int bufs_in_recycle_ring, page_ring_size; + struct efx_nic *efx = rx_queue->efx; + + /* Set the RX recycle ring size */ +#ifdef CONFIG_PPC64 + bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU; +#else + if (iommu_present(&pci_bus_type)) + bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU; + else + bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_NOIOMMU; +#endif /* CONFIG_PPC64 */ + + page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring / + efx->rx_bufs_per_page); + rx_queue->page_ring = kcalloc(page_ring_size, + sizeof(*rx_queue->page_ring), GFP_KERNEL); + rx_queue->page_ptr_mask = page_ring_size - 1; +} + +static void efx_fini_rx_recycle_ring(struct efx_rx_queue *rx_queue) +{ + struct efx_nic *efx = rx_queue->efx; + int i; + + /* Unmap and release the pages in the recycle ring. Remove the ring. */ + for (i = 0; i <= rx_queue->page_ptr_mask; i++) { + struct page *page = rx_queue->page_ring[i]; + struct efx_rx_page_state *state; + + if (page == NULL) + continue; + + state = page_address(page); + dma_unmap_page(&efx->pci_dev->dev, state->dma_addr, + PAGE_SIZE << efx->rx_buffer_order, + DMA_FROM_DEVICE); + put_page(page); + } + kfree(rx_queue->page_ring); + rx_queue->page_ring = NULL; +} + +static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue, + struct efx_rx_buffer *rx_buf) +{ + /* Release the page reference we hold for the buffer. */ + if (rx_buf->page) + put_page(rx_buf->page); + + /* If this is the last buffer in a page, unmap and free it. 
*/ + if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) { + efx_unmap_rx_buffer(rx_queue->efx, rx_buf); + efx_free_rx_buffers(rx_queue, rx_buf, 1); + } + rx_buf->page = NULL; +} + +int efx_probe_rx_queue(struct efx_rx_queue *rx_queue) +{ + struct efx_nic *efx = rx_queue->efx; + unsigned int entries; + int rc; + + /* Create the smallest power-of-two aligned ring */ + entries = max(roundup_pow_of_two(efx->rxq_entries), EFX_MIN_DMAQ_SIZE); + EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE); + rx_queue->ptr_mask = entries - 1; + + netif_dbg(efx, probe, efx->net_dev, + "creating RX queue %d size %#x mask %#x\n", + efx_rx_queue_index(rx_queue), efx->rxq_entries, + rx_queue->ptr_mask); + + /* Allocate RX buffers */ + rx_queue->buffer = kcalloc(entries, sizeof(*rx_queue->buffer), + GFP_KERNEL); + if (!rx_queue->buffer) + return -ENOMEM; + + rc = efx_nic_probe_rx(rx_queue); + if (rc) { + kfree(rx_queue->buffer); + rx_queue->buffer = NULL; + } + + return rc; +} + +void efx_init_rx_queue(struct efx_rx_queue *rx_queue) +{ + unsigned int max_fill, trigger, max_trigger; + struct efx_nic *efx = rx_queue->efx; + int rc = 0; + + netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, + "initialising RX queue %d\n", efx_rx_queue_index(rx_queue)); + + /* Initialise ptr fields */ + rx_queue->added_count = 0; + rx_queue->notified_count = 0; + rx_queue->removed_count = 0; + rx_queue->min_fill = -1U; + efx_init_rx_recycle_ring(rx_queue); + + rx_queue->page_remove = 0; + rx_queue->page_add = rx_queue->page_ptr_mask + 1; + rx_queue->page_recycle_count = 0; + rx_queue->page_recycle_failed = 0; + rx_queue->page_recycle_full = 0; + + /* Initialise limit fields */ + max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM; + max_trigger = + max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page; + if (rx_refill_threshold != 0) { + trigger = max_fill * min(rx_refill_threshold, 100U) / 100U; + if (trigger > max_trigger) + trigger = max_trigger; + } else { + trigger = max_trigger; + } + + rx_queue->max_fill = max_fill; + rx_queue->fast_fill_trigger = trigger; + rx_queue->refill_enabled = true; + + /* Initialise XDP queue information */ + rc = xdp_rxq_info_reg(&rx_queue->xdp_rxq_info, efx->net_dev, + rx_queue->core_index); + + if (rc) { + netif_err(efx, rx_err, efx->net_dev, + "Failure to initialise XDP queue information rc=%d\n", + rc); + efx->xdp_rxq_info_failed = true; + } else { + rx_queue->xdp_rxq_info_valid = true; + } + + /* Set up RX descriptor ring */ + efx_nic_init_rx(rx_queue); +} + +void efx_fini_rx_queue(struct efx_rx_queue *rx_queue) +{ + struct efx_rx_buffer *rx_buf; + int i; + + netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, + "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue)); + + del_timer_sync(&rx_queue->slow_fill); + + /* Release RX buffers from the current read ptr to the write ptr */ + if (rx_queue->buffer) { + for (i = rx_queue->removed_count; i < rx_queue->added_count; + i++) { + unsigned int index = i & rx_queue->ptr_mask; + + rx_buf = efx_rx_buffer(rx_queue, index); + efx_fini_rx_buffer(rx_queue, rx_buf); + } + } + + efx_fini_rx_recycle_ring(rx_queue); + + if (rx_queue->xdp_rxq_info_valid) + xdp_rxq_info_unreg(&rx_queue->xdp_rxq_info); + + rx_queue->xdp_rxq_info_valid = false; +} + +void efx_remove_rx_queue(struct efx_rx_queue *rx_queue) +{ + netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, + "destroying RX queue %d\n", efx_rx_queue_index(rx_queue)); + + efx_nic_remove_rx(rx_queue); + + kfree(rx_queue->buffer); + rx_queue->buffer = NULL; +} + +/* Unmap a DMA-mapped page. 
This function is only called for the final RX + * buffer in a page. + */ +void efx_unmap_rx_buffer(struct efx_nic *efx, + struct efx_rx_buffer *rx_buf) +{ + struct page *page = rx_buf->page; + + if (page) { + struct efx_rx_page_state *state = page_address(page); + + dma_unmap_page(&efx->pci_dev->dev, + state->dma_addr, + PAGE_SIZE << efx->rx_buffer_order, + DMA_FROM_DEVICE); + } +} + +void efx_free_rx_buffers(struct efx_rx_queue *rx_queue, + struct efx_rx_buffer *rx_buf, + unsigned int num_bufs) +{ + do { + if (rx_buf->page) { + put_page(rx_buf->page); + rx_buf->page = NULL; + } + rx_buf = efx_rx_buf_next(rx_queue, rx_buf); + } while (--num_bufs); +} + +void efx_rx_slow_fill(struct timer_list *t) +{ + struct efx_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill); + + /* Post an event to cause NAPI to run and refill the queue */ + efx_nic_generate_fill_event(rx_queue); + ++rx_queue->slow_fill_count; +} + +void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue) +{ + mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(10)); +} + +/* efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers + * + * @rx_queue: Efx RX queue + * + * This allocates a batch of pages, maps them for DMA, and populates + * struct efx_rx_buffers for each one. Return a negative error code or + * 0 on success. If a single page can be used for multiple buffers, + * then the page will either be inserted fully, or not at all. + */ +static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic) +{ + unsigned int page_offset, index, count; + struct efx_nic *efx = rx_queue->efx; + struct efx_rx_page_state *state; + struct efx_rx_buffer *rx_buf; + dma_addr_t dma_addr; + struct page *page; + + count = 0; + do { + page = efx_reuse_page(rx_queue); + if (page == NULL) { + page = alloc_pages(__GFP_COMP | + (atomic ? GFP_ATOMIC : GFP_KERNEL), + efx->rx_buffer_order); + if (unlikely(page == NULL)) + return -ENOMEM; + dma_addr = + dma_map_page(&efx->pci_dev->dev, page, 0, + PAGE_SIZE << efx->rx_buffer_order, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(&efx->pci_dev->dev, + dma_addr))) { + __free_pages(page, efx->rx_buffer_order); + return -EIO; + } + state = page_address(page); + state->dma_addr = dma_addr; + } else { + state = page_address(page); + dma_addr = state->dma_addr; + } + + dma_addr += sizeof(struct efx_rx_page_state); + page_offset = sizeof(struct efx_rx_page_state); + + do { + index = rx_queue->added_count & rx_queue->ptr_mask; + rx_buf = efx_rx_buffer(rx_queue, index); + rx_buf->dma_addr = dma_addr + efx->rx_ip_align + + XDP_PACKET_HEADROOM; + rx_buf->page = page; + rx_buf->page_offset = page_offset + efx->rx_ip_align + + XDP_PACKET_HEADROOM; + rx_buf->len = efx->rx_dma_len; + rx_buf->flags = 0; + ++rx_queue->added_count; + get_page(page); + dma_addr += efx->rx_page_buf_step; + page_offset += efx->rx_page_buf_step; + } while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE); + + rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE; + } while (++count < efx->rx_pages_per_batch); + + return 0; +} + +void efx_rx_config_page_split(struct efx_nic *efx) +{ + efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align + + XDP_PACKET_HEADROOM, + EFX_RX_BUF_ALIGNMENT); + efx->rx_bufs_per_page = efx->rx_buffer_order ? 
1 : + ((PAGE_SIZE - sizeof(struct efx_rx_page_state)) / + efx->rx_page_buf_step); + efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) / + efx->rx_bufs_per_page; + efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH, + efx->rx_bufs_per_page); +} + +/* efx_fast_push_rx_descriptors - push new RX descriptors quickly + * @rx_queue: RX descriptor queue + * + * This will aim to fill the RX descriptor queue up to + * @rx_queue->@max_fill. If there is insufficient atomic + * memory to do so, a slow fill will be scheduled. + * + * The caller must provide serialisation (none is used here). In practise, + * this means this function must run from the NAPI handler, or be called + * when NAPI is disabled. + */ +void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic) +{ + struct efx_nic *efx = rx_queue->efx; + unsigned int fill_level, batch_size; + int space, rc = 0; + + if (!rx_queue->refill_enabled) + return; + + /* Calculate current fill level, and exit if we don't need to fill */ + fill_level = (rx_queue->added_count - rx_queue->removed_count); + EFX_WARN_ON_ONCE_PARANOID(fill_level > rx_queue->efx->rxq_entries); + if (fill_level >= rx_queue->fast_fill_trigger) + goto out; + + /* Record minimum fill level */ + if (unlikely(fill_level < rx_queue->min_fill)) { + if (fill_level) + rx_queue->min_fill = fill_level; + } + + batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page; + space = rx_queue->max_fill - fill_level; + EFX_WARN_ON_ONCE_PARANOID(space < batch_size); + + netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev, + "RX queue %d fast-filling descriptor ring from" + " level %d to level %d\n", + efx_rx_queue_index(rx_queue), fill_level, + rx_queue->max_fill); + + do { + rc = efx_init_rx_buffers(rx_queue, atomic); + if (unlikely(rc)) { + /* Ensure that we don't leave the rx queue empty */ + efx_schedule_slow_fill(rx_queue); + goto out; + } + } while ((space -= batch_size) >= batch_size); + + netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev, + "RX queue %d fast-filled descriptor ring " + "to level %d\n", efx_rx_queue_index(rx_queue), + rx_queue->added_count - rx_queue->removed_count); + + out: + if (rx_queue->notified_count != rx_queue->added_count) + efx_nic_notify_rx_desc(rx_queue); +} + +/* Pass a received packet up through GRO. GRO can handle pages + * regardless of checksum state and skbs with a good checksum. + */ +void +efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, + unsigned int n_frags, u8 *eh) +{ + struct napi_struct *napi = &channel->napi_str; + struct efx_nic *efx = channel->efx; + struct sk_buff *skb; + + skb = napi_get_frags(napi); + if (unlikely(!skb)) { + struct efx_rx_queue *rx_queue; + + rx_queue = efx_channel_get_rx_queue(channel); + efx_free_rx_buffers(rx_queue, rx_buf, n_frags); + return; + } + + if (efx->net_dev->features & NETIF_F_RXHASH) + skb_set_hash(skb, efx_rx_buf_hash(efx, eh), + PKT_HASH_TYPE_L3); + skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ? 
+ CHECKSUM_UNNECESSARY : CHECKSUM_NONE); + skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL); + + for (;;) { + skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, + rx_buf->page, rx_buf->page_offset, + rx_buf->len); + rx_buf->page = NULL; + skb->len += rx_buf->len; + if (skb_shinfo(skb)->nr_frags == n_frags) + break; + + rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf); + } + + skb->data_len = skb->len; + skb->truesize += n_frags * efx->rx_buffer_truesize; + + skb_record_rx_queue(skb, channel->rx_queue.core_index); + + napi_gro_frags(napi); +} + +/* RSS contexts. We're using linked lists and crappy O(n) algorithms, because + * (a) this is an infrequent control-plane operation and (b) n is small (max 64) + */ +struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx) +{ + struct list_head *head = &efx->rss_context.list; + struct efx_rss_context *ctx, *new; + u32 id = 1; /* Don't use zero, that refers to the master RSS context */ + + WARN_ON(!mutex_is_locked(&efx->rss_lock)); + + /* Search for first gap in the numbering */ + list_for_each_entry(ctx, head, list) { + if (ctx->user_id != id) + break; + id++; + /* Check for wrap. If this happens, we have nearly 2^32 + * allocated RSS contexts, which seems unlikely. + */ + if (WARN_ON_ONCE(!id)) + return NULL; + } + + /* Create the new entry */ + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return NULL; + new->context_id = EFX_MCDI_RSS_CONTEXT_INVALID; + new->rx_hash_udp_4tuple = false; + + /* Insert the new entry into the gap */ + new->user_id = id; + list_add_tail(&new->list, &ctx->list); + return new; +} + +struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id) +{ + struct list_head *head = &efx->rss_context.list; + struct efx_rss_context *ctx; + + WARN_ON(!mutex_is_locked(&efx->rss_lock)); + + list_for_each_entry(ctx, head, list) + if (ctx->user_id == id) + return ctx; + return NULL; +} + +void efx_free_rss_context_entry(struct efx_rss_context *ctx) +{ + list_del(&ctx->list); + kfree(ctx); +} + +void efx_set_default_rx_indir_table(struct efx_nic *efx, + struct efx_rss_context *ctx) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++) + ctx->rx_indir_table[i] = + ethtool_rxfh_indir_default(i, efx->rss_spread); +} + +/** + * efx_filter_is_mc_recipient - test whether spec is a multicast recipient + * @spec: Specification to test + * + * Return: %true if the specification is a non-drop RX filter that + * matches a local MAC address I/G bit value of 1 or matches a local + * IPv4 or IPv6 address value in the respective multicast address + * range. Otherwise %false. 
+ */ +bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec) +{ + if (!(spec->flags & EFX_FILTER_FLAG_RX) || + spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP) + return false; + + if (spec->match_flags & + (EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_LOC_MAC_IG) && + is_multicast_ether_addr(spec->loc_mac)) + return true; + + if ((spec->match_flags & + (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) == + (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) { + if (spec->ether_type == htons(ETH_P_IP) && + ipv4_is_multicast(spec->loc_host[0])) + return true; + if (spec->ether_type == htons(ETH_P_IPV6) && + ((const u8 *)spec->loc_host)[0] == 0xff) + return true; + } + + return false; +} + +bool efx_filter_spec_equal(const struct efx_filter_spec *left, + const struct efx_filter_spec *right) +{ + if ((left->match_flags ^ right->match_flags) | + ((left->flags ^ right->flags) & + (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX))) + return false; + + return memcmp(&left->outer_vid, &right->outer_vid, + sizeof(struct efx_filter_spec) - + offsetof(struct efx_filter_spec, outer_vid)) == 0; +} + +u32 efx_filter_spec_hash(const struct efx_filter_spec *spec) +{ + BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3); + return jhash2((const u32 *)&spec->outer_vid, + (sizeof(struct efx_filter_spec) - + offsetof(struct efx_filter_spec, outer_vid)) / 4, + 0); +} + +#ifdef CONFIG_RFS_ACCEL +bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx, + bool *force) +{ + if (rule->filter_id == EFX_ARFS_FILTER_ID_PENDING) { + /* ARFS is currently updating this entry, leave it */ + return false; + } + if (rule->filter_id == EFX_ARFS_FILTER_ID_ERROR) { + /* ARFS tried and failed to update this, so it's probably out + * of date. Remove the filter and the ARFS rule entry. + */ + rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING; + *force = true; + return true; + } else if (WARN_ON(rule->filter_id != filter_idx)) { /* can't happen */ + /* ARFS has moved on, so old filter is not needed. Since we did + * not mark the rule with EFX_ARFS_FILTER_ID_REMOVING, it will + * not be removed by efx_rps_hash_del() subsequently. + */ + *force = true; + return true; + } + /* Remove it iff ARFS wants to. 
*/ + return true; +} + +static +struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx, + const struct efx_filter_spec *spec) +{ + u32 hash = efx_filter_spec_hash(spec); + + lockdep_assert_held(&efx->rps_hash_lock); + if (!efx->rps_hash_table) + return NULL; + return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE]; +} + +struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx, + const struct efx_filter_spec *spec) +{ + struct efx_arfs_rule *rule; + struct hlist_head *head; + struct hlist_node *node; + + head = efx_rps_hash_bucket(efx, spec); + if (!head) + return NULL; + hlist_for_each(node, head) { + rule = container_of(node, struct efx_arfs_rule, node); + if (efx_filter_spec_equal(spec, &rule->spec)) + return rule; + } + return NULL; +} + +struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx, + const struct efx_filter_spec *spec, + bool *new) +{ + struct efx_arfs_rule *rule; + struct hlist_head *head; + struct hlist_node *node; + + head = efx_rps_hash_bucket(efx, spec); + if (!head) + return NULL; + hlist_for_each(node, head) { + rule = container_of(node, struct efx_arfs_rule, node); + if (efx_filter_spec_equal(spec, &rule->spec)) { + *new = false; + return rule; + } + } + rule = kmalloc(sizeof(*rule), GFP_ATOMIC); + *new = true; + if (rule) { + memcpy(&rule->spec, spec, sizeof(rule->spec)); + hlist_add_head(&rule->node, head); + } + return rule; +} + +void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec) +{ + struct efx_arfs_rule *rule; + struct hlist_head *head; + struct hlist_node *node; + + head = efx_rps_hash_bucket(efx, spec); + if (WARN_ON(!head)) + return; + hlist_for_each(node, head) { + rule = container_of(node, struct efx_arfs_rule, node); + if (efx_filter_spec_equal(spec, &rule->spec)) { + /* Someone already reused the entry. We know that if + * this check doesn't fire (i.e. filter_id == REMOVING) + * then the REMOVING mark was put there by our caller, + * because caller is holding a lock on filter table and + * only holders of that lock set REMOVING. + */ + if (rule->filter_id != EFX_ARFS_FILTER_ID_REMOVING) + return; + hlist_del(node); + kfree(rule); + return; + } + } + /* We didn't find it. 
*/ + WARN_ON(1); +} +#endif + +int efx_probe_filters(struct efx_nic *efx) +{ + int rc; + + init_rwsem(&efx->filter_sem); + mutex_lock(&efx->mac_lock); + down_write(&efx->filter_sem); + rc = efx->type->filter_table_probe(efx); + if (rc) + goto out_unlock; + +#ifdef CONFIG_RFS_ACCEL + if (efx->type->offload_features & NETIF_F_NTUPLE) { + struct efx_channel *channel; + int i, success = 1; + + efx_for_each_channel(channel, efx) { + channel->rps_flow_id = + kcalloc(efx->type->max_rx_ip_filters, + sizeof(*channel->rps_flow_id), + GFP_KERNEL); + if (!channel->rps_flow_id) + success = 0; + else + for (i = 0; + i < efx->type->max_rx_ip_filters; + ++i) + channel->rps_flow_id[i] = + RPS_FLOW_ID_INVALID; + channel->rfs_expire_index = 0; + channel->rfs_filter_count = 0; + } + + if (!success) { + efx_for_each_channel(channel, efx) + kfree(channel->rps_flow_id); + efx->type->filter_table_remove(efx); + rc = -ENOMEM; + goto out_unlock; + } + } +#endif +out_unlock: + up_write(&efx->filter_sem); + mutex_unlock(&efx->mac_lock); + return rc; +} + +void efx_remove_filters(struct efx_nic *efx) +{ +#ifdef CONFIG_RFS_ACCEL + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) { + cancel_delayed_work_sync(&channel->filter_work); + kfree(channel->rps_flow_id); + } +#endif + down_write(&efx->filter_sem); + efx->type->filter_table_remove(efx); + up_write(&efx->filter_sem); +} diff --git a/drivers/net/ethernet/sfc/rx_common.h b/drivers/net/ethernet/sfc/rx_common.h new file mode 100644 index 000000000000..c41f12a89477 --- /dev/null +++ b/drivers/net/ethernet/sfc/rx_common.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. 
+ */ + +#ifndef EFX_RX_COMMON_H +#define EFX_RX_COMMON_H + +/* Preferred number of descriptors to fill at once */ +#define EFX_RX_PREFERRED_BATCH 8U + +/* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */ +#define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \ + EFX_RX_USR_BUF_SIZE) + +static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf) +{ + return page_address(buf->page) + buf->page_offset; +} + +static inline u32 efx_rx_buf_hash(struct efx_nic *efx, const u8 *eh) +{ +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + return __le32_to_cpup((const __le32 *)(eh + efx->rx_packet_hash_offset)); +#else + const u8 *data = eh + efx->rx_packet_hash_offset; + + return (u32)data[0] | + (u32)data[1] << 8 | + (u32)data[2] << 16 | + (u32)data[3] << 24; +#endif +} + +void efx_rx_slow_fill(struct timer_list *t); + +void efx_recycle_rx_pages(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags); +void efx_discard_rx_packet(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags); + +int efx_probe_rx_queue(struct efx_rx_queue *rx_queue); +void efx_init_rx_queue(struct efx_rx_queue *rx_queue); +void efx_fini_rx_queue(struct efx_rx_queue *rx_queue); +void efx_remove_rx_queue(struct efx_rx_queue *rx_queue); +void efx_destroy_rx_queue(struct efx_rx_queue *rx_queue); + +void efx_init_rx_buffer(struct efx_rx_queue *rx_queue, + struct page *page, + unsigned int page_offset, + u16 flags); +void efx_unmap_rx_buffer(struct efx_nic *efx, struct efx_rx_buffer *rx_buf); +void efx_free_rx_buffers(struct efx_rx_queue *rx_queue, + struct efx_rx_buffer *rx_buf, + unsigned int num_bufs); + +void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue); +void efx_rx_config_page_split(struct efx_nic *efx); +void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic); + +void +efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, + unsigned int n_frags, u8 *eh); + +struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx); +struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id); +void efx_free_rss_context_entry(struct efx_rss_context *ctx); +void efx_set_default_rx_indir_table(struct efx_nic *efx, + struct efx_rss_context *ctx); + +bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec); +bool efx_filter_spec_equal(const struct efx_filter_spec *left, + const struct efx_filter_spec *right); +u32 efx_filter_spec_hash(const struct efx_filter_spec *spec); + +#ifdef CONFIG_RFS_ACCEL +bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx, + bool *force); +struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx, + const struct efx_filter_spec *spec); +struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx, + const struct efx_filter_spec *spec, + bool *new); +void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec); +#endif + +int efx_probe_filters(struct efx_nic *efx); +void efx_remove_filters(struct efx_nic *efx); + +#endif diff --git a/drivers/net/ethernet/sfc/selftest.c b/drivers/net/ethernet/sfc/selftest.c index 8474cf8ea7d3..1ae369022d7d 100644 --- a/drivers/net/ethernet/sfc/selftest.c +++ b/drivers/net/ethernet/sfc/selftest.c @@ -18,6 +18,8 @@ #include <linux/slab.h> #include "net_driver.h" #include "efx.h" +#include "efx_common.h" +#include "efx_channels.h" #include "nic.h" #include "selftest.h" #include "workarounds.h" @@ -783,7 +785,7 @@ void efx_selftest_async_cancel(struct 
efx_nic *efx) cancel_delayed_work_sync(&efx->selftest_work); } -void efx_selftest_async_work(struct work_struct *data) +static void efx_selftest_async_work(struct work_struct *data) { struct efx_nic *efx = container_of(data, struct efx_nic, selftest_work.work); @@ -802,3 +804,8 @@ void efx_selftest_async_work(struct work_struct *data) channel->channel, cpu); } } + +void efx_selftest_async_init(struct efx_nic *efx) +{ + INIT_DELAYED_WORK(&efx->selftest_work, efx_selftest_async_work); +} diff --git a/drivers/net/ethernet/sfc/selftest.h b/drivers/net/ethernet/sfc/selftest.h index a3553816d92c..ca88ebb4f6b1 100644 --- a/drivers/net/ethernet/sfc/selftest.h +++ b/drivers/net/ethernet/sfc/selftest.h @@ -45,8 +45,8 @@ void efx_loopback_rx_packet(struct efx_nic *efx, const char *buf_ptr, int pkt_len); int efx_selftest(struct efx_nic *efx, struct efx_self_tests *tests, unsigned flags); +void efx_selftest_async_init(struct efx_nic *efx); void efx_selftest_async_start(struct efx_nic *efx); void efx_selftest_async_cancel(struct efx_nic *efx); -void efx_selftest_async_work(struct work_struct *data); #endif /* EFX_SELFTEST_H */ diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c index 81499244a4b4..baa464161626 100644 --- a/drivers/net/ethernet/sfc/siena.c +++ b/drivers/net/ethernet/sfc/siena.c @@ -14,12 +14,14 @@ #include "net_driver.h" #include "bitfield.h" #include "efx.h" +#include "efx_common.h" #include "nic.h" #include "farch_regs.h" #include "io.h" #include "workarounds.h" #include "mcdi.h" #include "mcdi_pcol.h" +#include "mcdi_port_common.h" #include "selftest.h" #include "siena_sriov.h" diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c index dfbdf05dcf79..83dcfcae3d4b 100644 --- a/drivers/net/ethernet/sfc/siena_sriov.c +++ b/drivers/net/ethernet/sfc/siena_sriov.c @@ -7,6 +7,7 @@ #include <linux/module.h> #include "net_driver.h" #include "efx.h" +#include "efx_channels.h" #include "nic.h" #include "io.h" #include "mcdi.h" diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 00c1c4402451..04d7f41d7ed9 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -20,6 +20,7 @@ #include "io.h" #include "nic.h" #include "tx.h" +#include "tx_common.h" #include "workarounds.h" #include "ef10_regs.h" @@ -56,72 +57,6 @@ u8 *efx_tx_get_copy_buffer_limited(struct efx_tx_queue *tx_queue, return efx_tx_get_copy_buffer(tx_queue, buffer); } -static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, - struct efx_tx_buffer *buffer, - unsigned int *pkts_compl, - unsigned int *bytes_compl) -{ - if (buffer->unmap_len) { - struct device *dma_dev = &tx_queue->efx->pci_dev->dev; - dma_addr_t unmap_addr = buffer->dma_addr - buffer->dma_offset; - if (buffer->flags & EFX_TX_BUF_MAP_SINGLE) - dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len, - DMA_TO_DEVICE); - else - dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len, - DMA_TO_DEVICE); - buffer->unmap_len = 0; - } - - if (buffer->flags & EFX_TX_BUF_SKB) { - struct sk_buff *skb = (struct sk_buff *)buffer->skb; - - EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl); - (*pkts_compl)++; - (*bytes_compl) += skb->len; - if (tx_queue->timestamping && - (tx_queue->completed_timestamp_major || - tx_queue->completed_timestamp_minor)) { - struct skb_shared_hwtstamps hwtstamp; - - hwtstamp.hwtstamp = - efx_ptp_nic_to_kernel_time(tx_queue); - skb_tstamp_tx(skb, &hwtstamp); - - tx_queue->completed_timestamp_major = 0; - 
tx_queue->completed_timestamp_minor = 0; - } - dev_consume_skb_any((struct sk_buff *)buffer->skb); - netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev, - "TX queue %d transmission id %x complete\n", - tx_queue->queue, tx_queue->read_count); - } else if (buffer->flags & EFX_TX_BUF_XDP) { - xdp_return_frame_rx_napi(buffer->xdpf); - } - - buffer->len = 0; - buffer->flags = 0; -} - -unsigned int efx_tx_max_skb_descs(struct efx_nic *efx) -{ - /* Header and payload descriptor for each output segment, plus - * one for every input fragment boundary within a segment - */ - unsigned int max_descs = EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS; - - /* Possibly one more per segment for option descriptors */ - if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) - max_descs += EFX_TSO_MAX_SEGS; - - /* Possibly more for PCIe page boundaries within input fragments */ - if (PAGE_SIZE > EFX_PAGE_SIZE) - max_descs += max_t(unsigned int, MAX_SKB_FRAGS, - DIV_ROUND_UP(GSO_MAX_SIZE, EFX_PAGE_SIZE)); - - return max_descs; -} - static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1) { /* We need to consider both queues that the net core sees as one */ @@ -333,125 +268,6 @@ static int efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue, } #endif /* EFX_USE_PIO */ -static struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue, - dma_addr_t dma_addr, - size_t len) -{ - const struct efx_nic_type *nic_type = tx_queue->efx->type; - struct efx_tx_buffer *buffer; - unsigned int dma_len; - - /* Map the fragment taking account of NIC-dependent DMA limits. */ - do { - buffer = efx_tx_queue_get_insert_buffer(tx_queue); - dma_len = nic_type->tx_limit_len(tx_queue, dma_addr, len); - - buffer->len = dma_len; - buffer->dma_addr = dma_addr; - buffer->flags = EFX_TX_BUF_CONT; - len -= dma_len; - dma_addr += dma_len; - ++tx_queue->insert_count; - } while (len); - - return buffer; -} - -/* Map all data from an SKB for DMA and create descriptors on the queue. - */ -static int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb, - unsigned int segment_count) -{ - struct efx_nic *efx = tx_queue->efx; - struct device *dma_dev = &efx->pci_dev->dev; - unsigned int frag_index, nr_frags; - dma_addr_t dma_addr, unmap_addr; - unsigned short dma_flags; - size_t len, unmap_len; - - nr_frags = skb_shinfo(skb)->nr_frags; - frag_index = 0; - - /* Map header data. */ - len = skb_headlen(skb); - dma_addr = dma_map_single(dma_dev, skb->data, len, DMA_TO_DEVICE); - dma_flags = EFX_TX_BUF_MAP_SINGLE; - unmap_len = len; - unmap_addr = dma_addr; - - if (unlikely(dma_mapping_error(dma_dev, dma_addr))) - return -EIO; - - if (segment_count) { - /* For TSO we need to put the header in to a separate - * descriptor. Map this separately if necessary. - */ - size_t header_len = skb_transport_header(skb) - skb->data + - (tcp_hdr(skb)->doff << 2u); - - if (header_len != len) { - tx_queue->tso_long_headers++; - efx_tx_map_chunk(tx_queue, dma_addr, header_len); - len -= header_len; - dma_addr += header_len; - } - } - - /* Add descriptors for each fragment. */ - do { - struct efx_tx_buffer *buffer; - skb_frag_t *fragment; - - buffer = efx_tx_map_chunk(tx_queue, dma_addr, len); - - /* The final descriptor for a fragment is responsible for - * unmapping the whole fragment. - */ - buffer->flags = EFX_TX_BUF_CONT | dma_flags; - buffer->unmap_len = unmap_len; - buffer->dma_offset = buffer->dma_addr - unmap_addr; - - if (frag_index >= nr_frags) { - /* Store SKB details with the final buffer for - * the completion. 
- */ - buffer->skb = skb; - buffer->flags = EFX_TX_BUF_SKB | dma_flags; - return 0; - } - - /* Move on to the next fragment. */ - fragment = &skb_shinfo(skb)->frags[frag_index++]; - len = skb_frag_size(fragment); - dma_addr = skb_frag_dma_map(dma_dev, fragment, - 0, len, DMA_TO_DEVICE); - dma_flags = 0; - unmap_len = len; - unmap_addr = dma_addr; - - if (unlikely(dma_mapping_error(dma_dev, dma_addr))) - return -EIO; - } while (1); -} - -/* Remove buffers put into a tx_queue for the current packet. - * None of the buffers must have an skb attached. - */ -static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue, - unsigned int insert_count) -{ - struct efx_tx_buffer *buffer; - unsigned int bytes_compl = 0; - unsigned int pkts_compl = 0; - - /* Work backwards until we hit the original insert pointer value */ - while (tx_queue->insert_count != insert_count) { - --tx_queue->insert_count; - buffer = __efx_tx_queue_get_insert_buffer(tx_queue); - efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); - } -} - /* * Fallback to software TSO. * @@ -473,12 +289,9 @@ static int efx_tx_tso_fallback(struct efx_tx_queue *tx_queue, dev_consume_skb_any(skb); skb = segments; - while (skb) { - next = skb->next; - skb->next = NULL; - + skb_list_walk_safe(skb, skb, next) { + skb_mark_not_on_list(skb); efx_enqueue_skb(tx_queue, skb); - skb = next; } return 0; @@ -687,41 +500,6 @@ int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs, return i; } -/* Remove packets from the TX queue - * - * This removes packets from the TX queue, up to and including the - * specified index. - */ -static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue, - unsigned int index, - unsigned int *pkts_compl, - unsigned int *bytes_compl) -{ - struct efx_nic *efx = tx_queue->efx; - unsigned int stop_index, read_ptr; - - stop_index = (index + 1) & tx_queue->ptr_mask; - read_ptr = tx_queue->read_count & tx_queue->ptr_mask; - - while (read_ptr != stop_index) { - struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr]; - - if (!(buffer->flags & EFX_TX_BUF_OPTION) && - unlikely(buffer->len == 0)) { - netif_err(efx, tx_err, efx->net_dev, - "TX queue %d spurious TX completion id %x\n", - tx_queue->queue, read_ptr); - efx_schedule_reset(efx, RESET_TYPE_TX_SKIP); - return; - } - - efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl); - - ++tx_queue->read_count; - read_ptr = tx_queue->read_count & tx_queue->ptr_mask; - } -} - /* Initiate a packet transmission. We use one channel per CPU * (sharing when we have more CPUs than channels). On Falcon, the TX * completion events will be directed back to the CPU that transmitted @@ -834,173 +612,3 @@ int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type, net_dev->num_tc = num_tc; return 0; } - -void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) -{ - unsigned fill_level; - struct efx_nic *efx = tx_queue->efx; - struct efx_tx_queue *txq2; - unsigned int pkts_compl = 0, bytes_compl = 0; - - EFX_WARN_ON_ONCE_PARANOID(index > tx_queue->ptr_mask); - - efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl); - tx_queue->pkts_compl += pkts_compl; - tx_queue->bytes_compl += bytes_compl; - - if (pkts_compl > 1) - ++tx_queue->merge_events; - - /* See if we need to restart the netif queue. This memory - * barrier ensures that we write read_count (inside - * efx_dequeue_buffers()) before reading the queue status. 
- */ - smp_mb(); - if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) && - likely(efx->port_enabled) && - likely(netif_device_present(efx->net_dev))) { - txq2 = efx_tx_queue_partner(tx_queue); - fill_level = max(tx_queue->insert_count - tx_queue->read_count, - txq2->insert_count - txq2->read_count); - if (fill_level <= efx->txq_wake_thresh) - netif_tx_wake_queue(tx_queue->core_txq); - } - - /* Check whether the hardware queue is now empty */ - if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) { - tx_queue->old_write_count = READ_ONCE(tx_queue->write_count); - if (tx_queue->read_count == tx_queue->old_write_count) { - smp_mb(); - tx_queue->empty_read_count = - tx_queue->read_count | EFX_EMPTY_COUNT_VALID; - } - } -} - -static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue) -{ - return DIV_ROUND_UP(tx_queue->ptr_mask + 1, PAGE_SIZE >> EFX_TX_CB_ORDER); -} - -int efx_probe_tx_queue(struct efx_tx_queue *tx_queue) -{ - struct efx_nic *efx = tx_queue->efx; - unsigned int entries; - int rc; - - /* Create the smallest power-of-two aligned ring */ - entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE); - EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE); - tx_queue->ptr_mask = entries - 1; - - netif_dbg(efx, probe, efx->net_dev, - "creating TX queue %d size %#x mask %#x\n", - tx_queue->queue, efx->txq_entries, tx_queue->ptr_mask); - - /* Allocate software ring */ - tx_queue->buffer = kcalloc(entries, sizeof(*tx_queue->buffer), - GFP_KERNEL); - if (!tx_queue->buffer) - return -ENOMEM; - - tx_queue->cb_page = kcalloc(efx_tx_cb_page_count(tx_queue), - sizeof(tx_queue->cb_page[0]), GFP_KERNEL); - if (!tx_queue->cb_page) { - rc = -ENOMEM; - goto fail1; - } - - /* Allocate hardware ring */ - rc = efx_nic_probe_tx(tx_queue); - if (rc) - goto fail2; - - return 0; - -fail2: - kfree(tx_queue->cb_page); - tx_queue->cb_page = NULL; -fail1: - kfree(tx_queue->buffer); - tx_queue->buffer = NULL; - return rc; -} - -void efx_init_tx_queue(struct efx_tx_queue *tx_queue) -{ - struct efx_nic *efx = tx_queue->efx; - - netif_dbg(efx, drv, efx->net_dev, - "initialising TX queue %d\n", tx_queue->queue); - - tx_queue->insert_count = 0; - tx_queue->write_count = 0; - tx_queue->packet_write_count = 0; - tx_queue->old_write_count = 0; - tx_queue->read_count = 0; - tx_queue->old_read_count = 0; - tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID; - tx_queue->xmit_more_available = false; - tx_queue->timestamping = (efx_ptp_use_mac_tx_timestamps(efx) && - tx_queue->channel == efx_ptp_channel(efx)); - tx_queue->completed_desc_ptr = tx_queue->ptr_mask; - tx_queue->completed_timestamp_major = 0; - tx_queue->completed_timestamp_minor = 0; - - tx_queue->xdp_tx = efx_channel_is_xdp_tx(tx_queue->channel); - - /* Set up default function pointers. These may get replaced by - * efx_nic_init_tx() based off NIC/queue capabilities. 
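
[Editorial aside, not part of the patch: efx_probe_tx_queue() above sizes the ring by rounding the requested entry count up to a power of two, clamped to a minimum, so that "index & ptr_mask" can replace a modulo. The helper below is a plain-C stand-in for the kernel's roundup_pow_of_two(); EXAMPLE_MIN_ENTRIES is assumed for the demo.]

#include <stdio.h>

#define EXAMPLE_MIN_ENTRIES 512u        /* assumed minimum ring size */

static unsigned int roundup_pow2(unsigned int x)
{
        unsigned int p = 1;

        while (p < x)                   /* smallest power of two >= x */
                p <<= 1;
        return p;
}

int main(void)
{
        unsigned int requested = 700;
        unsigned int entries = roundup_pow2(requested);

        if (entries < EXAMPLE_MIN_ENTRIES)
                entries = EXAMPLE_MIN_ENTRIES;

        /* entries = 1024, ptr_mask = 0x3ff for a 700-entry request */
        printf("entries=%u ptr_mask=%#x\n", entries, entries - 1);
        return 0;
}
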
- */ - tx_queue->handle_tso = efx_enqueue_skb_tso; - - /* Set up TX descriptor ring */ - efx_nic_init_tx(tx_queue); - - tx_queue->initialised = true; -} - -void efx_fini_tx_queue(struct efx_tx_queue *tx_queue) -{ - struct efx_tx_buffer *buffer; - - netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev, - "shutting down TX queue %d\n", tx_queue->queue); - - if (!tx_queue->buffer) - return; - - /* Free any buffers left in the ring */ - while (tx_queue->read_count != tx_queue->write_count) { - unsigned int pkts_compl = 0, bytes_compl = 0; - buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask]; - efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); - - ++tx_queue->read_count; - } - tx_queue->xmit_more_available = false; - netdev_tx_reset_queue(tx_queue->core_txq); -} - -void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) -{ - int i; - - if (!tx_queue->buffer) - return; - - netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev, - "destroying TX queue %d\n", tx_queue->queue); - efx_nic_remove_tx(tx_queue); - - if (tx_queue->cb_page) { - for (i = 0; i < efx_tx_cb_page_count(tx_queue); i++) - efx_nic_free_buffer(tx_queue->efx, - &tx_queue->cb_page[i]); - kfree(tx_queue->cb_page); - tx_queue->cb_page = NULL; - } - - kfree(tx_queue->buffer); - tx_queue->buffer = NULL; -} diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c new file mode 100644 index 000000000000..b1571e9789d0 --- /dev/null +++ b/drivers/net/ethernet/sfc/tx_common.c @@ -0,0 +1,404 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. 
+ */ + +#include "net_driver.h" +#include "efx.h" +#include "nic.h" +#include "tx_common.h" + +static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue) +{ + return DIV_ROUND_UP(tx_queue->ptr_mask + 1, + PAGE_SIZE >> EFX_TX_CB_ORDER); +} + +int efx_probe_tx_queue(struct efx_tx_queue *tx_queue) +{ + struct efx_nic *efx = tx_queue->efx; + unsigned int entries; + int rc; + + /* Create the smallest power-of-two aligned ring */ + entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE); + EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE); + tx_queue->ptr_mask = entries - 1; + + netif_dbg(efx, probe, efx->net_dev, + "creating TX queue %d size %#x mask %#x\n", + tx_queue->queue, efx->txq_entries, tx_queue->ptr_mask); + + /* Allocate software ring */ + tx_queue->buffer = kcalloc(entries, sizeof(*tx_queue->buffer), + GFP_KERNEL); + if (!tx_queue->buffer) + return -ENOMEM; + + tx_queue->cb_page = kcalloc(efx_tx_cb_page_count(tx_queue), + sizeof(tx_queue->cb_page[0]), GFP_KERNEL); + if (!tx_queue->cb_page) { + rc = -ENOMEM; + goto fail1; + } + + /* Allocate hardware ring */ + rc = efx_nic_probe_tx(tx_queue); + if (rc) + goto fail2; + + return 0; + +fail2: + kfree(tx_queue->cb_page); + tx_queue->cb_page = NULL; +fail1: + kfree(tx_queue->buffer); + tx_queue->buffer = NULL; + return rc; +} + +void efx_init_tx_queue(struct efx_tx_queue *tx_queue) +{ + struct efx_nic *efx = tx_queue->efx; + + netif_dbg(efx, drv, efx->net_dev, + "initialising TX queue %d\n", tx_queue->queue); + + tx_queue->insert_count = 0; + tx_queue->write_count = 0; + tx_queue->packet_write_count = 0; + tx_queue->old_write_count = 0; + tx_queue->read_count = 0; + tx_queue->old_read_count = 0; + tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID; + tx_queue->xmit_more_available = false; + tx_queue->timestamping = (efx_ptp_use_mac_tx_timestamps(efx) && + tx_queue->channel == efx_ptp_channel(efx)); + tx_queue->completed_desc_ptr = tx_queue->ptr_mask; + tx_queue->completed_timestamp_major = 0; + tx_queue->completed_timestamp_minor = 0; + + tx_queue->xdp_tx = efx_channel_is_xdp_tx(tx_queue->channel); + + /* Set up default function pointers. These may get replaced by + * efx_nic_init_tx() based off NIC/queue capabilities. 
+ */ + tx_queue->handle_tso = efx_enqueue_skb_tso; + + /* Set up TX descriptor ring */ + efx_nic_init_tx(tx_queue); + + tx_queue->initialised = true; +} + +void efx_fini_tx_queue(struct efx_tx_queue *tx_queue) +{ + struct efx_tx_buffer *buffer; + + netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev, + "shutting down TX queue %d\n", tx_queue->queue); + + if (!tx_queue->buffer) + return; + + /* Free any buffers left in the ring */ + while (tx_queue->read_count != tx_queue->write_count) { + unsigned int pkts_compl = 0, bytes_compl = 0; + + buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask]; + efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); + + ++tx_queue->read_count; + } + tx_queue->xmit_more_available = false; + netdev_tx_reset_queue(tx_queue->core_txq); +} + +void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) +{ + int i; + + if (!tx_queue->buffer) + return; + + netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev, + "destroying TX queue %d\n", tx_queue->queue); + efx_nic_remove_tx(tx_queue); + + if (tx_queue->cb_page) { + for (i = 0; i < efx_tx_cb_page_count(tx_queue); i++) + efx_nic_free_buffer(tx_queue->efx, + &tx_queue->cb_page[i]); + kfree(tx_queue->cb_page); + tx_queue->cb_page = NULL; + } + + kfree(tx_queue->buffer); + tx_queue->buffer = NULL; +} + +void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, + struct efx_tx_buffer *buffer, + unsigned int *pkts_compl, + unsigned int *bytes_compl) +{ + if (buffer->unmap_len) { + struct device *dma_dev = &tx_queue->efx->pci_dev->dev; + dma_addr_t unmap_addr = buffer->dma_addr - buffer->dma_offset; + + if (buffer->flags & EFX_TX_BUF_MAP_SINGLE) + dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len, + DMA_TO_DEVICE); + else + dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len, + DMA_TO_DEVICE); + buffer->unmap_len = 0; + } + + if (buffer->flags & EFX_TX_BUF_SKB) { + struct sk_buff *skb = (struct sk_buff *)buffer->skb; + + EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl); + (*pkts_compl)++; + (*bytes_compl) += skb->len; + if (tx_queue->timestamping && + (tx_queue->completed_timestamp_major || + tx_queue->completed_timestamp_minor)) { + struct skb_shared_hwtstamps hwtstamp; + + hwtstamp.hwtstamp = + efx_ptp_nic_to_kernel_time(tx_queue); + skb_tstamp_tx(skb, &hwtstamp); + + tx_queue->completed_timestamp_major = 0; + tx_queue->completed_timestamp_minor = 0; + } + dev_consume_skb_any((struct sk_buff *)buffer->skb); + netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev, + "TX queue %d transmission id %x complete\n", + tx_queue->queue, tx_queue->read_count); + } else if (buffer->flags & EFX_TX_BUF_XDP) { + xdp_return_frame_rx_napi(buffer->xdpf); + } + + buffer->len = 0; + buffer->flags = 0; +} + +/* Remove packets from the TX queue + * + * This removes packets from the TX queue, up to and including the + * specified index. 
+ */ +static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue, + unsigned int index, + unsigned int *pkts_compl, + unsigned int *bytes_compl) +{ + struct efx_nic *efx = tx_queue->efx; + unsigned int stop_index, read_ptr; + + stop_index = (index + 1) & tx_queue->ptr_mask; + read_ptr = tx_queue->read_count & tx_queue->ptr_mask; + + while (read_ptr != stop_index) { + struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr]; + + if (!(buffer->flags & EFX_TX_BUF_OPTION) && + unlikely(buffer->len == 0)) { + netif_err(efx, tx_err, efx->net_dev, + "TX queue %d spurious TX completion id %x\n", + tx_queue->queue, read_ptr); + efx_schedule_reset(efx, RESET_TYPE_TX_SKIP); + return; + } + + efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl); + + ++tx_queue->read_count; + read_ptr = tx_queue->read_count & tx_queue->ptr_mask; + } +} + +void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) +{ + unsigned int fill_level, pkts_compl = 0, bytes_compl = 0; + struct efx_nic *efx = tx_queue->efx; + struct efx_tx_queue *txq2; + + EFX_WARN_ON_ONCE_PARANOID(index > tx_queue->ptr_mask); + + efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl); + tx_queue->pkts_compl += pkts_compl; + tx_queue->bytes_compl += bytes_compl; + + if (pkts_compl > 1) + ++tx_queue->merge_events; + + /* See if we need to restart the netif queue. This memory + * barrier ensures that we write read_count (inside + * efx_dequeue_buffers()) before reading the queue status. + */ + smp_mb(); + if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) && + likely(efx->port_enabled) && + likely(netif_device_present(efx->net_dev))) { + txq2 = efx_tx_queue_partner(tx_queue); + fill_level = max(tx_queue->insert_count - tx_queue->read_count, + txq2->insert_count - txq2->read_count); + if (fill_level <= efx->txq_wake_thresh) + netif_tx_wake_queue(tx_queue->core_txq); + } + + /* Check whether the hardware queue is now empty */ + if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) { + tx_queue->old_write_count = READ_ONCE(tx_queue->write_count); + if (tx_queue->read_count == tx_queue->old_write_count) { + smp_mb(); + tx_queue->empty_read_count = + tx_queue->read_count | EFX_EMPTY_COUNT_VALID; + } + } +} + +/* Remove buffers put into a tx_queue for the current packet. + * None of the buffers must have an skb attached. + */ +void efx_enqueue_unwind(struct efx_tx_queue *tx_queue, + unsigned int insert_count) +{ + struct efx_tx_buffer *buffer; + unsigned int bytes_compl = 0; + unsigned int pkts_compl = 0; + + /* Work backwards until we hit the original insert pointer value */ + while (tx_queue->insert_count != insert_count) { + --tx_queue->insert_count; + buffer = __efx_tx_queue_get_insert_buffer(tx_queue); + efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); + } +} + +struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue, + dma_addr_t dma_addr, size_t len) +{ + const struct efx_nic_type *nic_type = tx_queue->efx->type; + struct efx_tx_buffer *buffer; + unsigned int dma_len; + + /* Map the fragment taking account of NIC-dependent DMA limits. */ + do { + buffer = efx_tx_queue_get_insert_buffer(tx_queue); + dma_len = nic_type->tx_limit_len(tx_queue, dma_addr, len); + + buffer->len = dma_len; + buffer->dma_addr = dma_addr; + buffer->flags = EFX_TX_BUF_CONT; + len -= dma_len; + dma_addr += dma_len; + ++tx_queue->insert_count; + } while (len); + + return buffer; +} + +/* Map all data from an SKB for DMA and create descriptors on the queue. 
*/ +int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb, + unsigned int segment_count) +{ + struct efx_nic *efx = tx_queue->efx; + struct device *dma_dev = &efx->pci_dev->dev; + unsigned int frag_index, nr_frags; + dma_addr_t dma_addr, unmap_addr; + unsigned short dma_flags; + size_t len, unmap_len; + + nr_frags = skb_shinfo(skb)->nr_frags; + frag_index = 0; + + /* Map header data. */ + len = skb_headlen(skb); + dma_addr = dma_map_single(dma_dev, skb->data, len, DMA_TO_DEVICE); + dma_flags = EFX_TX_BUF_MAP_SINGLE; + unmap_len = len; + unmap_addr = dma_addr; + + if (unlikely(dma_mapping_error(dma_dev, dma_addr))) + return -EIO; + + if (segment_count) { + /* For TSO we need to put the header in to a separate + * descriptor. Map this separately if necessary. + */ + size_t header_len = skb_transport_header(skb) - skb->data + + (tcp_hdr(skb)->doff << 2u); + + if (header_len != len) { + tx_queue->tso_long_headers++; + efx_tx_map_chunk(tx_queue, dma_addr, header_len); + len -= header_len; + dma_addr += header_len; + } + } + + /* Add descriptors for each fragment. */ + do { + struct efx_tx_buffer *buffer; + skb_frag_t *fragment; + + buffer = efx_tx_map_chunk(tx_queue, dma_addr, len); + + /* The final descriptor for a fragment is responsible for + * unmapping the whole fragment. + */ + buffer->flags = EFX_TX_BUF_CONT | dma_flags; + buffer->unmap_len = unmap_len; + buffer->dma_offset = buffer->dma_addr - unmap_addr; + + if (frag_index >= nr_frags) { + /* Store SKB details with the final buffer for + * the completion. + */ + buffer->skb = skb; + buffer->flags = EFX_TX_BUF_SKB | dma_flags; + return 0; + } + + /* Move on to the next fragment. */ + fragment = &skb_shinfo(skb)->frags[frag_index++]; + len = skb_frag_size(fragment); + dma_addr = skb_frag_dma_map(dma_dev, fragment, 0, len, + DMA_TO_DEVICE); + dma_flags = 0; + unmap_len = len; + unmap_addr = dma_addr; + + if (unlikely(dma_mapping_error(dma_dev, dma_addr))) + return -EIO; + } while (1); +} + +unsigned int efx_tx_max_skb_descs(struct efx_nic *efx) +{ + /* Header and payload descriptor for each output segment, plus + * one for every input fragment boundary within a segment + */ + unsigned int max_descs = EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS; + + /* Possibly one more per segment for option descriptors */ + if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) + max_descs += EFX_TSO_MAX_SEGS; + + /* Possibly more for PCIe page boundaries within input fragments */ + if (PAGE_SIZE > EFX_PAGE_SIZE) + max_descs += max_t(unsigned int, MAX_SKB_FRAGS, + DIV_ROUND_UP(GSO_MAX_SIZE, EFX_PAGE_SIZE)); + + return max_descs; +} diff --git a/drivers/net/ethernet/sfc/tx_common.h b/drivers/net/ethernet/sfc/tx_common.h new file mode 100644 index 000000000000..f92f1fe3a87f --- /dev/null +++ b/drivers/net/ethernet/sfc/tx_common.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. 
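
[Editorial aside, not part of the patch: efx_tx_max_skb_descs() above bounds how many descriptors one skb can need. With assumed values of 100 for EFX_TSO_MAX_SEGS, 17 for MAX_SKB_FRAGS, 65536 for GSO_MAX_SIZE, 4096 for EFX_PAGE_SIZE and a 64 KB host page size to exercise the last branch (check the headers for the real constants), the arithmetic works out as below.]

#include <stdio.h>

int main(void)
{
        unsigned int tso_max_segs   = 100;      /* assumed EFX_TSO_MAX_SEGS */
        unsigned int max_skb_frags  = 17;       /* assumed MAX_SKB_FRAGS */
        unsigned int gso_max_size   = 65536;    /* assumed GSO_MAX_SIZE */
        unsigned int efx_page_size  = 4096;     /* assumed EFX_PAGE_SIZE */
        unsigned int host_page_size = 65536;    /* assumed 64K host pages */
        unsigned int max_descs;

        /* header + payload descriptor per segment, plus fragment boundaries */
        max_descs = tso_max_segs * 2 + max_skb_frags;           /* 217 */

        /* newer NICs may need one option descriptor per segment */
        max_descs += tso_max_segs;                              /* 317 */

        /* extra descriptors for NIC-page crossings inside input fragments */
        if (host_page_size > efx_page_size) {
                unsigned int crossings =
                        (gso_max_size + efx_page_size - 1) / efx_page_size; /* 16 */

                max_descs += crossings > max_skb_frags ? crossings
                                                       : max_skb_frags;    /* +17 = 334 */
        }

        printf("worst-case descriptors: %u\n", max_descs);
        return 0;
}
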
+ */ + +#ifndef EFX_TX_COMMON_H +#define EFX_TX_COMMON_H + +int efx_probe_tx_queue(struct efx_tx_queue *tx_queue); +void efx_init_tx_queue(struct efx_tx_queue *tx_queue); +void efx_fini_tx_queue(struct efx_tx_queue *tx_queue); +void efx_remove_tx_queue(struct efx_tx_queue *tx_queue); + +void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, + struct efx_tx_buffer *buffer, + unsigned int *pkts_compl, + unsigned int *bytes_compl); + +void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index); + +void efx_enqueue_unwind(struct efx_tx_queue *tx_queue, + unsigned int insert_count); + +struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue, + dma_addr_t dma_addr, size_t len); +int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb, + unsigned int segment_count); + +unsigned int efx_tx_max_skb_descs(struct efx_nic *efx); + +#endif diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c index d242906ae233..06637b03deed 100644 --- a/drivers/net/ethernet/sgi/ioc3-eth.c +++ b/drivers/net/ethernet/sgi/ioc3-eth.c @@ -114,7 +114,7 @@ struct ioc3_private { static int ioc3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static void ioc3_set_multicast_list(struct net_device *dev); static netdev_tx_t ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev); -static void ioc3_timeout(struct net_device *dev); +static void ioc3_timeout(struct net_device *dev, unsigned int txqueue); static inline unsigned int ioc3_hash(const unsigned char *addr); static void ioc3_start(struct ioc3_private *ip); static inline void ioc3_stop(struct ioc3_private *ip); @@ -1479,7 +1479,7 @@ drop_packet: return NETDEV_TX_OK; } -static void ioc3_timeout(struct net_device *dev) +static void ioc3_timeout(struct net_device *dev, unsigned int txqueue) { struct ioc3_private *ip = netdev_priv(dev); diff --git a/drivers/net/ethernet/sgi/meth.c b/drivers/net/ethernet/sgi/meth.c index 539bc5db989c..0c396ecd3389 100644 --- a/drivers/net/ethernet/sgi/meth.c +++ b/drivers/net/ethernet/sgi/meth.c @@ -90,7 +90,7 @@ struct meth_private { spinlock_t meth_lock; }; -static void meth_tx_timeout(struct net_device *dev); +static void meth_tx_timeout(struct net_device *dev, unsigned int txqueue); static irqreturn_t meth_interrupt(int irq, void *dev_id); /* global, initialized in ip32-setup.c */ @@ -727,7 +727,7 @@ static netdev_tx_t meth_tx(struct sk_buff *skb, struct net_device *dev) /* * Deal with a transmit timeout. 
*/ -static void meth_tx_timeout(struct net_device *dev) +static void meth_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct meth_private *priv = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/ethernet/silan/sc92031.c b/drivers/net/ethernet/silan/sc92031.c index c7641a236eb8..cb043eb1bdc1 100644 --- a/drivers/net/ethernet/silan/sc92031.c +++ b/drivers/net/ethernet/silan/sc92031.c @@ -1078,7 +1078,7 @@ static void sc92031_set_multicast_list(struct net_device *dev) spin_unlock_bh(&priv->lock); } -static void sc92031_tx_timeout(struct net_device *dev) +static void sc92031_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct sc92031_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c index 5b351beb78cb..5a4b6e3ab38f 100644 --- a/drivers/net/ethernet/sis/sis190.c +++ b/drivers/net/ethernet/sis/sis190.c @@ -1538,7 +1538,7 @@ err_out_0: goto out; } -static void sis190_tx_timeout(struct net_device *dev) +static void sis190_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct sis190_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp->mmio_addr; diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 85eaccbbbac1..81ed7589e33c 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -222,7 +222,7 @@ static int mdio_read(struct net_device *net_dev, int phy_id, int location); static void mdio_write(struct net_device *net_dev, int phy_id, int location, int val); static void sis900_timer(struct timer_list *t); static void sis900_check_mode (struct net_device *net_dev, struct mii_phy *mii_phy); -static void sis900_tx_timeout(struct net_device *net_dev); +static void sis900_tx_timeout(struct net_device *net_dev, unsigned int txqueue); static void sis900_init_tx_ring(struct net_device *net_dev); static void sis900_init_rx_ring(struct net_device *net_dev); static netdev_tx_t sis900_start_xmit(struct sk_buff *skb, @@ -1537,7 +1537,7 @@ static void sis900_read_mode(struct net_device *net_dev, int *speed, int *duplex * disable interrupts and do some tasks */ -static void sis900_tx_timeout(struct net_device *net_dev) +static void sis900_tx_timeout(struct net_device *net_dev, unsigned int txqueue) { struct sis900_private *sis_priv = netdev_priv(net_dev); void __iomem *ioaddr = sis_priv->ioaddr; diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index be47d864f8b9..61ddee0c2a2e 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -280,6 +280,7 @@ struct epic_private { signed char phys[4]; /* MII device addresses. */ u16 advertising; /* NWay media advertisement */ int mii_phy_cnt; + u32 ethtool_ops_nesting; struct mii_if_info mii; unsigned int tx_full:1; /* The Tx queue is full. */ unsigned int default_port:4; /* Last dev->if_port value. 
*/ @@ -291,7 +292,7 @@ static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *dev, int phy_id, int loc, int val); static void epic_restart(struct net_device *dev); static void epic_timer(struct timer_list *t); -static void epic_tx_timeout(struct net_device *dev); +static void epic_tx_timeout(struct net_device *dev, unsigned int txqueue); static void epic_init_ring(struct net_device *dev); static netdev_tx_t epic_start_xmit(struct sk_buff *skb, struct net_device *dev); @@ -861,7 +862,7 @@ static void epic_timer(struct timer_list *t) add_timer(&ep->timer); } -static void epic_tx_timeout(struct net_device *dev) +static void epic_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct epic_private *ep = netdev_priv(dev); void __iomem *ioaddr = ep->ioaddr; @@ -1435,8 +1436,10 @@ static int ethtool_begin(struct net_device *dev) struct epic_private *ep = netdev_priv(dev); void __iomem *ioaddr = ep->ioaddr; + if (ep->ethtool_ops_nesting == U32_MAX) + return -EBUSY; /* power-up, if interface is down */ - if (!netif_running(dev)) { + if (!ep->ethtool_ops_nesting++ && !netif_running(dev)) { ew32(GENCTL, 0x0200); ew32(NVCTL, (er32(NVCTL) & ~0x003c) | 0x4800); } @@ -1449,7 +1452,7 @@ static void ethtool_complete(struct net_device *dev) void __iomem *ioaddr = ep->ioaddr; /* power-down, if interface is down */ - if (!netif_running(dev)) { + if (!--ep->ethtool_ops_nesting && !netif_running(dev)) { ew32(GENCTL, 0x0008); ew32(NVCTL, (er32(NVCTL) & ~0x483c) | 0x0000); } diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index 8d88e4083456..186c0bddbe5f 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -936,7 +936,7 @@ static void smc911x_phy_configure(struct work_struct *work) if (lp->ctl_rspeed != 100) my_ad_caps &= ~(ADVERTISE_100BASE4|ADVERTISE_100FULL|ADVERTISE_100HALF); - if (!lp->ctl_rfduplx) + if (!lp->ctl_rfduplx) my_ad_caps &= ~(ADVERTISE_100FULL|ADVERTISE_10FULL); /* Update our Auto-Neg Advertisement Register */ @@ -1245,7 +1245,7 @@ static void smc911x_poll_controller(struct net_device *dev) #endif /* Our watchdog timed out. Called by the networking layer */ -static void smc911x_timeout(struct net_device *dev) +static void smc911x_timeout(struct net_device *dev, unsigned int txqueue) { struct smc911x_local *lp = netdev_priv(dev); int status, mask; diff --git a/drivers/net/ethernet/smsc/smc9194.c b/drivers/net/ethernet/smsc/smc9194.c index d3bb2ba51f40..4b2330deed47 100644 --- a/drivers/net/ethernet/smsc/smc9194.c +++ b/drivers/net/ethernet/smsc/smc9194.c @@ -216,7 +216,7 @@ static int smc_open(struct net_device *dev); /* . Our watchdog timed out. Called by the networking layer */ -static void smc_timeout(struct net_device *dev); +static void smc_timeout(struct net_device *dev, unsigned int txqueue); /* . This is called by the kernel in response to 'ifconfig ethX down'. It @@ -1094,7 +1094,7 @@ static int smc_open(struct net_device *dev) .-------------------------------------------------------- */ -static void smc_timeout(struct net_device *dev) +static void smc_timeout(struct net_device *dev, unsigned int txqueue) { /* If we get here, some higher level has decided we are broken. There should really be a "kick me" function call instead. 
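
[Editorial aside, not part of the patch: the epic100 hunk above guards the ethtool begin/complete power hooks with a nesting counter, so only the outermost begin powers the chip up, only the matching outermost complete powers it down, and the counter cannot overflow. The toy below models just that counting logic; power_up()/power_down() and the running flag stand in for the register writes and netif_running().]

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <errno.h>

static uint32_t nesting;
static bool if_running;                 /* stand-in for netif_running(dev) */

static void power_up(void)   { printf("power up\n"); }
static void power_down(void) { printf("power down\n"); }

static int ops_begin(void)
{
        if (nesting == UINT32_MAX)
                return -EBUSY;          /* refuse rather than wrap the counter */
        if (!nesting++ && !if_running)
                power_up();             /* only the first, outermost begin */
        return 0;
}

static void ops_complete(void)
{
        if (!--nesting && !if_running)
                power_down();           /* only the last, matching complete */
}

int main(void)
{
        ops_begin();    /* powers up */
        ops_begin();    /* nested: no extra power up */
        ops_complete();
        ops_complete(); /* powers down */
        return 0;
}
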
*/ diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c index a55f430f6a7b..f2a50eb3c1e0 100644 --- a/drivers/net/ethernet/smsc/smc91c92_cs.c +++ b/drivers/net/ethernet/smsc/smc91c92_cs.c @@ -271,7 +271,7 @@ static void smc91c92_release(struct pcmcia_device *link); static int smc_open(struct net_device *dev); static int smc_close(struct net_device *dev); static int smc_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); -static void smc_tx_timeout(struct net_device *dev); +static void smc_tx_timeout(struct net_device *dev, unsigned int txqueue); static netdev_tx_t smc_start_xmit(struct sk_buff *skb, struct net_device *dev); static irqreturn_t smc_interrupt(int irq, void *dev_id); @@ -1178,7 +1178,7 @@ static void smc_hardware_send_packet(struct net_device * dev) /*====================================================================*/ -static void smc_tx_timeout(struct net_device *dev) +static void smc_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct smc_private *smc = netdev_priv(dev); unsigned int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 3a6761131f4c..90410f9d3b1a 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -1321,7 +1321,7 @@ static void smc_poll_controller(struct net_device *dev) #endif /* Our watchdog timed out. Called by the networking layer */ -static void smc_timeout(struct net_device *dev) +static void smc_timeout(struct net_device *dev, unsigned int txqueue) { struct smc_local *lp = netdev_priv(dev); void __iomem *ioaddr = lp->base; diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 38068fc34141..49a6a9167af4 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -1943,15 +1943,6 @@ static int smsc911x_set_mac_address(struct net_device *dev, void *p) return 0; } -/* Standard ioctls for mii-tool */ -static int smsc911x_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -{ - if (!netif_running(dev) || !dev->phydev) - return -EINVAL; - - return phy_mii_ioctl(dev->phydev, ifr, cmd); -} - static void smsc911x_ethtool_getdrvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { @@ -2151,7 +2142,7 @@ static const struct net_device_ops smsc911x_netdev_ops = { .ndo_start_xmit = smsc911x_hard_start_xmit, .ndo_get_stats = smsc911x_get_stats, .ndo_set_rx_mode = smsc911x_set_multicast_list, - .ndo_do_ioctl = smsc911x_do_ioctl, + .ndo_do_ioctl = phy_do_ioctl_running, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = smsc911x_set_mac_address, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -2454,7 +2445,7 @@ static int smsc911x_drv_probe(struct platform_device *pdev) pdata = netdev_priv(dev); dev->irq = irq; - pdata->ioaddr = ioremap_nocache(res->start, res_size); + pdata->ioaddr = ioremap(res->start, res_size); if (!pdata->ioaddr) { retval = -ENOMEM; goto out_ioremap_fail; diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c index a6962a41c3d2..7312e522c022 100644 --- a/drivers/net/ethernet/smsc/smsc9420.c +++ b/drivers/net/ethernet/smsc/smsc9420.c @@ -210,15 +210,6 @@ static int smsc9420_eeprom_reload(struct smsc9420_pdata *pd) return -EIO; } -/* Standard ioctls for mii-tool */ -static int smsc9420_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -{ - if (!netif_running(dev) || !dev->phydev) - return -EINVAL; - - return phy_mii_ioctl(dev->phydev, ifr, cmd); -} - 
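
[Editorial aside, not part of the patch: the two helpers removed above (smsc911x_do_ioctl and smsc9420_do_ioctl) open-coded the same guard, "fail unless the interface is running and has a PHY, otherwise forward to phy_mii_ioctl()", which the shared phy_do_ioctl_running() helper now provides. The toy below shows the shape of that refactor with made-up types: several drivers share one guarded forwarder instead of duplicating the check.]

#include <stdio.h>
#include <stddef.h>
#include <errno.h>

struct toy_netdev {
        int running;                    /* stand-in for netif_running() */
        void *phydev;                   /* stand-in for dev->phydev */
};

static int toy_phy_mii_ioctl(struct toy_netdev *dev, int cmd)
{
        printf("PHY handles ioctl %d\n", cmd);
        return 0;
}

/* one shared helper replaces each driver's private copy of the guard */
static int toy_phy_do_ioctl_running(struct toy_netdev *dev, int cmd)
{
        if (!dev->running || !dev->phydev)
                return -EINVAL;
        return toy_phy_mii_ioctl(dev, cmd);
}

int main(void)
{
        struct toy_netdev down = { 0, NULL };
        struct toy_netdev up   = { 1, (void *)1 };

        printf("%d\n", toy_phy_do_ioctl_running(&down, 1));    /* -EINVAL */
        printf("%d\n", toy_phy_do_ioctl_running(&up, 1));      /* 0 */
        return 0;
}
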
static void smsc9420_ethtool_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { @@ -1504,7 +1495,7 @@ static const struct net_device_ops smsc9420_netdev_ops = { .ndo_start_xmit = smsc9420_hard_start_xmit, .ndo_get_stats = smsc9420_get_stats, .ndo_set_rx_mode = smsc9420_set_multicast_list, - .ndo_do_ioctl = smsc9420_do_ioctl, + .ndo_do_ioctl = phy_do_ioctl_running, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 869a498e3b5e..e8224b543dfc 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -243,6 +243,7 @@ NET_IP_ALIGN) #define NETSEC_RX_BUF_NON_DATA (NETSEC_RXBUF_HEADROOM + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) +#define NETSEC_RX_BUF_SIZE (PAGE_SIZE - NETSEC_RX_BUF_NON_DATA) #define DESC_SZ sizeof(struct netsec_de) @@ -719,7 +720,6 @@ static void *netsec_alloc_rx_data(struct netsec_priv *priv, { struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX]; - enum dma_data_direction dma_dir; struct page *page; page = page_pool_dev_alloc_pages(dring->page_pool); @@ -734,9 +734,7 @@ static void *netsec_alloc_rx_data(struct netsec_priv *priv, /* Make sure the incoming payload fits in the page for XDP and non-XDP * cases and reserve enough space for headroom + skb_shared_info */ - *desc_len = PAGE_SIZE - NETSEC_RX_BUF_NON_DATA; - dma_dir = page_pool_get_dma_dir(dring->page_pool); - dma_sync_single_for_device(priv->dev, *dma_handle, *desc_len, dma_dir); + *desc_len = NETSEC_RX_BUF_SIZE; return page_address(page); } @@ -883,6 +881,8 @@ static u32 netsec_xdp_xmit_back(struct netsec_priv *priv, struct xdp_buff *xdp) static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog, struct xdp_buff *xdp) { + struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX]; + unsigned int len = xdp->data_end - xdp->data; u32 ret = NETSEC_XDP_PASS; int err; u32 act; @@ -896,7 +896,9 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog, case XDP_TX: ret = netsec_xdp_xmit_back(priv, xdp); if (ret != NETSEC_XDP_TX) - xdp_return_buff(xdp); + __page_pool_put_page(dring->page_pool, + virt_to_head_page(xdp->data), + len, true); break; case XDP_REDIRECT: err = xdp_do_redirect(priv->ndev, xdp, prog); @@ -904,7 +906,9 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog, ret = NETSEC_XDP_REDIR; } else { ret = NETSEC_XDP_CONSUMED; - xdp_return_buff(xdp); + __page_pool_put_page(dring->page_pool, + virt_to_head_page(xdp->data), + len, true); } break; default: @@ -915,7 +919,9 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog, /* fall through -- handle aborts by dropping packet */ case XDP_DROP: ret = NETSEC_XDP_CONSUMED; - xdp_return_buff(xdp); + __page_pool_put_page(dring->page_pool, + virt_to_head_page(xdp->data), + len, true); break; } @@ -929,7 +935,6 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) struct netsec_rx_pkt_info rx_info; enum dma_data_direction dma_dir; struct bpf_prog *xdp_prog; - struct sk_buff *skb = NULL; u16 xdp_xmit = 0; u32 xdp_act = 0; int done = 0; @@ -943,7 +948,8 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) struct netsec_de *de = dring->vaddr + (DESC_SZ * idx); struct netsec_desc *desc = &dring->desc[idx]; struct page *page = virt_to_page(desc->addr); - u32 xdp_result = XDP_PASS; + u32 xdp_result = 
NETSEC_XDP_PASS; + struct sk_buff *skb = NULL; u16 pkt_len, desc_len; dma_addr_t dma_handle; struct xdp_buff xdp; @@ -1014,7 +1020,8 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) * cache state. Since we paid the allocation cost if * building an skb fails try to put the page into cache */ - page_pool_recycle_direct(dring->page_pool, page); + __page_pool_put_page(dring->page_pool, page, + pkt_len, true); netif_err(priv, drv, priv->ndev, "rx failed to build skb\n"); break; @@ -1272,17 +1279,19 @@ static int netsec_setup_rx_dring(struct netsec_priv *priv) { struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX]; struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog); - struct page_pool_params pp_params = { 0 }; + struct page_pool_params pp_params = { + .order = 0, + /* internal DMA mapping in page_pool */ + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, + .pool_size = DESC_NUM, + .nid = NUMA_NO_NODE, + .dev = priv->dev, + .dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE, + .offset = NETSEC_RXBUF_HEADROOM, + .max_len = NETSEC_RX_BUF_SIZE, + }; int i, err; - pp_params.order = 0; - /* internal DMA mapping in page_pool */ - pp_params.flags = PP_FLAG_DMA_MAP; - pp_params.pool_size = DESC_NUM; - pp_params.nid = cpu_to_node(0); - pp_params.dev = priv->dev; - pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; - dring->page_pool = page_pool_create(&pp_params); if (IS_ERR(dring->page_pool)) { err = PTR_ERR(dring->page_pool); @@ -1731,12 +1740,6 @@ static int netsec_netdev_set_features(struct net_device *ndev, return 0; } -static int netsec_netdev_ioctl(struct net_device *ndev, struct ifreq *ifr, - int cmd) -{ - return phy_mii_ioctl(ndev->phydev, ifr, cmd); -} - static int netsec_xdp_xmit(struct net_device *ndev, int n, struct xdp_frame **frames, u32 flags) { @@ -1821,7 +1824,7 @@ static const struct net_device_ops netsec_netdev_ops = { .ndo_set_features = netsec_netdev_set_features, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = netsec_netdev_ioctl, + .ndo_do_ioctl = phy_do_ioctl, .ndo_xdp_xmit = netsec_xdp_xmit, .ndo_bpf = netsec_xdp, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 94f94686cf7d..487099092693 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -363,6 +363,12 @@ struct dma_features { unsigned int dvlan; unsigned int l3l4fnum; unsigned int arpoffsel; + /* TSN Features */ + unsigned int estwid; + unsigned int estdep; + unsigned int estsel; + unsigned int fpesel; + unsigned int tbssel; }; /* RX Buffer size must be multiple of 4/8/16 bytes */ diff --git a/drivers/net/ethernet/stmicro/stmmac/descs.h b/drivers/net/ethernet/stmicro/stmmac/descs.h index 9f0b9a9e63b3..49d6a866244f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/descs.h +++ b/drivers/net/ethernet/stmicro/stmmac/descs.h @@ -171,6 +171,15 @@ struct dma_extended_desc { __le32 des7; /* Tx/Rx Timestamp High */ }; +/* Enhanced descriptor for TBS */ +struct dma_edesc { + __le32 des4; + __le32 des5; + __le32 des6; + __le32 des7; + struct dma_desc basic; +}; + /* Transmit checksum insertion control */ #define TX_CIC_FULL 3 /* Include IP header and pseudoheader */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index dd9967aeda22..2342d497348e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ 
b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -40,7 +40,7 @@ struct tegra_eqos { static int dwc_eth_dwmac_config_dt(struct platform_device *pdev, struct plat_stmmacenet_data *plat_dat) { - struct device_node *np = pdev->dev.of_node; + struct device *dev = &pdev->dev; u32 burst_map = 0; u32 bit_index = 0; u32 a_index = 0; @@ -52,9 +52,10 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev, return -ENOMEM; } - plat_dat->axi->axi_lpi_en = of_property_read_bool(np, "snps,en-lpi"); - if (of_property_read_u32(np, "snps,write-requests", - &plat_dat->axi->axi_wr_osr_lmt)) { + plat_dat->axi->axi_lpi_en = device_property_read_bool(dev, + "snps,en-lpi"); + if (device_property_read_u32(dev, "snps,write-requests", + &plat_dat->axi->axi_wr_osr_lmt)) { /** * Since the register has a reset value of 1, if property * is missing, default to 1. @@ -68,8 +69,8 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev, plat_dat->axi->axi_wr_osr_lmt--; } - if (of_property_read_u32(np, "snps,read-requests", - &plat_dat->axi->axi_rd_osr_lmt)) { + if (device_property_read_u32(dev, "snps,read-requests", + &plat_dat->axi->axi_rd_osr_lmt)) { /** * Since the register has a reset value of 1, if property * is missing, default to 1. @@ -82,7 +83,7 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev, */ plat_dat->axi->axi_rd_osr_lmt--; } - of_property_read_u32(np, "snps,burst-map", &burst_map); + device_property_read_u32(dev, "snps,burst-map", &burst_map); /* converts burst-map bitmask to burst array */ for (bit_index = 0; bit_index < 7; bit_index++) { @@ -270,6 +271,7 @@ static void *tegra_eqos_probe(struct platform_device *pdev, struct plat_stmmacenet_data *data, struct stmmac_resources *res) { + struct device *dev = &pdev->dev; struct tegra_eqos *eqos; int err; @@ -282,6 +284,9 @@ static void *tegra_eqos_probe(struct platform_device *pdev, eqos->dev = &pdev->dev; eqos->regs = res->addr; + if (!is_of_node(dev->fwnode)) + goto bypass_clk_reset_gpio; + eqos->clk_master = devm_clk_get(&pdev->dev, "master_bus"); if (IS_ERR(eqos->clk_master)) { err = PTR_ERR(eqos->clk_master); @@ -354,6 +359,7 @@ static void *tegra_eqos_probe(struct platform_device *pdev, usleep_range(2000, 4000); +bypass_clk_reset_gpio: data->fix_mac_speed = tegra_eqos_fix_speed; data->init = tegra_eqos_init; data->bsp_priv = eqos; @@ -421,7 +427,7 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev) void *priv; int ret; - data = of_device_get_match_data(&pdev->dev); + data = device_get_match_data(&pdev->dev); memset(&stmmac_res, 0, sizeof(struct stmmac_resources)); @@ -478,7 +484,7 @@ static int dwc_eth_dwmac_remove(struct platform_device *pdev) const struct dwc_eth_dwmac_data *data; int err; - data = of_device_get_match_data(&pdev->dev); + data = device_get_match_data(&pdev->dev); err = stmmac_dvr_remove(&pdev->dev); if (err < 0) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index bdb80421acac..9e4b83832938 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -55,6 +55,8 @@ struct mediatek_dwmac_plat_data { struct regmap *peri_regmap; struct device *dev; phy_interface_t phy_mode; + int num_clks_to_config; + bool rmii_clk_from_mac; bool rmii_rxc; }; @@ -73,21 +75,33 @@ struct mediatek_dwmac_variant { /* list of clocks required for mac */ static const char * const mt2712_dwmac_clk_l[] = { - "axi", "apb", "mac_main", "ptp_ref" + "axi", "apb", 
"mac_main", "ptp_ref", "rmii_internal" }; static int mt2712_set_interface(struct mediatek_dwmac_plat_data *plat) { + int rmii_clk_from_mac = plat->rmii_clk_from_mac ? RMII_CLK_SRC_INTERNAL : 0; int rmii_rxc = plat->rmii_rxc ? RMII_CLK_SRC_RXC : 0; u32 intf_val = 0; + /* The clock labeled as "rmii_internal" in mt2712_dwmac_clk_l is needed + * only in RMII(when MAC provides the reference clock), and useless for + * RGMII/MII/RMII(when PHY provides the reference clock). + * num_clks_to_config indicates the real number of clocks should be + * configured, equals to (plat->variant->num_clks - 1) in default for all the case, + * then +1 for rmii_clk_from_mac case. + */ + plat->num_clks_to_config = plat->variant->num_clks - 1; + /* select phy interface in top control domain */ switch (plat->phy_mode) { case PHY_INTERFACE_MODE_MII: intf_val |= PHY_INTF_MII; break; case PHY_INTERFACE_MODE_RMII: - intf_val |= (PHY_INTF_RMII | rmii_rxc); + if (plat->rmii_clk_from_mac) + plat->num_clks_to_config++; + intf_val |= (PHY_INTF_RMII | rmii_rxc | rmii_clk_from_mac); break; case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_TXID: @@ -173,35 +187,50 @@ static int mt2712_set_delay(struct mediatek_dwmac_plat_data *plat) delay_val |= FIELD_PREP(ETH_DLY_RXC_INV, mac_delay->rx_inv); break; case PHY_INTERFACE_MODE_RMII: - /* the rmii reference clock is from external phy, - * and the property "rmii_rxc" indicates which pin(TXC/RXC) - * the reference clk is connected to. The reference clock is a - * received signal, so rx_delay/rx_inv are used to indicate - * the reference clock timing adjustment - */ - if (plat->rmii_rxc) { - /* the rmii reference clock from outside is connected - * to RXC pin, the reference clock will be adjusted - * by RXC delay macro circuit. - */ - delay_val |= FIELD_PREP(ETH_DLY_RXC_ENABLE, !!mac_delay->rx_delay); - delay_val |= FIELD_PREP(ETH_DLY_RXC_STAGES, mac_delay->rx_delay); - delay_val |= FIELD_PREP(ETH_DLY_RXC_INV, mac_delay->rx_inv); - } else { - /* the rmii reference clock from outside is connected - * to TXC pin, the reference clock will be adjusted - * by TXC delay macro circuit. + if (plat->rmii_clk_from_mac) { + /* case 1: mac provides the rmii reference clock, + * and the clock output to TXC pin. + * The egress timing can be adjusted by GTXC delay macro circuit. + * The ingress timing can be adjusted by TXC delay macro circuit. */ delay_val |= FIELD_PREP(ETH_DLY_TXC_ENABLE, !!mac_delay->rx_delay); delay_val |= FIELD_PREP(ETH_DLY_TXC_STAGES, mac_delay->rx_delay); delay_val |= FIELD_PREP(ETH_DLY_TXC_INV, mac_delay->rx_inv); + + delay_val |= FIELD_PREP(ETH_DLY_GTXC_ENABLE, !!mac_delay->tx_delay); + delay_val |= FIELD_PREP(ETH_DLY_GTXC_STAGES, mac_delay->tx_delay); + delay_val |= FIELD_PREP(ETH_DLY_GTXC_INV, mac_delay->tx_inv); + } else { + /* case 2: the rmii reference clock is from external phy, + * and the property "rmii_rxc" indicates which pin(TXC/RXC) + * the reference clk is connected to. The reference clock is a + * received signal, so rx_delay/rx_inv are used to indicate + * the reference clock timing adjustment + */ + if (plat->rmii_rxc) { + /* the rmii reference clock from outside is connected + * to RXC pin, the reference clock will be adjusted + * by RXC delay macro circuit. 
+ */ + delay_val |= FIELD_PREP(ETH_DLY_RXC_ENABLE, !!mac_delay->rx_delay); + delay_val |= FIELD_PREP(ETH_DLY_RXC_STAGES, mac_delay->rx_delay); + delay_val |= FIELD_PREP(ETH_DLY_RXC_INV, mac_delay->rx_inv); + } else { + /* the rmii reference clock from outside is connected + * to TXC pin, the reference clock will be adjusted + * by TXC delay macro circuit. + */ + delay_val |= FIELD_PREP(ETH_DLY_TXC_ENABLE, !!mac_delay->rx_delay); + delay_val |= FIELD_PREP(ETH_DLY_TXC_STAGES, mac_delay->rx_delay); + delay_val |= FIELD_PREP(ETH_DLY_TXC_INV, mac_delay->rx_inv); + } + /* tx_inv will inverse the tx clock inside mac relateive to + * reference clock from external phy, + * and this bit is located in the same register with fine-tune + */ + if (mac_delay->tx_inv) + fine_val = ETH_RMII_DLY_TX_INV; } - /* tx_inv will inverse the tx clock inside mac relateive to - * reference clock from external phy, - * and this bit is located in the same register with fine-tune - */ - if (mac_delay->tx_inv) - fine_val = ETH_RMII_DLY_TX_INV; break; case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_TXID: @@ -278,6 +307,7 @@ static int mediatek_dwmac_config_dt(struct mediatek_dwmac_plat_data *plat) mac_delay->tx_inv = of_property_read_bool(plat->np, "mediatek,txc-inverse"); mac_delay->rx_inv = of_property_read_bool(plat->np, "mediatek,rxc-inverse"); plat->rmii_rxc = of_property_read_bool(plat->np, "mediatek,rmii-rxc"); + plat->rmii_clk_from_mac = of_property_read_bool(plat->np, "mediatek,rmii-clk-from-mac"); return 0; } @@ -294,6 +324,8 @@ static int mediatek_dwmac_clk_init(struct mediatek_dwmac_plat_data *plat) for (i = 0; i < num; i++) plat->clks[i].id = variant->clk_list[i]; + plat->num_clks_to_config = variant->num_clks; + return devm_clk_bulk_get(plat->dev, num, plat->clks); } @@ -321,7 +353,7 @@ static int mediatek_dwmac_init(struct platform_device *pdev, void *priv) return ret; } - ret = clk_bulk_prepare_enable(variant->num_clks, plat->clks); + ret = clk_bulk_prepare_enable(plat->num_clks_to_config, plat->clks); if (ret) { dev_err(plat->dev, "failed to enable clks, err = %d\n", ret); return ret; @@ -336,9 +368,8 @@ static int mediatek_dwmac_init(struct platform_device *pdev, void *priv) static void mediatek_dwmac_exit(struct platform_device *pdev, void *priv) { struct mediatek_dwmac_plat_data *plat = priv; - const struct mediatek_dwmac_variant *variant = plat->variant; - clk_bulk_disable_unprepare(variant->num_clks, plat->clks); + clk_bulk_disable_unprepare(plat->num_clks_to_config, plat->clks); pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 01b484cb177e..58e0511badba 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -335,14 +335,30 @@ static void sun8i_dwmac_dump_mac_regs(struct mac_device_info *hw, } } -static void sun8i_dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan) +static void sun8i_dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan, + bool rx, bool tx) { - writel(EMAC_RX_INT | EMAC_TX_INT, ioaddr + EMAC_INT_EN); + u32 value = readl(ioaddr + EMAC_INT_EN); + + if (rx) + value |= EMAC_RX_INT; + if (tx) + value |= EMAC_TX_INT; + + writel(value, ioaddr + EMAC_INT_EN); } -static void sun8i_dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan) +static void sun8i_dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan, + bool rx, bool tx) { - writel(0, ioaddr + EMAC_INT_EN); + u32 value = 
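
[Editorial aside, not part of the patch: the dwmac-mediatek hunks above add num_clks_to_config so the extra "rmii_internal" clock is only prepared when the MAC supplies the RMII reference clock; every other mode configures variant->num_clks minus one. The toy below reproduces that selection; the enum and constant are assumptions for the demo, not the driver's definitions.]

#include <stdio.h>
#include <stdbool.h>

enum toy_phy_mode { TOY_MII, TOY_RMII, TOY_RGMII };

#define TOY_VARIANT_NUM_CLKS 5  /* e.g. axi, apb, mac_main, ptp_ref, rmii_internal */

static unsigned int clks_to_config(enum toy_phy_mode mode, bool rmii_clk_from_mac)
{
        unsigned int n = TOY_VARIANT_NUM_CLKS - 1;      /* skip rmii_internal by default */

        if (mode == TOY_RMII && rmii_clk_from_mac)
                n++;                                    /* MAC drives the RMII reference clock */
        return n;
}

int main(void)
{
        printf("%u\n", clks_to_config(TOY_RGMII, false));       /* 4 */
        printf("%u\n", clks_to_config(TOY_RMII, true));         /* 5 */
        return 0;
}
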
readl(ioaddr + EMAC_INT_EN); + + if (rx) + value &= ~EMAC_RX_INT; + if (tx) + value &= ~EMAC_TX_INT; + + writel(value, ioaddr + EMAC_INT_EN); } static void sun8i_dwmac_dma_start_tx(void __iomem *ioaddr, u32 chan) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index 2dc70d104161..af50af27550b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -64,6 +64,8 @@ #define GMAC_RXQCTRL_MCBCQEN_SHIFT 20 #define GMAC_RXQCTRL_TACPQE BIT(21) #define GMAC_RXQCTRL_TACPQE_SHIFT 21 +#define GMAC_RXQCTRL_FPRQ GENMASK(26, 24) +#define GMAC_RXQCTRL_FPRQ_SHIFT 24 /* MAC Packet Filtering */ #define GMAC_PACKET_FILTER_PR BIT(0) @@ -176,6 +178,8 @@ enum power_event { #define GMAC_CONFIG_SARC GENMASK(30, 28) #define GMAC_CONFIG_SARC_SHIFT 28 #define GMAC_CONFIG_IPC BIT(27) +#define GMAC_CONFIG_IPG GENMASK(26, 24) +#define GMAC_CONFIG_IPG_SHIFT 24 #define GMAC_CONFIG_2K BIT(22) #define GMAC_CONFIG_ACS BIT(20) #define GMAC_CONFIG_BE BIT(18) @@ -183,6 +187,7 @@ enum power_event { #define GMAC_CONFIG_JE BIT(16) #define GMAC_CONFIG_PS BIT(15) #define GMAC_CONFIG_FES BIT(14) +#define GMAC_CONFIG_FES_SHIFT 14 #define GMAC_CONFIG_DM BIT(13) #define GMAC_CONFIG_LM BIT(12) #define GMAC_CONFIG_DCRS BIT(9) @@ -190,6 +195,9 @@ enum power_event { #define GMAC_CONFIG_RE BIT(0) /* MAC extended config */ +#define GMAC_CONFIG_EIPG GENMASK(29, 25) +#define GMAC_CONFIG_EIPG_SHIFT 25 +#define GMAC_CONFIG_EIPG_EN BIT(24) #define GMAC_CONFIG_HDSMS GENMASK(22, 20) #define GMAC_CONFIG_HDSMS_SHIFT 20 #define GMAC_CONFIG_HDSMS_256 (0x2 << GMAC_CONFIG_HDSMS_SHIFT) @@ -231,6 +239,11 @@ enum power_event { /* MAC HW features3 bitmap */ #define GMAC_HW_FEAT_ASP GENMASK(29, 28) +#define GMAC_HW_FEAT_TBSSEL BIT(27) +#define GMAC_HW_FEAT_FPESEL BIT(26) +#define GMAC_HW_FEAT_ESTWID GENMASK(21, 20) +#define GMAC_HW_FEAT_ESTDEP GENMASK(19, 17) +#define GMAC_HW_FEAT_ESTSEL BIT(16) #define GMAC_HW_FEAT_FRPES GENMASK(14, 13) #define GMAC_HW_FEAT_FRPBS GENMASK(12, 11) #define GMAC_HW_FEAT_FRPSEL BIT(10) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 40ca00e596dd..f0c0ea616032 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -984,6 +984,8 @@ const struct stmmac_ops dwmac410_ops = { .set_arp_offload = dwmac4_set_arp_offload, .config_l3_filter = dwmac4_config_l3_filter, .config_l4_filter = dwmac4_config_l4_filter, + .est_configure = dwmac5_est_configure, + .fpe_configure = dwmac5_fpe_configure, }; const struct stmmac_ops dwmac510_ops = { @@ -1027,6 +1029,8 @@ const struct stmmac_ops dwmac510_ops = { .set_arp_offload = dwmac4_set_arp_offload, .config_l3_filter = dwmac4_config_l3_filter, .config_l4_filter = dwmac4_config_l4_filter, + .est_configure = dwmac5_est_configure, + .fpe_configure = dwmac5_fpe_configure, }; int dwmac4_setup(struct stmmac_priv *priv) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 3e14da69f378..eff82065a501 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -10,6 +10,7 @@ #include <linux/stmmac.h> #include "common.h" +#include "dwmac4.h" #include "dwmac4_descs.h" static int dwmac4_wrback_get_tx_status(void *data, struct stmmac_extra_stats *x, @@ -505,6 +506,14 @@ static void dwmac4_set_sec_addr(struct dma_desc *p, dma_addr_t addr) 
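
[Editorial aside, not part of the patch: the sun8i DMA-IRQ helpers above, and the dwmac4/dwmac1000 ones further below, switch from writing a fixed mask to a read-modify-write that touches only the RX and/or TX enable bits selected by the new rx/tx parameters, so enabling one direction no longer clobbers the other. The toy below applies the same pattern to a plain variable; the bit values are assumptions, not the hardware's.]

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define TOY_RX_INT (1u << 0)    /* assumed RX interrupt enable bit */
#define TOY_TX_INT (1u << 1)    /* assumed TX interrupt enable bit */

static uint32_t int_en;         /* stand-in for the INT_EN register */

static void toy_enable_irq(bool rx, bool tx)
{
        uint32_t value = int_en;        /* readl() */

        if (rx)
                value |= TOY_RX_INT;
        if (tx)
                value |= TOY_TX_INT;
        int_en = value;                 /* writel() */
}

static void toy_disable_irq(bool rx, bool tx)
{
        uint32_t value = int_en;

        if (rx)
                value &= ~TOY_RX_INT;
        if (tx)
                value &= ~TOY_TX_INT;
        int_en = value;
}

int main(void)
{
        toy_enable_irq(true, true);
        toy_disable_irq(false, true);   /* TX off, RX stays enabled */
        printf("int_en=%#x\n", int_en); /* 0x1 */
        return 0;
}
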
p->des3 = cpu_to_le32(upper_32_bits(addr) | RDES3_BUFFER2_VALID_ADDR); } +static void dwmac4_set_tbs(struct dma_edesc *p, u32 sec, u32 nsec) +{ + p->des4 = cpu_to_le32((sec & TDES4_LT) | TDES4_LTV); + p->des5 = cpu_to_le32(nsec & TDES5_LT); + p->des6 = 0; + p->des7 = 0; +} + const struct stmmac_desc_ops dwmac4_desc_ops = { .tx_status = dwmac4_wrback_get_tx_status, .rx_status = dwmac4_wrback_get_rx_status, @@ -534,6 +543,7 @@ const struct stmmac_desc_ops dwmac4_desc_ops = { .set_vlan = dwmac4_set_vlan, .get_rx_header_len = dwmac4_get_rx_header_len, .set_sec_addr = dwmac4_set_sec_addr, + .set_tbs = dwmac4_set_tbs, }; const struct stmmac_mode_ops dwmac4_ring_mode_ops = { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h index 6d92109dc9aa..6da070ccd737 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h @@ -73,6 +73,13 @@ #define TDES3_CONTEXT_TYPE BIT(30) #define TDES3_CONTEXT_TYPE_SHIFT 30 +/* TDES4 */ +#define TDES4_LTV BIT(31) +#define TDES4_LT GENMASK(7, 0) + +/* TDES5 */ +#define TDES5_LT GENMASK(31, 8) + /* TDS3 use for both format (read and write back) */ #define TDES3_OWN BIT(31) #define TDES3_OWN_SHIFT 31 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index c15409030710..bb29bfcd62c3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -404,6 +404,11 @@ static void dwmac4_get_hw_feature(void __iomem *ioaddr, /* 5.10 Features */ dma_cap->asp = (hw_cap & GMAC_HW_FEAT_ASP) >> 28; + dma_cap->tbssel = (hw_cap & GMAC_HW_FEAT_TBSSEL) >> 27; + dma_cap->fpesel = (hw_cap & GMAC_HW_FEAT_FPESEL) >> 26; + dma_cap->estwid = (hw_cap & GMAC_HW_FEAT_ESTWID) >> 20; + dma_cap->estdep = (hw_cap & GMAC_HW_FEAT_ESTDEP) >> 17; + dma_cap->estsel = (hw_cap & GMAC_HW_FEAT_ESTSEL) >> 16; dma_cap->frpes = (hw_cap & GMAC_HW_FEAT_FRPES) >> 13; dma_cap->frpbs = (hw_cap & GMAC_HW_FEAT_FRPBS) >> 11; dma_cap->frpsel = (hw_cap & GMAC_HW_FEAT_FRPSEL) >> 10; @@ -467,6 +472,25 @@ static void dwmac4_enable_sph(void __iomem *ioaddr, bool en, u32 chan) writel(value, ioaddr + DMA_CHAN_CONTROL(chan)); } +static int dwmac4_enable_tbs(void __iomem *ioaddr, bool en, u32 chan) +{ + u32 value = readl(ioaddr + DMA_CHAN_TX_CONTROL(chan)); + + if (en) + value |= DMA_CONTROL_EDSE; + else + value &= ~DMA_CONTROL_EDSE; + + writel(value, ioaddr + DMA_CHAN_TX_CONTROL(chan)); + + value = readl(ioaddr + DMA_CHAN_TX_CONTROL(chan)) & DMA_CONTROL_EDSE; + if (en && !value) + return -EIO; + + writel(DMA_TBS_DEF_FTOS, ioaddr + DMA_TBS_CTRL); + return 0; +} + const struct stmmac_dma_ops dwmac4_dma_ops = { .reset = dwmac4_dma_reset, .init = dwmac4_dma_init, @@ -523,4 +547,5 @@ const struct stmmac_dma_ops dwmac410_dma_ops = { .qmode = dwmac4_qmode, .set_bfsize = dwmac4_set_bfsize, .enable_sph = dwmac4_enable_sph, + .enable_tbs = dwmac4_enable_tbs, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h index 589931795847..8391ca63d943 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h @@ -22,6 +22,7 @@ #define DMA_DEBUG_STATUS_1 0x00001010 #define DMA_DEBUG_STATUS_2 0x00001014 #define DMA_AXI_BUS_MODE 0x00001028 +#define DMA_TBS_CTRL 0x00001050 /* DMA Bus Mode bitmap */ #define DMA_BUS_MODE_SFT_RESET BIT(0) @@ -82,6 +83,11 @@ #define DMA_AXI_BURST_LEN_MASK 
0x000000FE +/* DMA TBS Control */ +#define DMA_TBS_FTOS GENMASK(31, 8) +#define DMA_TBS_FTOV BIT(0) +#define DMA_TBS_DEF_FTOS (DMA_TBS_FTOS | DMA_TBS_FTOV) + /* Following DMA defines are chanels oriented */ #define DMA_CHAN_BASE_ADDR 0x00001100 #define DMA_CHAN_BASE_OFFSET 0x80 @@ -114,6 +120,7 @@ #define DMA_CONTROL_MSS_MASK GENMASK(13, 0) /* DMA Tx Channel X Control register defines */ +#define DMA_CONTROL_EDSE BIT(28) #define DMA_CONTROL_TSE BIT(12) #define DMA_CONTROL_OSP BIT(4) #define DMA_CONTROL_ST BIT(0) @@ -168,6 +175,8 @@ /* DMA default interrupt mask for 4.00 */ #define DMA_CHAN_INTR_DEFAULT_MASK (DMA_CHAN_INTR_NORMAL | \ DMA_CHAN_INTR_ABNORMAL) +#define DMA_CHAN_INTR_DEFAULT_RX (DMA_CHAN_INTR_ENA_RIE) +#define DMA_CHAN_INTR_DEFAULT_TX (DMA_CHAN_INTR_ENA_TIE) #define DMA_CHAN_INTR_NORMAL_4_10 (DMA_CHAN_INTR_ENA_NIE_4_10 | \ DMA_CHAN_INTR_ENA_RIE | \ @@ -178,6 +187,8 @@ /* DMA default interrupt mask for 4.10a */ #define DMA_CHAN_INTR_DEFAULT_MASK_4_10 (DMA_CHAN_INTR_NORMAL_4_10 | \ DMA_CHAN_INTR_ABNORMAL_4_10) +#define DMA_CHAN_INTR_DEFAULT_RX_4_10 (DMA_CHAN_INTR_ENA_RIE) +#define DMA_CHAN_INTR_DEFAULT_TX_4_10 (DMA_CHAN_INTR_ENA_TIE) /* channel 0 specific fields */ #define DMA_CHAN0_DBG_STAT_TPS GENMASK(15, 12) @@ -186,9 +197,10 @@ #define DMA_CHAN0_DBG_STAT_RPS_SHIFT 8 int dwmac4_dma_reset(void __iomem *ioaddr); -void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan); -void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan); -void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan); +void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); +void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); +void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); +void dwmac410_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); void dwmac4_dma_start_tx(void __iomem *ioaddr, u32 chan); void dwmac4_dma_stop_tx(void __iomem *ioaddr, u32 chan); void dwmac4_dma_start_rx(void __iomem *ioaddr, u32 chan); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c index f2a29a90e085..9becca280074 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c @@ -97,21 +97,52 @@ void dwmac4_set_rx_ring_len(void __iomem *ioaddr, u32 len, u32 chan) writel(len, ioaddr + DMA_CHAN_RX_RING_LEN(chan)); } -void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan) +void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) { - writel(DMA_CHAN_INTR_DEFAULT_MASK, ioaddr + - DMA_CHAN_INTR_ENA(chan)); + u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan)); + + if (rx) + value |= DMA_CHAN_INTR_DEFAULT_RX; + if (tx) + value |= DMA_CHAN_INTR_DEFAULT_TX; + + writel(value, ioaddr + DMA_CHAN_INTR_ENA(chan)); } -void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan) +void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) { - writel(DMA_CHAN_INTR_DEFAULT_MASK_4_10, - ioaddr + DMA_CHAN_INTR_ENA(chan)); + u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan)); + + if (rx) + value |= DMA_CHAN_INTR_DEFAULT_RX_4_10; + if (tx) + value |= DMA_CHAN_INTR_DEFAULT_TX_4_10; + + writel(value, ioaddr + DMA_CHAN_INTR_ENA(chan)); } -void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan) +void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) { - writel(0, ioaddr + DMA_CHAN_INTR_ENA(chan)); + u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan)); + + if (rx) + value 
&= ~DMA_CHAN_INTR_DEFAULT_RX; + if (tx) + value &= ~DMA_CHAN_INTR_DEFAULT_TX; + + writel(value, ioaddr + DMA_CHAN_INTR_ENA(chan)); +} + +void dwmac410_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) +{ + u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan)); + + if (rx) + value &= ~DMA_CHAN_INTR_DEFAULT_RX_4_10; + if (tx) + value &= ~DMA_CHAN_INTR_DEFAULT_TX_4_10; + + writel(value, ioaddr + DMA_CHAN_INTR_ENA(chan)); } int dwmac4_dma_interrupt(void __iomem *ioaddr, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c index e436fa160c7d..494c859b4ade 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c @@ -550,3 +550,122 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index, writel(val, ioaddr + MAC_PPS_CONTROL); return 0; } + +static int dwmac5_est_write(void __iomem *ioaddr, u32 reg, u32 val, bool gcl) +{ + u32 ctrl; + + writel(val, ioaddr + MTL_EST_GCL_DATA); + + ctrl = (reg << ADDR_SHIFT); + ctrl |= gcl ? 0 : GCRR; + + writel(ctrl, ioaddr + MTL_EST_GCL_CONTROL); + + ctrl |= SRWO; + writel(ctrl, ioaddr + MTL_EST_GCL_CONTROL); + + return readl_poll_timeout(ioaddr + MTL_EST_GCL_CONTROL, + ctrl, !(ctrl & SRWO), 100, 5000); +} + +int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg, + unsigned int ptp_rate) +{ + u32 speed, total_offset, offset, ctrl, ctr_low; + u32 extcfg = readl(ioaddr + GMAC_EXT_CONFIG); + u32 mac_cfg = readl(ioaddr + GMAC_CONFIG); + int i, ret = 0x0; + u64 total_ctr; + + if (extcfg & GMAC_CONFIG_EIPG_EN) { + offset = (extcfg & GMAC_CONFIG_EIPG) >> GMAC_CONFIG_EIPG_SHIFT; + offset = 104 + (offset * 8); + } else { + offset = (mac_cfg & GMAC_CONFIG_IPG) >> GMAC_CONFIG_IPG_SHIFT; + offset = 96 - (offset * 8); + } + + speed = mac_cfg & (GMAC_CONFIG_PS | GMAC_CONFIG_FES); + speed = speed >> GMAC_CONFIG_FES_SHIFT; + + switch (speed) { + case 0x0: + offset = offset * 1000; /* 1G */ + break; + case 0x1: + offset = offset * 400; /* 2.5G */ + break; + case 0x2: + offset = offset * 100000; /* 10M */ + break; + case 0x3: + offset = offset * 10000; /* 100M */ + break; + default: + return -EINVAL; + } + + offset = offset / 1000; + + ret |= dwmac5_est_write(ioaddr, BTR_LOW, cfg->btr[0], false); + ret |= dwmac5_est_write(ioaddr, BTR_HIGH, cfg->btr[1], false); + ret |= dwmac5_est_write(ioaddr, TER, cfg->ter, false); + ret |= dwmac5_est_write(ioaddr, LLR, cfg->gcl_size, false); + if (ret) + return ret; + + total_offset = 0; + for (i = 0; i < cfg->gcl_size; i++) { + ret = dwmac5_est_write(ioaddr, i, cfg->gcl[i] + offset, true); + if (ret) + return ret; + + total_offset += offset; + } + + total_ctr = cfg->ctr[0] + cfg->ctr[1] * 1000000000; + total_ctr += total_offset; + + ctr_low = do_div(total_ctr, 1000000000); + + ret |= dwmac5_est_write(ioaddr, CTR_LOW, ctr_low, false); + ret |= dwmac5_est_write(ioaddr, CTR_HIGH, total_ctr, false); + if (ret) + return ret; + + ctrl = readl(ioaddr + MTL_EST_CONTROL); + ctrl &= ~PTOV; + ctrl |= ((1000000000 / ptp_rate) * 6) << PTOV_SHIFT; + if (cfg->enable) + ctrl |= EEST | SSWL; + else + ctrl &= ~EEST; + + writel(ctrl, ioaddr + MTL_EST_CONTROL); + return 0; +} + +void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq, + bool enable) +{ + u32 value; + + if (!enable) { + value = readl(ioaddr + MAC_FPE_CTRL_STS); + + value &= ~EFPE; + + writel(value, ioaddr + MAC_FPE_CTRL_STS); + return; + } + + value = readl(ioaddr + GMAC_RXQ_CTRL1); + value &= ~GMAC_RXQCTRL_FPRQ; + value |= (num_rxq - 1) << 
GMAC_RXQCTRL_FPRQ_SHIFT; + writel(value, ioaddr + GMAC_RXQ_CTRL1); + + value = readl(ioaddr + MAC_FPE_CTRL_STS); + value |= EFPE; + writel(value, ioaddr + MAC_FPE_CTRL_STS); +} diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h index 23fecf68f781..3e8faa96b4d4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h @@ -11,6 +11,9 @@ #define PRTYEN BIT(1) #define TMOUTEN BIT(0) +#define MAC_FPE_CTRL_STS 0x00000234 +#define EFPE BIT(0) + #define MAC_PPS_CONTROL 0x00000b70 #define PPS_MAXIDX(x) ((((x) + 1) * 8) - 1) #define PPS_MINIDX(x) ((x) * 8) @@ -30,6 +33,23 @@ #define MAC_PPSx_INTERVAL(x) (0x00000b88 + ((x) * 0x10)) #define MAC_PPSx_WIDTH(x) (0x00000b8c + ((x) * 0x10)) +#define MTL_EST_CONTROL 0x00000c50 +#define PTOV GENMASK(31, 24) +#define PTOV_SHIFT 24 +#define SSWL BIT(1) +#define EEST BIT(0) +#define MTL_EST_GCL_CONTROL 0x00000c80 +#define BTR_LOW 0x0 +#define BTR_HIGH 0x1 +#define CTR_LOW 0x2 +#define CTR_HIGH 0x3 +#define TER 0x4 +#define LLR 0x5 +#define ADDR_SHIFT 8 +#define GCRR BIT(2) +#define SRWO BIT(0) +#define MTL_EST_GCL_DATA 0x00000c84 + #define MTL_RXP_CONTROL_STATUS 0x00000ca0 #define RXPI BIT(31) #define NPE GENMASK(23, 16) @@ -83,5 +103,9 @@ int dwmac5_rxp_config(void __iomem *ioaddr, struct stmmac_tc_entry *entries, int dwmac5_flex_pps_config(void __iomem *ioaddr, int index, struct stmmac_pps_cfg *cfg, bool enable, u32 sub_second_inc, u32 systime_flags); +int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg, + unsigned int ptp_rate); +void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq, + bool enable); #endif /* __DWMAC5_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h index 292b880f3f9f..e5dbd0bc257e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h @@ -96,6 +96,8 @@ /* DMA default interrupt mask */ #define DMA_INTR_DEFAULT_MASK (DMA_INTR_NORMAL | DMA_INTR_ABNORMAL) +#define DMA_INTR_DEFAULT_RX (DMA_INTR_ENA_RIE) +#define DMA_INTR_DEFAULT_TX (DMA_INTR_ENA_TIE) /* DMA Status register defines */ #define DMA_STATUS_GLPII 0x40000000 /* GMAC LPI interrupt */ @@ -130,8 +132,8 @@ #define NUM_DWMAC1000_DMA_REGS 23 void dwmac_enable_dma_transmission(void __iomem *ioaddr); -void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan); -void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan); +void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); +void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); void dwmac_dma_start_tx(void __iomem *ioaddr, u32 chan); void dwmac_dma_stop_tx(void __iomem *ioaddr, u32 chan); void dwmac_dma_start_rx(void __iomem *ioaddr, u32 chan); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c index 1bc25aa86dbd..688d36095333 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c @@ -37,14 +37,28 @@ void dwmac_enable_dma_transmission(void __iomem *ioaddr) writel(1, ioaddr + DMA_XMT_POLL_DEMAND); } -void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan) +void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) { - writel(DMA_INTR_DEFAULT_MASK, ioaddr + DMA_INTR_ENA); + u32 value = readl(ioaddr + DMA_INTR_ENA); + + if (rx) + value |= DMA_INTR_DEFAULT_RX; + if (tx) + value |= 
DMA_INTR_DEFAULT_TX; + + writel(value, ioaddr + DMA_INTR_ENA); } -void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan) +void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) { - writel(0, ioaddr + DMA_INTR_ENA); + u32 value = readl(ioaddr + DMA_INTR_ENA); + + if (rx) + value &= ~DMA_INTR_DEFAULT_RX; + if (tx) + value &= ~DMA_INTR_DEFAULT_TX; + + writel(value, ioaddr + DMA_INTR_ENA); } void dwmac_dma_start_tx(void __iomem *ioaddr, u32 chan) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index ef8a07c68ca7..6c3b8a950f58 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -73,6 +73,9 @@ #define XGMAC_RXQ_CTRL0 0x000000a0 #define XGMAC_RXQEN(x) GENMASK((x) * 2 + 1, (x) * 2) #define XGMAC_RXQEN_SHIFT(x) ((x) * 2) +#define XGMAC_RXQ_CTRL1 0x000000a4 +#define XGMAC_RQ GENMASK(7, 4) +#define XGMAC_RQ_SHIFT 4 #define XGMAC_RXQ_CTRL2 0x000000a8 #define XGMAC_RXQ_CTRL3 0x000000ac #define XGMAC_PSRQ(x) GENMASK((x) * 8 + 7, (x) * 8) @@ -136,6 +139,11 @@ #define XGMAC_HWFEAT_TXQCNT GENMASK(9, 6) #define XGMAC_HWFEAT_RXQCNT GENMASK(3, 0) #define XGMAC_HW_FEATURE3 0x00000128 +#define XGMAC_HWFEAT_TBSSEL BIT(27) +#define XGMAC_HWFEAT_FPESEL BIT(26) +#define XGMAC_HWFEAT_ESTWID GENMASK(24, 23) +#define XGMAC_HWFEAT_ESTDEP GENMASK(22, 20) +#define XGMAC_HWFEAT_ESTSEL BIT(19) #define XGMAC_HWFEAT_ASP GENMASK(15, 14) #define XGMAC_HWFEAT_DVLAN BIT(13) #define XGMAC_HWFEAT_FRPES GENMASK(12, 11) @@ -148,6 +156,8 @@ #define XGMAC_MDIO_ADDR 0x00000200 #define XGMAC_MDIO_DATA 0x00000204 #define XGMAC_MDIO_C22P 0x00000220 +#define XGMAC_FPE_CTRL_STS 0x00000280 +#define XGMAC_EFPE BIT(0) #define XGMAC_ADDRx_HIGH(x) (0x00000300 + (x) * 0x8) #define XGMAC_ADDR_MAX 32 #define XGMAC_AE BIT(31) @@ -237,6 +247,22 @@ #define XGMAC_TC_PRTY_MAP1 0x00001044 #define XGMAC_PSTC(x) GENMASK((x) * 8 + 7, (x) * 8) #define XGMAC_PSTC_SHIFT(x) ((x) * 8) +#define XGMAC_MTL_EST_CONTROL 0x00001050 +#define XGMAC_PTOV GENMASK(31, 23) +#define XGMAC_PTOV_SHIFT 23 +#define XGMAC_SSWL BIT(1) +#define XGMAC_EEST BIT(0) +#define XGMAC_MTL_EST_GCL_CONTROL 0x00001080 +#define XGMAC_BTR_LOW 0x0 +#define XGMAC_BTR_HIGH 0x1 +#define XGMAC_CTR_LOW 0x2 +#define XGMAC_CTR_HIGH 0x3 +#define XGMAC_TER 0x4 +#define XGMAC_LLR 0x5 +#define XGMAC_ADDR_SHIFT 8 +#define XGMAC_GCRR BIT(2) +#define XGMAC_SRWO BIT(0) +#define XGMAC_MTL_EST_GCL_DATA 0x00001084 #define XGMAC_MTL_RXP_CONTROL_STATUS 0x000010a0 #define XGMAC_RXPI BIT(31) #define XGMAC_NPE GENMASK(23, 16) @@ -321,6 +347,13 @@ #define XGMAC_TDPS GENMASK(29, 0) #define XGMAC_RX_EDMA_CTRL 0x00003044 #define XGMAC_RDPS GENMASK(29, 0) +#define XGMAC_DMA_TBS_CTRL0 0x00003054 +#define XGMAC_DMA_TBS_CTRL1 0x00003058 +#define XGMAC_DMA_TBS_CTRL2 0x0000305c +#define XGMAC_DMA_TBS_CTRL3 0x00003060 +#define XGMAC_FTOS GENMASK(31, 8) +#define XGMAC_FTOV BIT(0) +#define XGMAC_DEF_FTOS (XGMAC_FTOS | XGMAC_FTOV) #define XGMAC_DMA_SAFETY_INT_STATUS 0x00003064 #define XGMAC_MCSIS BIT(31) #define XGMAC_MSUIS BIT(29) @@ -335,6 +368,7 @@ #define XGMAC_SPH BIT(24) #define XGMAC_PBLx8 BIT(16) #define XGMAC_DMA_CH_TX_CONTROL(x) (0x00003104 + (0x80 * (x))) +#define XGMAC_EDSE BIT(28) #define XGMAC_TxPBL GENMASK(21, 16) #define XGMAC_TxPBL_SHIFT 16 #define XGMAC_TSE BIT(12) @@ -363,6 +397,8 @@ #define XGMAC_TIE BIT(0) #define XGMAC_DMA_INT_DEFAULT_EN (XGMAC_NIE | XGMAC_AIE | XGMAC_RBUE | \ XGMAC_RIE | XGMAC_TIE) +#define XGMAC_DMA_INT_DEFAULT_RX (XGMAC_RBUE | XGMAC_RIE) 
+#define XGMAC_DMA_INT_DEFAULT_TX (XGMAC_TIE) #define XGMAC_DMA_CH_Rx_WATCHDOG(x) (0x0000313c + (0x80 * (x))) #define XGMAC_RWT GENMASK(7, 0) #define XGMAC_DMA_CH_STATUS(x) (0x00003160 + (0x80 * (x))) @@ -377,6 +413,9 @@ #define XGMAC_REGSIZE ((0x0000317c + (0x80 * 15)) / 4) /* Descriptors */ +#define XGMAC_TDES0_LTV BIT(31) +#define XGMAC_TDES0_LT GENMASK(7, 0) +#define XGMAC_TDES1_LT GENMASK(31, 8) #define XGMAC_TDES2_IVT GENMASK(31, 16) #define XGMAC_TDES2_IVT_SHIFT 16 #define XGMAC_TDES2_IOC BIT(31) @@ -395,6 +434,7 @@ #define XGMAC_TDES3_TCMSSV BIT(26) #define XGMAC_TDES3_SAIC GENMASK(25, 23) #define XGMAC_TDES3_SAIC_SHIFT 23 +#define XGMAC_TDES3_TBSV BIT(24) #define XGMAC_TDES3_THL GENMASK(22, 19) #define XGMAC_TDES3_THL_SHIFT 19 #define XGMAC_TDES3_IVTIR GENMASK(19, 18) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 082f5ee9e525..2af3ac5409b7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -1359,6 +1359,81 @@ static void dwxgmac2_set_arp_offload(struct mac_device_info *hw, bool en, writel(value, ioaddr + XGMAC_RX_CONFIG); } +static int dwxgmac3_est_write(void __iomem *ioaddr, u32 reg, u32 val, bool gcl) +{ + u32 ctrl; + + writel(val, ioaddr + XGMAC_MTL_EST_GCL_DATA); + + ctrl = (reg << XGMAC_ADDR_SHIFT); + ctrl |= gcl ? 0 : XGMAC_GCRR; + + writel(ctrl, ioaddr + XGMAC_MTL_EST_GCL_CONTROL); + + ctrl |= XGMAC_SRWO; + writel(ctrl, ioaddr + XGMAC_MTL_EST_GCL_CONTROL); + + return readl_poll_timeout_atomic(ioaddr + XGMAC_MTL_EST_GCL_CONTROL, + ctrl, !(ctrl & XGMAC_SRWO), 100, 5000); +} + +static int dwxgmac3_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg, + unsigned int ptp_rate) +{ + int i, ret = 0x0; + u32 ctrl; + + ret |= dwxgmac3_est_write(ioaddr, XGMAC_BTR_LOW, cfg->btr[0], false); + ret |= dwxgmac3_est_write(ioaddr, XGMAC_BTR_HIGH, cfg->btr[1], false); + ret |= dwxgmac3_est_write(ioaddr, XGMAC_TER, cfg->ter, false); + ret |= dwxgmac3_est_write(ioaddr, XGMAC_LLR, cfg->gcl_size, false); + ret |= dwxgmac3_est_write(ioaddr, XGMAC_CTR_LOW, cfg->ctr[0], false); + ret |= dwxgmac3_est_write(ioaddr, XGMAC_CTR_HIGH, cfg->ctr[1], false); + if (ret) + return ret; + + for (i = 0; i < cfg->gcl_size; i++) { + ret = dwxgmac3_est_write(ioaddr, i, cfg->gcl[i], true); + if (ret) + return ret; + } + + ctrl = readl(ioaddr + XGMAC_MTL_EST_CONTROL); + ctrl &= ~XGMAC_PTOV; + ctrl |= ((1000000000 / ptp_rate) * 9) << XGMAC_PTOV_SHIFT; + if (cfg->enable) + ctrl |= XGMAC_EEST | XGMAC_SSWL; + else + ctrl &= ~XGMAC_EEST; + + writel(ctrl, ioaddr + XGMAC_MTL_EST_CONTROL); + return 0; +} + +static void dwxgmac3_fpe_configure(void __iomem *ioaddr, u32 num_txq, + u32 num_rxq, bool enable) +{ + u32 value; + + if (!enable) { + value = readl(ioaddr + XGMAC_FPE_CTRL_STS); + + value &= ~XGMAC_EFPE; + + writel(value, ioaddr + XGMAC_FPE_CTRL_STS); + return; + } + + value = readl(ioaddr + XGMAC_RXQ_CTRL1); + value &= ~XGMAC_RQ; + value |= (num_rxq - 1) << XGMAC_RQ_SHIFT; + writel(value, ioaddr + XGMAC_RXQ_CTRL1); + + value = readl(ioaddr + XGMAC_FPE_CTRL_STS); + value |= XGMAC_EFPE; + writel(value, ioaddr + XGMAC_FPE_CTRL_STS); +} + const struct stmmac_ops dwxgmac210_ops = { .core_init = dwxgmac2_core_init, .set_mac = dwxgmac2_set_mac, @@ -1402,6 +1477,8 @@ const struct stmmac_ops dwxgmac210_ops = { .config_l3_filter = dwxgmac2_config_l3_filter, .config_l4_filter = dwxgmac2_config_l4_filter, .set_arp_offload = dwxgmac2_set_arp_offload, + .est_configure = 
dwxgmac3_est_configure, + .fpe_configure = dwxgmac3_fpe_configure, }; int dwxgmac2_setup(struct stmmac_priv *priv) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c index bd5838ce1e8a..c3d654cfa9ef 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c @@ -339,6 +339,14 @@ static void dwxgmac2_set_vlan(struct dma_desc *p, u32 type) p->des2 |= cpu_to_le32(type & XGMAC_TDES2_VTIR); } +static void dwxgmac2_set_tbs(struct dma_edesc *p, u32 sec, u32 nsec) +{ + p->des4 = cpu_to_le32((sec & XGMAC_TDES0_LT) | XGMAC_TDES0_LTV); + p->des5 = cpu_to_le32(nsec & XGMAC_TDES1_LT); + p->des6 = 0; + p->des7 = 0; +} + const struct stmmac_desc_ops dwxgmac210_desc_ops = { .tx_status = dwxgmac2_get_tx_status, .rx_status = dwxgmac2_get_rx_status, @@ -368,4 +376,5 @@ const struct stmmac_desc_ops dwxgmac210_desc_ops = { .set_sarc = dwxgmac2_set_sarc, .set_vlan_tag = dwxgmac2_set_vlan_tag, .set_vlan = dwxgmac2_set_vlan, + .set_tbs = dwxgmac2_set_tbs, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index f3f08ccc379b..77308c5c5d29 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -248,14 +248,30 @@ static void dwxgmac2_dma_tx_mode(void __iomem *ioaddr, int mode, writel(value, ioaddr + XGMAC_MTL_TXQ_OPMODE(channel)); } -static void dwxgmac2_enable_dma_irq(void __iomem *ioaddr, u32 chan) +static void dwxgmac2_enable_dma_irq(void __iomem *ioaddr, u32 chan, + bool rx, bool tx) { - writel(XGMAC_DMA_INT_DEFAULT_EN, ioaddr + XGMAC_DMA_CH_INT_EN(chan)); + u32 value = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan)); + + if (rx) + value |= XGMAC_DMA_INT_DEFAULT_RX; + if (tx) + value |= XGMAC_DMA_INT_DEFAULT_TX; + + writel(value, ioaddr + XGMAC_DMA_CH_INT_EN(chan)); } -static void dwxgmac2_disable_dma_irq(void __iomem *ioaddr, u32 chan) +static void dwxgmac2_disable_dma_irq(void __iomem *ioaddr, u32 chan, + bool rx, bool tx) { - writel(0, ioaddr + XGMAC_DMA_CH_INT_EN(chan)); + u32 value = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan)); + + if (rx) + value &= ~XGMAC_DMA_INT_DEFAULT_RX; + if (tx) + value &= ~XGMAC_DMA_INT_DEFAULT_TX; + + writel(value, ioaddr + XGMAC_DMA_CH_INT_EN(chan)); } static void dwxgmac2_dma_start_tx(void __iomem *ioaddr, u32 chan) @@ -413,6 +429,11 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr, /* MAC HW feature 3 */ hw_cap = readl(ioaddr + XGMAC_HW_FEATURE3); + dma_cap->tbssel = (hw_cap & XGMAC_HWFEAT_TBSSEL) >> 27; + dma_cap->fpesel = (hw_cap & XGMAC_HWFEAT_FPESEL) >> 26; + dma_cap->estwid = (hw_cap & XGMAC_HWFEAT_ESTWID) >> 23; + dma_cap->estdep = (hw_cap & XGMAC_HWFEAT_ESTDEP) >> 20; + dma_cap->estsel = (hw_cap & XGMAC_HWFEAT_ESTSEL) >> 19; dma_cap->asp = (hw_cap & XGMAC_HWFEAT_ASP) >> 14; dma_cap->dvlan = (hw_cap & XGMAC_HWFEAT_DVLAN) >> 13; dma_cap->frpes = (hw_cap & XGMAC_HWFEAT_FRPES) >> 11; @@ -503,6 +524,28 @@ static void dwxgmac2_enable_sph(void __iomem *ioaddr, bool en, u32 chan) writel(value, ioaddr + XGMAC_DMA_CH_CONTROL(chan)); } +static int dwxgmac2_enable_tbs(void __iomem *ioaddr, bool en, u32 chan) +{ + u32 value = readl(ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan)); + + if (en) + value |= XGMAC_EDSE; + else + value &= ~XGMAC_EDSE; + + writel(value, ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan)); + + value = readl(ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan)) & XGMAC_EDSE; + if (en && !value) + return -EIO; + + 
writel(XGMAC_DEF_FTOS, ioaddr + XGMAC_DMA_TBS_CTRL0); + writel(XGMAC_DEF_FTOS, ioaddr + XGMAC_DMA_TBS_CTRL1); + writel(XGMAC_DEF_FTOS, ioaddr + XGMAC_DMA_TBS_CTRL2); + writel(XGMAC_DEF_FTOS, ioaddr + XGMAC_DMA_TBS_CTRL3); + return 0; +} + const struct stmmac_dma_ops dwxgmac210_dma_ops = { .reset = dwxgmac2_dma_reset, .init = dwxgmac2_dma_init, @@ -530,4 +573,5 @@ const struct stmmac_dma_ops dwxgmac210_dma_ops = { .qmode = dwxgmac2_qmode, .set_bfsize = dwxgmac2_set_bfsize, .enable_sph = dwxgmac2_enable_sph, + .enable_tbs = dwxgmac2_enable_tbs, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index aa5b917398fe..df63b0367aff 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -29,6 +29,7 @@ struct stmmac_extra_stats; struct stmmac_safety_stats; struct dma_desc; struct dma_extended_desc; +struct dma_edesc; /* Descriptors helpers */ struct stmmac_desc_ops { @@ -95,6 +96,7 @@ struct stmmac_desc_ops { void (*set_vlan_tag)(struct dma_desc *p, u16 tag, u16 inner_tag, u32 inner_type); void (*set_vlan)(struct dma_desc *p, u32 type); + void (*set_tbs)(struct dma_edesc *p, u32 sec, u32 nsec); }; #define stmmac_init_rx_desc(__priv, __args...) \ @@ -157,6 +159,8 @@ struct stmmac_desc_ops { stmmac_do_void_callback(__priv, desc, set_vlan_tag, __args) #define stmmac_set_desc_vlan(__priv, __args...) \ stmmac_do_void_callback(__priv, desc, set_vlan, __args) +#define stmmac_set_desc_tbs(__priv, __args...) \ + stmmac_do_void_callback(__priv, desc, set_tbs, __args) struct stmmac_dma_cfg; struct dma_features; @@ -187,8 +191,10 @@ struct stmmac_dma_ops { void (*dma_diagnostic_fr) (void *data, struct stmmac_extra_stats *x, void __iomem *ioaddr); void (*enable_dma_transmission) (void __iomem *ioaddr); - void (*enable_dma_irq)(void __iomem *ioaddr, u32 chan); - void (*disable_dma_irq)(void __iomem *ioaddr, u32 chan); + void (*enable_dma_irq)(void __iomem *ioaddr, u32 chan, + bool rx, bool tx); + void (*disable_dma_irq)(void __iomem *ioaddr, u32 chan, + bool rx, bool tx); void (*start_tx)(void __iomem *ioaddr, u32 chan); void (*stop_tx)(void __iomem *ioaddr, u32 chan); void (*start_rx)(void __iomem *ioaddr, u32 chan); @@ -208,6 +214,7 @@ struct stmmac_dma_ops { void (*qmode)(void __iomem *ioaddr, u32 channel, u8 qmode); void (*set_bfsize)(void __iomem *ioaddr, int bfsize, u32 chan); void (*enable_sph)(void __iomem *ioaddr, bool en, u32 chan); + int (*enable_tbs)(void __iomem *ioaddr, bool en, u32 chan); }; #define stmmac_reset(__priv, __args...) \ @@ -266,6 +273,8 @@ struct stmmac_dma_ops { stmmac_do_void_callback(__priv, dma, set_bfsize, __args) #define stmmac_enable_sph(__priv, __args...) \ stmmac_do_void_callback(__priv, dma, enable_sph, __args) +#define stmmac_enable_tbs(__priv, __args...) \ + stmmac_do_callback(__priv, dma, enable_tbs, __args) struct mac_device_info; struct net_device; @@ -274,6 +283,7 @@ struct stmmac_safety_stats; struct stmmac_tc_entry; struct stmmac_pps_cfg; struct stmmac_rss; +struct stmmac_est; /* Helpers to program the MAC core */ struct stmmac_ops { @@ -371,6 +381,10 @@ struct stmmac_ops { bool en, bool udp, bool sa, bool inv, u32 match); void (*set_arp_offload)(struct mac_device_info *hw, bool en, u32 addr); + int (*est_configure)(void __iomem *ioaddr, struct stmmac_est *cfg, + unsigned int ptp_rate); + void (*fpe_configure)(void __iomem *ioaddr, u32 num_txq, u32 num_rxq, + bool enable); }; #define stmmac_core_init(__priv, __args...) 
\ @@ -457,6 +471,10 @@ struct stmmac_ops { stmmac_do_callback(__priv, mac, config_l4_filter, __args) #define stmmac_set_arp_offload(__priv, __args...) \ stmmac_do_void_callback(__priv, mac, set_arp_offload, __args) +#define stmmac_est_configure(__priv, __args...) \ + stmmac_do_callback(__priv, mac, est_configure, __args) +#define stmmac_fpe_configure(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, fpe_configure, __args) /* PTP and HW Timer helpers */ struct stmmac_hwtimestamp { @@ -514,6 +532,8 @@ struct stmmac_priv; struct tc_cls_u32_offload; struct tc_cbs_qopt_offload; struct flow_cls_offload; +struct tc_taprio_qopt_offload; +struct tc_etf_qopt_offload; struct stmmac_tc_ops { int (*init)(struct stmmac_priv *priv); @@ -523,6 +543,10 @@ struct stmmac_tc_ops { struct tc_cbs_qopt_offload *qopt); int (*setup_cls)(struct stmmac_priv *priv, struct flow_cls_offload *cls); + int (*setup_taprio)(struct stmmac_priv *priv, + struct tc_taprio_qopt_offload *qopt); + int (*setup_etf)(struct stmmac_priv *priv, + struct tc_etf_qopt_offload *qopt); }; #define stmmac_tc_init(__priv, __args...) \ @@ -533,6 +557,10 @@ struct stmmac_tc_ops { stmmac_do_callback(__priv, tc, setup_cbs, __args) #define stmmac_tc_setup_cls(__priv, __args...) \ stmmac_do_callback(__priv, tc, setup_cls, __args) +#define stmmac_tc_setup_taprio(__priv, __args...) \ + stmmac_do_callback(__priv, tc, setup_taprio, __args) +#define stmmac_tc_setup_etf(__priv, __args...) \ + stmmac_do_callback(__priv, tc, setup_etf, __args) struct stmmac_counters; diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c index 252cf48c5816..a57b0fa815ab 100644 --- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c @@ -119,6 +119,13 @@ #define MMC_RX_ICMP_GD_OCTETS 0x180 #define MMC_RX_ICMP_ERR_OCTETS 0x184 +#define MMC_TX_FPE_FRAG 0x1a8 +#define MMC_TX_HOLD_REQ 0x1ac +#define MMC_RX_PKT_ASSEMBLY_ERR 0x1c8 +#define MMC_RX_PKT_SMD_ERR 0x1cc +#define MMC_RX_PKT_ASSEMBLY_OK 0x1d0 +#define MMC_RX_FPE_FRAG 0x1d4 + /* XGMAC MMC Registers */ #define MMC_XGMAC_TX_OCTET_GB 0x14 #define MMC_XGMAC_TX_PKT_GB 0x1c @@ -315,6 +322,15 @@ static void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc) mmc->mmc_rx_tcp_err_octets += readl(mmcaddr + MMC_RX_TCP_ERR_OCTETS); mmc->mmc_rx_icmp_gd_octets += readl(mmcaddr + MMC_RX_ICMP_GD_OCTETS); mmc->mmc_rx_icmp_err_octets += readl(mmcaddr + MMC_RX_ICMP_ERR_OCTETS); + + mmc->mmc_tx_fpe_fragment_cntr += readl(mmcaddr + MMC_TX_FPE_FRAG); + mmc->mmc_tx_hold_req_cntr += readl(mmcaddr + MMC_TX_HOLD_REQ); + mmc->mmc_rx_packet_assembly_err_cntr += + readl(mmcaddr + MMC_RX_PKT_ASSEMBLY_ERR); + mmc->mmc_rx_packet_smd_err_cntr += readl(mmcaddr + MMC_RX_PKT_SMD_ERR); + mmc->mmc_rx_packet_assembly_ok_cntr += + readl(mmcaddr + MMC_RX_PKT_ASSEMBLY_OK); + mmc->mmc_rx_fpe_fragment_cntr += readl(mmcaddr + MMC_RX_FPE_FRAG); } const struct stmmac_mmc_ops dwmac_mmc_ops = { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index d993fc7e82c3..9c02fc754bf1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -39,13 +39,18 @@ struct stmmac_tx_info { bool is_jumbo; }; +#define STMMAC_TBS_AVAIL BIT(0) +#define STMMAC_TBS_EN BIT(1) + /* Frequently used values are kept adjacent for cache effect */ struct stmmac_tx_queue { u32 tx_count_frames; + int tbs; struct timer_list txtimer; u32 queue_index; struct stmmac_priv 
*priv_data; struct dma_extended_desc *dma_etx ____cacheline_aligned_in_smp; + struct dma_edesc *dma_entx; struct dma_desc *dma_tx; struct sk_buff **tx_skbuff; struct stmmac_tx_info *tx_skbuff_dma; @@ -88,6 +93,7 @@ struct stmmac_channel { struct napi_struct rx_napi ____cacheline_aligned_in_smp; struct napi_struct tx_napi ____cacheline_aligned_in_smp; struct stmmac_priv *priv_data; + spinlock_t lock; u32 index; }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 80d59b775907..ff1cbfc834b0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -388,9 +388,8 @@ bool stmmac_eee_init(struct stmmac_priv *priv) /* Using PCS we cannot dial with the phy registers at this stage * so we do not support extra feature like EEE. */ - if ((priv->hw->pcs == STMMAC_PCS_RGMII) || - (priv->hw->pcs == STMMAC_PCS_TBI) || - (priv->hw->pcs == STMMAC_PCS_RTBI)) + if (priv->hw->pcs == STMMAC_PCS_TBI || + priv->hw->pcs == STMMAC_PCS_RTBI) return false; /* Check if MAC core supports the EEE feature. */ @@ -1090,6 +1089,8 @@ static void stmmac_display_tx_rings(struct stmmac_priv *priv) if (priv->extend_desc) head_tx = (void *)tx_q->dma_etx; + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + head_tx = (void *)tx_q->dma_entx; else head_tx = (void *)tx_q->dma_tx; @@ -1163,13 +1164,19 @@ static void stmmac_clear_tx_descriptors(struct stmmac_priv *priv, u32 queue) int i; /* Clear the TX descriptors */ - for (i = 0; i < DMA_TX_SIZE; i++) + for (i = 0; i < DMA_TX_SIZE; i++) { + int last = (i == (DMA_TX_SIZE - 1)); + struct dma_desc *p; + if (priv->extend_desc) - stmmac_init_tx_desc(priv, &tx_q->dma_etx[i].basic, - priv->mode, (i == DMA_TX_SIZE - 1)); + p = &tx_q->dma_etx[i].basic; + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + p = &tx_q->dma_entx[i].basic; else - stmmac_init_tx_desc(priv, &tx_q->dma_tx[i], - priv->mode, (i == DMA_TX_SIZE - 1)); + p = &tx_q->dma_tx[i]; + + stmmac_init_tx_desc(priv, p, priv->mode, last); + } } /** @@ -1383,7 +1390,7 @@ static int init_dma_tx_desc_rings(struct net_device *dev) if (priv->extend_desc) stmmac_mode_init(priv, tx_q->dma_etx, tx_q->dma_tx_phy, DMA_TX_SIZE, 1); - else + else if (!(tx_q->tbs & STMMAC_TBS_AVAIL)) stmmac_mode_init(priv, tx_q->dma_tx, tx_q->dma_tx_phy, DMA_TX_SIZE, 0); } @@ -1392,6 +1399,8 @@ static int init_dma_tx_desc_rings(struct net_device *dev) struct dma_desc *p; if (priv->extend_desc) p = &((tx_q->dma_etx + i)->basic); + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + p = &((tx_q->dma_entx + i)->basic); else p = tx_q->dma_tx + i; @@ -1511,19 +1520,26 @@ static void free_dma_tx_desc_resources(struct stmmac_priv *priv) /* Free TX queue resources */ for (queue = 0; queue < tx_count; queue++) { struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; + size_t size; + void *addr; /* Release the DMA TX socket buffers */ dma_free_tx_skbufs(priv, queue); - /* Free DMA regions of consistent memory previously allocated */ - if (!priv->extend_desc) - dma_free_coherent(priv->device, - DMA_TX_SIZE * sizeof(struct dma_desc), - tx_q->dma_tx, tx_q->dma_tx_phy); - else - dma_free_coherent(priv->device, DMA_TX_SIZE * - sizeof(struct dma_extended_desc), - tx_q->dma_etx, tx_q->dma_tx_phy); + if (priv->extend_desc) { + size = sizeof(struct dma_extended_desc); + addr = tx_q->dma_etx; + } else if (tx_q->tbs & STMMAC_TBS_AVAIL) { + size = sizeof(struct dma_edesc); + addr = tx_q->dma_entx; + } else { + size = sizeof(struct dma_desc); + addr = tx_q->dma_tx; + } + + size *= 
DMA_TX_SIZE; + + dma_free_coherent(priv->device, size, addr, tx_q->dma_tx_phy); kfree(tx_q->tx_skbuff_dma); kfree(tx_q->tx_skbuff); @@ -1616,6 +1632,8 @@ static int alloc_dma_tx_desc_resources(struct stmmac_priv *priv) /* TX queues buffers and DMA */ for (queue = 0; queue < tx_count; queue++) { struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; + size_t size; + void *addr; tx_q->queue_index = queue; tx_q->priv_data = priv; @@ -1632,28 +1650,32 @@ static int alloc_dma_tx_desc_resources(struct stmmac_priv *priv) if (!tx_q->tx_skbuff) goto err_dma; - if (priv->extend_desc) { - tx_q->dma_etx = dma_alloc_coherent(priv->device, - DMA_TX_SIZE * sizeof(struct dma_extended_desc), - &tx_q->dma_tx_phy, - GFP_KERNEL); - if (!tx_q->dma_etx) - goto err_dma; - } else { - tx_q->dma_tx = dma_alloc_coherent(priv->device, - DMA_TX_SIZE * sizeof(struct dma_desc), - &tx_q->dma_tx_phy, - GFP_KERNEL); - if (!tx_q->dma_tx) - goto err_dma; - } + if (priv->extend_desc) + size = sizeof(struct dma_extended_desc); + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + size = sizeof(struct dma_edesc); + else + size = sizeof(struct dma_desc); + + size *= DMA_TX_SIZE; + + addr = dma_alloc_coherent(priv->device, size, + &tx_q->dma_tx_phy, GFP_KERNEL); + if (!addr) + goto err_dma; + + if (priv->extend_desc) + tx_q->dma_etx = addr; + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + tx_q->dma_entx = addr; + else + tx_q->dma_tx = addr; } return 0; err_dma: free_dma_tx_desc_resources(priv); - return ret; } @@ -1885,6 +1907,8 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) if (priv->extend_desc) p = (struct dma_desc *)(tx_q->dma_etx + entry); + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + p = &tx_q->dma_entx[entry].basic; else p = tx_q->dma_tx + entry; @@ -1966,7 +1990,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) /* We still have pending packets, let's call for a new scheduling */ if (tx_q->dirty_tx != tx_q->cur_tx) - mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(10)); + mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(priv->tx_coal_timer)); __netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue)); @@ -1983,19 +2007,12 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) static void stmmac_tx_err(struct stmmac_priv *priv, u32 chan) { struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan]; - int i; netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, chan)); stmmac_stop_tx_dma(priv, chan); dma_free_tx_skbufs(priv, chan); - for (i = 0; i < DMA_TX_SIZE; i++) - if (priv->extend_desc) - stmmac_init_tx_desc(priv, &tx_q->dma_etx[i].basic, - priv->mode, (i == DMA_TX_SIZE - 1)); - else - stmmac_init_tx_desc(priv, &tx_q->dma_tx[i], - priv->mode, (i == DMA_TX_SIZE - 1)); + stmmac_clear_tx_descriptors(priv, chan); tx_q->dirty_tx = 0; tx_q->cur_tx = 0; tx_q->mss = 0; @@ -2060,17 +2077,25 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan) int status = stmmac_dma_interrupt_status(priv, priv->ioaddr, &priv->xstats, chan); struct stmmac_channel *ch = &priv->channel[chan]; + unsigned long flags; if ((status & handle_rx) && (chan < priv->plat->rx_queues_to_use)) { if (napi_schedule_prep(&ch->rx_napi)) { - stmmac_disable_dma_irq(priv, priv->ioaddr, chan); + spin_lock_irqsave(&ch->lock, flags); + stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 1, 0); + spin_unlock_irqrestore(&ch->lock, flags); __napi_schedule_irqoff(&ch->rx_napi); - status |= handle_tx; } } - if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) - napi_schedule_irqoff(&ch->tx_napi); + if 
((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) { + if (napi_schedule_prep(&ch->tx_napi)) { + spin_lock_irqsave(&ch->lock, flags); + stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 0, 1); + spin_unlock_irqrestore(&ch->lock, flags); + __napi_schedule_irqoff(&ch->tx_napi); + } + } return status; } @@ -2265,14 +2290,14 @@ static void stmmac_tx_timer(struct timer_list *t) ch = &priv->channel[tx_q->queue_index]; - /* - * If NAPI is already running we can miss some events. Let's rearm - * the timer and try again. - */ - if (likely(napi_schedule_prep(&ch->tx_napi))) + if (likely(napi_schedule_prep(&ch->tx_napi))) { + unsigned long flags; + + spin_lock_irqsave(&ch->lock, flags); + stmmac_disable_dma_irq(priv, priv->ioaddr, ch->index, 0, 1); + spin_unlock_irqrestore(&ch->lock, flags); __napi_schedule(&ch->tx_napi); - else - mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(10)); + } } /** @@ -2624,6 +2649,14 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) if (priv->dma_cap.vlins) stmmac_enable_vlan(priv, priv->hw, STMMAC_VLAN_INSERT); + /* TBS */ + for (chan = 0; chan < tx_cnt; chan++) { + struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan]; + int enable = tx_q->tbs & STMMAC_TBS_AVAIL; + + stmmac_enable_tbs(priv, priv->ioaddr, enable, chan); + } + /* Start the ball rolling... */ stmmac_start_all_dma(priv); @@ -2653,8 +2686,7 @@ static int stmmac_open(struct net_device *dev) u32 chan; int ret; - if (priv->hw->pcs != STMMAC_PCS_RGMII && - priv->hw->pcs != STMMAC_PCS_TBI && + if (priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI) { ret = stmmac_init_phy(dev); if (ret) { @@ -2681,6 +2713,16 @@ static int stmmac_open(struct net_device *dev) priv->rx_copybreak = STMMAC_RX_COPYBREAK; + /* Earlier check for TBS */ + for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++) { + struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan]; + int tbs_en = priv->plat->tx_queues_cfg[chan].tbs_en; + + tx_q->tbs |= tbs_en ? 
STMMAC_TBS_AVAIL : 0; + if (stmmac_enable_tbs(priv, priv->ioaddr, tbs_en, chan)) + tx_q->tbs &= ~STMMAC_TBS_AVAIL; + } + ret = alloc_dma_desc_resources(priv); if (ret < 0) { netdev_err(priv->dev, "%s: DMA descriptors allocation failed\n", @@ -2829,7 +2871,11 @@ static bool stmmac_vlan_insert(struct stmmac_priv *priv, struct sk_buff *skb, tag = skb_vlan_tag_get(skb); - p = tx_q->dma_tx + tx_q->cur_tx; + if (tx_q->tbs & STMMAC_TBS_AVAIL) + p = &tx_q->dma_entx[tx_q->cur_tx].basic; + else + p = &tx_q->dma_tx[tx_q->cur_tx]; + if (stmmac_set_desc_vlan_tag(priv, p, tag, inner_tag, inner_type)) return false; @@ -2864,7 +2910,11 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, dma_addr_t des, tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE); WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]); - desc = tx_q->dma_tx + tx_q->cur_tx; + + if (tx_q->tbs & STMMAC_TBS_AVAIL) + desc = &tx_q->dma_entx[tx_q->cur_tx].basic; + else + desc = &tx_q->dma_tx[tx_q->cur_tx]; curr_addr = des + (total_len - tmp_len); if (priv->dma_cap.addr64 <= 32) @@ -2915,13 +2965,13 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) { struct dma_desc *desc, *first, *mss_desc = NULL; struct stmmac_priv *priv = netdev_priv(dev); + int desc_size, tmp_pay_len = 0, first_tx; int nfrags = skb_shinfo(skb)->nr_frags; u32 queue = skb_get_queue_mapping(skb); unsigned int first_entry, tx_packets; - int tmp_pay_len = 0, first_tx; struct stmmac_tx_queue *tx_q; - u8 proto_hdr_len, hdr; bool has_vlan, set_ic; + u8 proto_hdr_len, hdr; u32 pay_len, mss; dma_addr_t des; int i; @@ -2958,7 +3008,11 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) /* set new MSS value if needed */ if (mss != tx_q->mss) { - mss_desc = tx_q->dma_tx + tx_q->cur_tx; + if (tx_q->tbs & STMMAC_TBS_AVAIL) + mss_desc = &tx_q->dma_entx[tx_q->cur_tx].basic; + else + mss_desc = &tx_q->dma_tx[tx_q->cur_tx]; + stmmac_set_mss(priv, mss_desc, mss); tx_q->mss = mss; tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE); @@ -2978,7 +3032,10 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) first_entry = tx_q->cur_tx; WARN_ON(tx_q->tx_skbuff[first_entry]); - desc = tx_q->dma_tx + first_entry; + if (tx_q->tbs & STMMAC_TBS_AVAIL) + desc = &tx_q->dma_entx[first_entry].basic; + else + desc = &tx_q->dma_tx[first_entry]; first = desc; if (has_vlan) @@ -3050,7 +3107,11 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) set_ic = false; if (set_ic) { - desc = &tx_q->dma_tx[tx_q->cur_tx]; + if (tx_q->tbs & STMMAC_TBS_AVAIL) + desc = &tx_q->dma_entx[tx_q->cur_tx].basic; + else + desc = &tx_q->dma_tx[tx_q->cur_tx]; + tx_q->tx_count_frames = 0; stmmac_set_tx_ic(priv, desc); priv->xstats.tx_set_ic_bit++; @@ -3113,16 +3174,18 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) pr_info("%s: curr=%d dirty=%d f=%d, e=%d, f_p=%p, nfrags %d\n", __func__, tx_q->cur_tx, tx_q->dirty_tx, first_entry, tx_q->cur_tx, first, nfrags); - - stmmac_display_ring(priv, (void *)tx_q->dma_tx, DMA_TX_SIZE, 0); - pr_info(">>> frame to be transmitted: "); print_pkt(skb->data, skb_headlen(skb)); } netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len); - tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * sizeof(*desc)); + if (tx_q->tbs & STMMAC_TBS_AVAIL) + desc_size = sizeof(struct dma_edesc); + else + desc_size = sizeof(struct dma_desc); + + tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * desc_size); stmmac_set_tx_tail_ptr(priv, 
priv->ioaddr, tx_q->tx_tail_addr, queue); stmmac_tx_timer_arm(priv, queue); @@ -3152,10 +3215,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) u32 queue = skb_get_queue_mapping(skb); int nfrags = skb_shinfo(skb)->nr_frags; int gso = skb_shinfo(skb)->gso_type; + struct dma_edesc *tbs_desc = NULL; + int entry, desc_size, first_tx; struct dma_desc *desc, *first; struct stmmac_tx_queue *tx_q; bool has_vlan, set_ic; - int entry, first_tx; dma_addr_t des; tx_q = &priv->tx_queue[queue]; @@ -3195,6 +3259,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) if (likely(priv->extend_desc)) desc = (struct dma_desc *)(tx_q->dma_etx + entry); + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + desc = &tx_q->dma_entx[entry].basic; else desc = tx_q->dma_tx + entry; @@ -3224,6 +3290,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) if (likely(priv->extend_desc)) desc = (struct dma_desc *)(tx_q->dma_etx + entry); + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + desc = &tx_q->dma_entx[entry].basic; else desc = tx_q->dma_tx + entry; @@ -3270,6 +3338,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) if (set_ic) { if (likely(priv->extend_desc)) desc = &tx_q->dma_etx[entry].basic; + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + desc = &tx_q->dma_entx[entry].basic; else desc = &tx_q->dma_tx[entry]; @@ -3287,20 +3357,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) tx_q->cur_tx = entry; if (netif_msg_pktdata(priv)) { - void *tx_head; - netdev_dbg(priv->dev, "%s: curr=%d dirty=%d f=%d, e=%d, first=%p, nfrags=%d", __func__, tx_q->cur_tx, tx_q->dirty_tx, first_entry, entry, first, nfrags); - if (priv->extend_desc) - tx_head = (void *)tx_q->dma_etx; - else - tx_head = (void *)tx_q->dma_tx; - - stmmac_display_ring(priv, tx_head, DMA_TX_SIZE, false); - netdev_dbg(priv->dev, ">>> frame to be transmitted: "); print_pkt(skb->data, skb->len); } @@ -3346,12 +3407,19 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) /* Prepare the first descriptor setting the OWN bit too */ stmmac_prepare_tx_desc(priv, first, 1, nopaged_len, - csum_insertion, priv->mode, 1, last_segment, + csum_insertion, priv->mode, 0, last_segment, skb->len); - } else { - stmmac_set_tx_owner(priv, first); } + if (tx_q->tbs & STMMAC_TBS_EN) { + struct timespec64 ts = ns_to_timespec64(skb->tstamp); + + tbs_desc = &tx_q->dma_entx[first_entry]; + stmmac_set_desc_tbs(priv, tbs_desc, ts.tv_sec, ts.tv_nsec); + } + + stmmac_set_tx_owner(priv, first); + /* The own bit must be the latest setting done when prepare the * descriptor and then barrier is needed to make sure that * all is coherent before granting the DMA engine. 
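Context for the launch-time path above: stmmac_xmit() now copies skb->tstamp into the enhanced descriptor through stmmac_set_desc_tbs(), so the launch time normally originates from a socket using SO_TXTIME behind an etf qdisc installed in offload mode on a TBS-capable queue (which the new tc_setup_etf() callback enables). The userspace sketch below is illustrative only and not part of the patch; it uses the standard SO_TXTIME/SCM_TXTIME socket API, and the destination address, port and 500 ms offset are assumptions.

/* Illustrative sender (not part of the patch): request a TX launch time via
 * SO_TXTIME so that, with an offloaded etf qdisc on a TBS-capable queue, the
 * stack hands the timestamp to the driver in skb->tstamp.
 */
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <time.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/net_tstamp.h>	/* struct sock_txtime, SOF_TXTIME_* */

#ifndef SO_TXTIME
#define SO_TXTIME 61
#endif
#ifndef SCM_TXTIME
#define SCM_TXTIME SO_TXTIME
#endif

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	struct sock_txtime so_txtime = {
		.clockid = CLOCK_TAI,
		.flags = SOF_TXTIME_REPORT_ERRORS,
	};
	struct sockaddr_in dst = {
		.sin_family = AF_INET,
		.sin_port = htons(7788),			/* assumed */
		.sin_addr.s_addr = inet_addr("192.168.1.2"),	/* assumed */
	};
	char payload[] = "tbs test";
	char control[CMSG_SPACE(sizeof(uint64_t))] = { 0 };
	struct iovec iov = { .iov_base = payload, .iov_len = sizeof(payload) };
	struct msghdr msg = {
		.msg_name = &dst,
		.msg_namelen = sizeof(dst),
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = control,
		.msg_controllen = sizeof(control),
	};
	struct cmsghdr *cm;
	struct timespec now;
	uint64_t txtime;

	if (fd < 0 || setsockopt(fd, SOL_SOCKET, SO_TXTIME,
				 &so_txtime, sizeof(so_txtime)) < 0) {
		perror("SO_TXTIME");
		return 1;
	}

	/* Launch roughly 500 ms from now, mirroring the STMMAC_TBS_LT_OFFSET
	 * value used by the TBS selftest further down in this patch. */
	clock_gettime(CLOCK_TAI, &now);
	txtime = now.tv_sec * 1000000000ULL + now.tv_nsec + 500000000ULL;

	cm = CMSG_FIRSTHDR(&msg);
	cm->cmsg_level = SOL_SOCKET;
	cm->cmsg_type = SCM_TXTIME;
	cm->cmsg_len = CMSG_LEN(sizeof(txtime));
	memcpy(CMSG_DATA(cm), &txtime, sizeof(txtime));

	if (sendmsg(fd, &msg, 0) < 0) {
		perror("sendmsg");
		return 1;
	}

	close(fd);
	return 0;
}

With the etf qdisc offloaded to such a queue, the requested nanosecond value reaches the hardware through the set_tbs descriptor callbacks above (TDES4/TDES5 on dwmac4, TDES0/TDES1 on xgmac).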
@@ -3362,7 +3430,14 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) stmmac_enable_dma_transmission(priv, priv->ioaddr); - tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * sizeof(*desc)); + if (likely(priv->extend_desc)) + desc_size = sizeof(struct dma_extended_desc); + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + desc_size = sizeof(struct dma_edesc); + else + desc_size = sizeof(struct dma_desc); + + tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * desc_size); stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue); stmmac_tx_timer_arm(priv, queue); @@ -3751,8 +3826,14 @@ static int stmmac_napi_poll_rx(struct napi_struct *napi, int budget) priv->xstats.napi_poll++; work_done = stmmac_rx(priv, budget, chan); - if (work_done < budget && napi_complete_done(napi, work_done)) - stmmac_enable_dma_irq(priv, priv->ioaddr, chan); + if (work_done < budget && napi_complete_done(napi, work_done)) { + unsigned long flags; + + spin_lock_irqsave(&ch->lock, flags); + stmmac_enable_dma_irq(priv, priv->ioaddr, chan, 1, 0); + spin_unlock_irqrestore(&ch->lock, flags); + } + return work_done; } @@ -3761,7 +3842,6 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget) struct stmmac_channel *ch = container_of(napi, struct stmmac_channel, tx_napi); struct stmmac_priv *priv = ch->priv_data; - struct stmmac_tx_queue *tx_q; u32 chan = ch->index; int work_done; @@ -3770,15 +3850,12 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget) work_done = stmmac_tx_clean(priv, DMA_TX_SIZE, chan); work_done = min(work_done, budget); - if (work_done < budget) - napi_complete_done(napi, work_done); + if (work_done < budget && napi_complete_done(napi, work_done)) { + unsigned long flags; - /* Force transmission restart */ - tx_q = &priv->tx_queue[chan]; - if (tx_q->cur_tx != tx_q->dirty_tx) { - stmmac_enable_dma_transmission(priv, priv->ioaddr); - stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, - chan); + spin_lock_irqsave(&ch->lock, flags); + stmmac_enable_dma_irq(priv, priv->ioaddr, chan, 0, 1); + spin_unlock_irqrestore(&ch->lock, flags); } return work_done; @@ -3792,7 +3869,7 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget) * netdev structure and arrange for the device to be reset to a sane state * in order to transmit a new packet. 
*/ -static void stmmac_tx_timeout(struct net_device *dev) +static void stmmac_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct stmmac_priv *priv = netdev_priv(dev); @@ -4078,6 +4155,10 @@ static int stmmac_setup_tc(struct net_device *ndev, enum tc_setup_type type, priv, priv, true); case TC_SETUP_QDISC_CBS: return stmmac_tc_setup_cbs(priv, priv, type_data); + case TC_SETUP_QDISC_TAPRIO: + return stmmac_tc_setup_taprio(priv, priv, type_data); + case TC_SETUP_QDISC_ETF: + return stmmac_tc_setup_etf(priv, priv, type_data); default: return -EOPNOTSUPP; } @@ -4181,7 +4262,7 @@ static int stmmac_rings_status_show(struct seq_file *seq, void *v) seq_printf(seq, "Extended descriptor ring:\n"); sysfs_display_ring((void *)tx_q->dma_etx, DMA_TX_SIZE, 1, seq); - } else { + } else if (!(tx_q->tbs & STMMAC_TBS_AVAIL)) { seq_printf(seq, "Descriptor ring:\n"); sysfs_display_ring((void *)tx_q->dma_tx, DMA_TX_SIZE, 0, seq); @@ -4250,9 +4331,44 @@ static int stmmac_dma_cap_show(struct seq_file *seq, void *v) priv->dma_cap.number_rx_channel); seq_printf(seq, "\tNumber of Additional TX channel: %d\n", priv->dma_cap.number_tx_channel); + seq_printf(seq, "\tNumber of Additional RX queues: %d\n", + priv->dma_cap.number_rx_queues); + seq_printf(seq, "\tNumber of Additional TX queues: %d\n", + priv->dma_cap.number_tx_queues); seq_printf(seq, "\tEnhanced descriptors: %s\n", (priv->dma_cap.enh_desc) ? "Y" : "N"); - + seq_printf(seq, "\tTX Fifo Size: %d\n", priv->dma_cap.tx_fifo_size); + seq_printf(seq, "\tRX Fifo Size: %d\n", priv->dma_cap.rx_fifo_size); + seq_printf(seq, "\tHash Table Size: %d\n", priv->dma_cap.hash_tb_sz); + seq_printf(seq, "\tTSO: %s\n", priv->dma_cap.tsoen ? "Y" : "N"); + seq_printf(seq, "\tNumber of PPS Outputs: %d\n", + priv->dma_cap.pps_out_num); + seq_printf(seq, "\tSafety Features: %s\n", + priv->dma_cap.asp ? "Y" : "N"); + seq_printf(seq, "\tFlexible RX Parser: %s\n", + priv->dma_cap.frpsel ? "Y" : "N"); + seq_printf(seq, "\tEnhanced Addressing: %d\n", + priv->dma_cap.addr64); + seq_printf(seq, "\tReceive Side Scaling: %s\n", + priv->dma_cap.rssen ? "Y" : "N"); + seq_printf(seq, "\tVLAN Hash Filtering: %s\n", + priv->dma_cap.vlhash ? "Y" : "N"); + seq_printf(seq, "\tSplit Header: %s\n", + priv->dma_cap.sphen ? "Y" : "N"); + seq_printf(seq, "\tVLAN TX Insertion: %s\n", + priv->dma_cap.vlins ? "Y" : "N"); + seq_printf(seq, "\tDouble VLAN: %s\n", + priv->dma_cap.dvlan ? "Y" : "N"); + seq_printf(seq, "\tNumber of L3/L4 Filters: %d\n", + priv->dma_cap.l3l4fnum); + seq_printf(seq, "\tARP Offloading: %s\n", + priv->dma_cap.arpoffsel ? "Y" : "N"); + seq_printf(seq, "\tEnhancements to Scheduled Traffic (EST): %s\n", + priv->dma_cap.estsel ? "Y" : "N"); + seq_printf(seq, "\tFrame Preemption (FPE): %s\n", + priv->dma_cap.fpesel ? "Y" : "N"); + seq_printf(seq, "\tTime-Based Scheduling (TBS): %s\n", + priv->dma_cap.tbssel ? 
"Y" : "N"); return 0; } DEFINE_SHOW_ATTRIBUTE(stmmac_dma_cap); @@ -4728,6 +4844,7 @@ int stmmac_dvr_probe(struct device *device, for (queue = 0; queue < maxq; queue++) { struct stmmac_channel *ch = &priv->channel[queue]; + spin_lock_init(&ch->lock); ch->priv_data = priv; ch->index = queue; @@ -4757,8 +4874,7 @@ int stmmac_dvr_probe(struct device *device, stmmac_check_pcs_mode(priv); - if (priv->hw->pcs != STMMAC_PCS_RGMII && - priv->hw->pcs != STMMAC_PCS_TBI && + if (priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI) { /* MDIO bus Registration */ ret = stmmac_mdio_register(ndev); @@ -4792,8 +4908,7 @@ int stmmac_dvr_probe(struct device *device, error_netdev_register: phylink_destroy(priv->phylink); error_phy_setup: - if (priv->hw->pcs != STMMAC_PCS_RGMII && - priv->hw->pcs != STMMAC_PCS_TBI && + if (priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI) stmmac_mdio_unregister(ndev); error_mdio_register: @@ -4838,8 +4953,7 @@ int stmmac_dvr_remove(struct device *dev) reset_control_assert(priv->plat->stmmac_rst); clk_disable_unprepare(priv->plat->pclk); clk_disable_unprepare(priv->plat->stmmac_clk); - if (priv->hw->pcs != STMMAC_PCS_RGMII && - priv->hw->pcs != STMMAC_PCS_TBI && + if (priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI) stmmac_mdio_unregister(ndev); destroy_workqueue(priv->wq); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 8237dbc3e991..623521052152 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -65,7 +65,6 @@ static void common_default_data(struct plat_stmmacenet_data *plat) plat->force_sf_dma_mode = 1; plat->mdio_bus_data->needs_reset = true; - plat->mdio_bus_data->phy_mask = 0; /* Set default value for multicast hash bins */ plat->multicast_filter_bins = HASH_TABLE_SIZE; @@ -154,8 +153,6 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, plat->tx_queues_cfg[6].weight = 0x0F; plat->tx_queues_cfg[7].weight = 0x10; - plat->mdio_bus_data->phy_mask = 0; - plat->dma_cfg->pbl = 32; plat->dma_cfg->pblx8 = true; plat->dma_cfg->fixed_burst = 0; @@ -386,8 +383,6 @@ static int snps_gmac5_default_data(struct pci_dev *pdev, plat->tso_en = 1; plat->pmt = 1; - plat->mdio_bus_data->phy_mask = 0; - /* Set default value for multicast hash bins */ plat->multicast_filter_bins = HASH_TABLE_SIZE; @@ -406,6 +401,8 @@ static int snps_gmac5_default_data(struct pci_dev *pdev, plat->tx_queues_cfg[i].use_prio = false; plat->tx_queues_cfg[i].mode_to_use = MTL_QUEUE_DCB; plat->tx_queues_cfg[i].weight = 25; + if (i > 0) + plat->tx_queues_cfg[i].tbs_en = 1; } plat->rx_sched_algorithm = MTL_RX_ALGORITHM_SP; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 4775f49d7f3b..d10ac54bf385 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -412,9 +412,9 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) *mac = NULL; } - rc = of_get_phy_mode(np, &plat->phy_interface); - if (rc) - return ERR_PTR(rc); + plat->phy_interface = device_get_phy_mode(&pdev->dev); + if (plat->phy_interface < 0) + return ERR_PTR(plat->phy_interface); plat->interface = stmmac_of_get_mac_mode(np); if (plat->interface < 0) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c index 
450d7dac3ea6..2aba2673d6c3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c @@ -14,6 +14,7 @@ #include <linux/phy.h> #include <linux/udp.h> #include <net/pkt_cls.h> +#include <net/pkt_sched.h> #include <net/tcp.h> #include <net/udp.h> #include <net/tc_act/tc_gact.h> @@ -50,6 +51,7 @@ struct stmmac_packet_attrs { u8 id; int sarc; u16 queue_mapping; + u64 timestamp; }; static u8 stmmac_test_next_id; @@ -208,6 +210,9 @@ static struct sk_buff *stmmac_test_get_udp_skb(struct stmmac_priv *priv, skb->pkt_type = PACKET_HOST; skb->dev = priv->dev; + if (attr->timestamp) + skb->tstamp = ns_to_ktime(attr->timestamp); + return skb; } @@ -341,8 +346,7 @@ static int __stmmac_test_loopback(struct stmmac_priv *priv, goto cleanup; } - skb_set_queue_mapping(skb, attr->queue_mapping); - ret = dev_queue_xmit(skb); + ret = dev_direct_xmit(skb, attr->queue_mapping); if (ret) goto cleanup; @@ -932,8 +936,7 @@ static int __stmmac_test_vlanfilt(struct stmmac_priv *priv) goto vlan_del; } - skb_set_queue_mapping(skb, 0); - ret = dev_queue_xmit(skb); + ret = dev_direct_xmit(skb, 0); if (ret) goto vlan_del; @@ -1027,8 +1030,7 @@ static int __stmmac_test_dvlanfilt(struct stmmac_priv *priv) goto vlan_del; } - skb_set_queue_mapping(skb, 0); - ret = dev_queue_xmit(skb); + ret = dev_direct_xmit(skb, 0); if (ret) goto vlan_del; @@ -1298,8 +1300,7 @@ static int stmmac_test_vlanoff_common(struct stmmac_priv *priv, bool svlan) __vlan_hwaccel_put_tag(skb, htons(proto), tpriv->vlan_id); skb->protocol = htons(proto); - skb_set_queue_mapping(skb, 0); - ret = dev_queue_xmit(skb); + ret = dev_direct_xmit(skb, 0); if (ret) goto vlan_del; @@ -1659,8 +1660,7 @@ static int stmmac_test_arpoffload(struct stmmac_priv *priv) if (ret) goto cleanup; - skb_set_queue_mapping(skb, 0); - ret = dev_queue_xmit(skb); + ret = dev_direct_xmit(skb, 0); if (ret) goto cleanup_promisc; @@ -1748,6 +1748,68 @@ static int stmmac_test_sph(struct stmmac_priv *priv) return 0; } +static int stmmac_test_tbs(struct stmmac_priv *priv) +{ +#define STMMAC_TBS_LT_OFFSET (500 * 1000 * 1000) /* 500 ms*/ + struct stmmac_packet_attrs attr = { }; + struct tc_etf_qopt_offload qopt; + u64 start_time, curr_time = 0; + unsigned long flags; + int ret, i; + + if (!priv->hwts_tx_en) + return -EOPNOTSUPP; + + /* Find first TBS enabled Queue, if any */ + for (i = 0; i < priv->plat->tx_queues_to_use; i++) + if (priv->tx_queue[i].tbs & STMMAC_TBS_AVAIL) + break; + + if (i >= priv->plat->tx_queues_to_use) + return -EOPNOTSUPP; + + qopt.enable = true; + qopt.queue = i; + + ret = stmmac_tc_setup_etf(priv, priv, &qopt); + if (ret) + return ret; + + spin_lock_irqsave(&priv->ptp_lock, flags); + stmmac_get_systime(priv, priv->ptpaddr, &curr_time); + spin_unlock_irqrestore(&priv->ptp_lock, flags); + + if (!curr_time) { + ret = -EOPNOTSUPP; + goto fail_disable; + } + + start_time = curr_time; + curr_time += STMMAC_TBS_LT_OFFSET; + + attr.dst = priv->dev->dev_addr; + attr.timestamp = curr_time; + attr.timeout = nsecs_to_jiffies(2 * STMMAC_TBS_LT_OFFSET); + attr.queue_mapping = i; + + ret = __stmmac_test_loopback(priv, &attr); + if (ret) + goto fail_disable; + + /* Check if expected time has elapsed */ + spin_lock_irqsave(&priv->ptp_lock, flags); + stmmac_get_systime(priv, priv->ptpaddr, &curr_time); + spin_unlock_irqrestore(&priv->ptp_lock, flags); + + if ((curr_time - start_time) < STMMAC_TBS_LT_OFFSET) + ret = -EINVAL; + +fail_disable: + qopt.enable = false; + stmmac_tc_setup_etf(priv, priv, &qopt); + return ret; 
+} + #define STMMAC_LOOPBACK_NONE 0 #define STMMAC_LOOPBACK_MAC 1 #define STMMAC_LOOPBACK_PHY 2 @@ -1881,6 +1943,10 @@ static const struct stmmac_test { .name = "Split Header ", .lb = STMMAC_LOOPBACK_PHY, .fn = stmmac_test_sph, + }, { + .name = "TBS (ETF Scheduler) ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_tbs, }, }; @@ -1889,7 +1955,6 @@ void stmmac_selftest_run(struct net_device *dev, { struct stmmac_priv *priv = netdev_priv(dev); int count = stmmac_selftest_get_count(priv); - int carrier = netif_carrier_ok(dev); int i, ret; memset(buf, 0, sizeof(*buf) * count); @@ -1899,15 +1964,12 @@ void stmmac_selftest_run(struct net_device *dev, netdev_err(priv->dev, "Only offline tests are supported\n"); etest->flags |= ETH_TEST_FL_FAILED; return; - } else if (!carrier) { + } else if (!netif_carrier_ok(dev)) { netdev_err(priv->dev, "You need valid Link to execute tests\n"); etest->flags |= ETH_TEST_FL_FAILED; return; } - /* We don't want extra traffic */ - netif_carrier_off(dev); - /* Wait for queues drain */ msleep(200); @@ -1962,10 +2024,6 @@ void stmmac_selftest_run(struct net_device *dev, break; } } - - /* Restart everything */ - if (carrier) - netif_carrier_on(dev); } void stmmac_selftest_get_strings(struct stmmac_priv *priv, u8 *data) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 9ffae12a2122..7a01dee2f9a8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -595,9 +595,167 @@ static int tc_setup_cls(struct stmmac_priv *priv, return ret; } +static int tc_setup_taprio(struct stmmac_priv *priv, + struct tc_taprio_qopt_offload *qopt) +{ + u32 size, wid = priv->dma_cap.estwid, dep = priv->dma_cap.estdep; + struct plat_stmmacenet_data *plat = priv->plat; + struct timespec64 time; + bool fpe = false; + int i, ret = 0; + u64 ctr; + + if (!priv->dma_cap.estsel) + return -EOPNOTSUPP; + + switch (wid) { + case 0x1: + wid = 16; + break; + case 0x2: + wid = 20; + break; + case 0x3: + wid = 24; + break; + default: + return -EOPNOTSUPP; + } + + switch (dep) { + case 0x1: + dep = 64; + break; + case 0x2: + dep = 128; + break; + case 0x3: + dep = 256; + break; + case 0x4: + dep = 512; + break; + case 0x5: + dep = 1024; + break; + default: + return -EOPNOTSUPP; + } + + if (!qopt->enable) + goto disable; + if (qopt->num_entries >= dep) + return -EINVAL; + if (!qopt->base_time) + return -ERANGE; + if (!qopt->cycle_time) + return -ERANGE; + + if (!plat->est) { + plat->est = devm_kzalloc(priv->device, sizeof(*plat->est), + GFP_KERNEL); + if (!plat->est) + return -ENOMEM; + } else { + memset(plat->est, 0, sizeof(*plat->est)); + } + + size = qopt->num_entries; + + priv->plat->est->gcl_size = size; + priv->plat->est->enable = qopt->enable; + + for (i = 0; i < size; i++) { + s64 delta_ns = qopt->entries[i].interval; + u32 gates = qopt->entries[i].gate_mask; + + if (delta_ns > GENMASK(wid, 0)) + return -ERANGE; + if (gates > GENMASK(31 - wid, 0)) + return -ERANGE; + + switch (qopt->entries[i].command) { + case TC_TAPRIO_CMD_SET_GATES: + if (fpe) + return -EINVAL; + break; + case TC_TAPRIO_CMD_SET_AND_HOLD: + gates |= BIT(0); + fpe = true; + break; + case TC_TAPRIO_CMD_SET_AND_RELEASE: + gates &= ~BIT(0); + fpe = true; + break; + default: + return -EOPNOTSUPP; + } + + priv->plat->est->gcl[i] = delta_ns | (gates << wid); + } + + /* Adjust for real system time */ + time = ktime_to_timespec64(qopt->base_time); + priv->plat->est->btr[0] = (u32)time.tv_nsec; + 
priv->plat->est->btr[1] = (u32)time.tv_sec; + + ctr = qopt->cycle_time; + priv->plat->est->ctr[0] = do_div(ctr, NSEC_PER_SEC); + priv->plat->est->ctr[1] = (u32)ctr; + + if (fpe && !priv->dma_cap.fpesel) + return -EOPNOTSUPP; + + ret = stmmac_fpe_configure(priv, priv->ioaddr, + priv->plat->tx_queues_to_use, + priv->plat->rx_queues_to_use, fpe); + if (ret && fpe) { + netdev_err(priv->dev, "failed to enable Frame Preemption\n"); + return ret; + } + + ret = stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, + priv->plat->clk_ptp_rate); + if (ret) { + netdev_err(priv->dev, "failed to configure EST\n"); + goto disable; + } + + netdev_info(priv->dev, "configured EST\n"); + return 0; + +disable: + priv->plat->est->enable = false; + stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, + priv->plat->clk_ptp_rate); + return ret; +} + +static int tc_setup_etf(struct stmmac_priv *priv, + struct tc_etf_qopt_offload *qopt) +{ + if (!priv->dma_cap.tbssel) + return -EOPNOTSUPP; + if (qopt->queue >= priv->plat->tx_queues_to_use) + return -EINVAL; + if (!(priv->tx_queue[qopt->queue].tbs & STMMAC_TBS_AVAIL)) + return -EINVAL; + + if (qopt->enable) + priv->tx_queue[qopt->queue].tbs |= STMMAC_TBS_EN; + else + priv->tx_queue[qopt->queue].tbs &= ~STMMAC_TBS_EN; + + netdev_info(priv->dev, "%s ETF for Queue %d\n", + qopt->enable ? "enabled" : "disabled", qopt->queue); + return 0; +} + const struct stmmac_tc_ops dwmac510_tc_ops = { .init = tc_init, .setup_cls_u32 = tc_setup_cls_u32, .setup_cbs = tc_setup_cbs, .setup_cls = tc_setup_cls, + .setup_taprio = tc_setup_taprio, + .setup_etf = tc_setup_etf, }; diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index c91876f8c536..6ec9163e232c 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -2666,7 +2666,7 @@ static void cas_netpoll(struct net_device *dev) } #endif -static void cas_tx_timeout(struct net_device *dev) +static void cas_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct cas *cp = netdev_priv(dev); diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index f5fd1f3c07cc..9a5004f674c7 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6517,7 +6517,7 @@ static void niu_reset_task(struct work_struct *work) spin_unlock_irqrestore(&np->lock, flags); } -static void niu_tx_timeout(struct net_device *dev) +static void niu_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct niu *np = netdev_priv(dev); diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c index e9b757b03b56..c5add0b45eed 100644 --- a/drivers/net/ethernet/sun/sunbmac.c +++ b/drivers/net/ethernet/sun/sunbmac.c @@ -941,7 +941,7 @@ static int bigmac_close(struct net_device *dev) return 0; } -static void bigmac_tx_timeout(struct net_device *dev) +static void bigmac_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct bigmac *bp = netdev_priv(dev); diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index 3e7631160384..8358064fbd48 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -970,7 +970,7 @@ static void gem_poll_controller(struct net_device *dev) } #endif -static void gem_tx_timeout(struct net_device *dev) +static void gem_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct gem *gp = netdev_priv(dev); diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c index 
d007dfeba5c3..f0fe7bb2a750 100644 --- a/drivers/net/ethernet/sun/sunhme.c +++ b/drivers/net/ethernet/sun/sunhme.c @@ -2246,7 +2246,7 @@ static int happy_meal_close(struct net_device *dev) #define SXD(x) #endif -static void happy_meal_tx_timeout(struct net_device *dev) +static void happy_meal_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct happy_meal *hp = netdev_priv(dev); diff --git a/drivers/net/ethernet/sun/sunqe.c b/drivers/net/ethernet/sun/sunqe.c index 1468fa0a54e9..2102b95ec347 100644 --- a/drivers/net/ethernet/sun/sunqe.c +++ b/drivers/net/ethernet/sun/sunqe.c @@ -544,7 +544,7 @@ static void qe_tx_reclaim(struct sunqe *qep) qep->tx_old = elem; } -static void qe_tx_timeout(struct net_device *dev) +static void qe_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct sunqe *qep = netdev_priv(dev); int tx_full; diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c index 8b94d9ad9e2b..c23ce838ff63 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -1223,7 +1223,7 @@ vnet_handle_offloads(struct vnet_port *port, struct sk_buff *skb, { struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port); struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; - struct sk_buff *segs; + struct sk_buff *segs, *curr, *next; int maclen, datalen; int status; int gso_size, gso_type, gso_segs; @@ -1282,11 +1282,8 @@ vnet_handle_offloads(struct vnet_port *port, struct sk_buff *skb, skb_reset_mac_header(skb); status = 0; - while (segs) { - struct sk_buff *curr = segs; - - segs = segs->next; - curr->next = NULL; + skb_list_walk_safe(segs, curr, next) { + skb_mark_not_on_list(curr); if (port->tso && curr->len > dev->mtu) { skb_shinfo(curr)->gso_size = gso_size; skb_shinfo(curr)->gso_type = gso_type; @@ -1539,7 +1536,7 @@ out_dropped: } EXPORT_SYMBOL_GPL(sunvnet_start_xmit_common); -void sunvnet_tx_timeout_common(struct net_device *dev) +void sunvnet_tx_timeout_common(struct net_device *dev, unsigned int txqueue) { /* XXX Implement me XXX */ } diff --git a/drivers/net/ethernet/sun/sunvnet_common.h b/drivers/net/ethernet/sun/sunvnet_common.h index 2b808d2482d6..5416a3cb9e7d 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.h +++ b/drivers/net/ethernet/sun/sunvnet_common.h @@ -135,7 +135,7 @@ int sunvnet_open_common(struct net_device *dev); int sunvnet_close_common(struct net_device *dev); void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp); int sunvnet_set_mac_addr_common(struct net_device *dev, void *p); -void sunvnet_tx_timeout_common(struct net_device *dev); +void sunvnet_tx_timeout_common(struct net_device *dev, unsigned int txqueue); netdev_tx_t sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev, struct vnet_port *(*vnet_tx_port) diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c index a1f5a1e61040..07046a2370b3 100644 --- a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c +++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c @@ -689,7 +689,7 @@ static int xlgmac_close(struct net_device *netdev) return 0; } -static void xlgmac_tx_timeout(struct net_device *netdev) +static void xlgmac_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct xlgmac_pdata *pdata = netdev_priv(netdev); diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c index 3a655a4dc10e..a530afe3ce12 100644 --- a/drivers/net/ethernet/ti/cpmac.c +++ 
b/drivers/net/ethernet/ti/cpmac.c @@ -797,7 +797,7 @@ static irqreturn_t cpmac_irq(int irq, void *dev_id) return IRQ_HANDLED; } -static void cpmac_tx_timeout(struct net_device *dev) +static void cpmac_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct cpmac_priv *priv = netdev_priv(dev); @@ -816,16 +816,6 @@ static void cpmac_tx_timeout(struct net_device *dev) netif_tx_wake_all_queues(priv->dev); } -static int cpmac_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -{ - if (!(netif_running(dev))) - return -EINVAL; - if (!dev->phydev) - return -EINVAL; - - return phy_mii_ioctl(dev->phydev, ifr, cmd); -} - static void cpmac_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ring) { @@ -1054,7 +1044,7 @@ static const struct net_device_ops cpmac_netdev_ops = { .ndo_start_xmit = cpmac_start_xmit, .ndo_tx_timeout = cpmac_tx_timeout, .ndo_set_rx_mode = cpmac_set_multicast_list, - .ndo_do_ioctl = cpmac_ioctl, + .ndo_do_ioctl = phy_do_ioctl_running, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, }; diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c index 707d5eb480ce..97a058ca60ac 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.c +++ b/drivers/net/ethernet/ti/cpsw_priv.c @@ -272,7 +272,7 @@ void soft_reset(const char *module, void __iomem *reg) WARN(readl_relaxed(reg) & 1, "failed to soft-reset %s\n", module); } -void cpsw_ndo_tx_timeout(struct net_device *ndev) +void cpsw_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct cpsw_priv *priv = netdev_priv(ndev); struct cpsw_common *cpsw = priv->cpsw; diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h index bc726356a72c..b8d7b924ee3d 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.h +++ b/drivers/net/ethernet/ti/cpsw_priv.h @@ -449,7 +449,7 @@ int cpsw_rx_poll(struct napi_struct *napi_rx, int budget); void cpsw_rx_vlan_encap(struct sk_buff *skb); void soft_reset(const char *module, void __iomem *reg); void cpsw_set_slave_mac(struct cpsw_slave *slave, struct cpsw_priv *priv); -void cpsw_ndo_tx_timeout(struct net_device *ndev); +void cpsw_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue); int cpsw_need_resplit(struct cpsw_common *cpsw); int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd); int cpsw_ndo_set_tx_maxrate(struct net_device *ndev, int queue, u32 rate); diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index ae27be85e363..75d4e16c692b 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -983,7 +983,7 @@ fail_tx: * error and re-initialize the TX channel for hardware operation * */ -static void emac_dev_tx_timeout(struct net_device *ndev) +static void emac_dev_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct emac_priv *priv = netdev_priv(ndev); struct device *emac_dev = &ndev->dev; diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 1b2702f74455..d7a144b4a09f 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1811,7 +1811,7 @@ out: return (ret == 0) ? 
0 : err; } -static void netcp_ndo_tx_timeout(struct net_device *ndev) +static void netcp_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct netcp_intf *netcp = netdev_priv(ndev); unsigned int descs = knav_pool_count(netcp->tx_pool); @@ -2019,7 +2019,7 @@ static int netcp_create_interface(struct netcp_device *netcp_device, goto quit; } - efuse = devm_ioremap_nocache(dev, res.start, size); + efuse = devm_ioremap(dev, res.start, size); if (!efuse) { dev_err(dev, "could not map resource\n"); devm_release_mem_region(dev, res.start, size); diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index d6a192c1f337..fb36115e9c51 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -2533,8 +2533,6 @@ static int gbe_del_vid(void *intf_priv, int vid) } #if IS_ENABLED(CONFIG_TI_CPTS) -#define HAS_PHY_TXTSTAMP(p) ((p)->drv && (p)->drv->txtstamp) -#define HAS_PHY_RXTSTAMP(p) ((p)->drv && (p)->drv->rxtstamp) static void gbe_txtstamp(void *context, struct sk_buff *skb) { @@ -2566,7 +2564,7 @@ static int gbe_txtstamp_mark_pkt(struct gbe_intf *gbe_intf, * We mark it here because skb_tx_timestamp() is called * after all the txhooks are called. */ - if (phydev && HAS_PHY_TXTSTAMP(phydev)) { + if (phy_has_txtstamp(phydev)) { skb_shinfo(p_info->skb)->tx_flags |= SKBTX_IN_PROGRESS; return 0; } @@ -2588,7 +2586,7 @@ static int gbe_rxtstamp(struct gbe_intf *gbe_intf, struct netcp_packet *p_info) if (p_info->rxtstamp_complete) return 0; - if (phydev && HAS_PHY_RXTSTAMP(phydev)) { + if (phy_has_rxtstamp(phydev)) { p_info->rxtstamp_complete = true; return 0; } @@ -2830,7 +2828,7 @@ static int gbe_ioctl(void *intf_priv, struct ifreq *req, int cmd) struct gbe_intf *gbe_intf = intf_priv; struct phy_device *phy = gbe_intf->slave->phy; - if (!phy || !phy->drv->hwtstamp) { + if (!phy_has_hwtstamp(phy)) { switch (cmd) { case SIOCGHWTSTAMP: return gbe_hwtstamp_get(gbe_intf, req); diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c index 78f0f2d59e22..ad465202980a 100644 --- a/drivers/net/ethernet/ti/tlan.c +++ b/drivers/net/ethernet/ti/tlan.c @@ -161,7 +161,7 @@ static void tlan_set_multicast_list(struct net_device *); static int tlan_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static int tlan_probe1(struct pci_dev *pdev, long ioaddr, int irq, int rev, const struct pci_device_id *ent); -static void tlan_tx_timeout(struct net_device *dev); +static void tlan_tx_timeout(struct net_device *dev, unsigned int txqueue); static void tlan_tx_timeout_work(struct work_struct *work); static int tlan_init_one(struct pci_dev *pdev, const struct pci_device_id *ent); @@ -997,7 +997,7 @@ static int tlan_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) * **************************************************************/ -static void tlan_tx_timeout(struct net_device *dev) +static void tlan_tx_timeout(struct net_device *dev, unsigned int txqueue) { TLAN_DBG(TLAN_DEBUG_GNRL, "%s: Transmit timed out.\n", dev->name); @@ -1028,7 +1028,7 @@ static void tlan_tx_timeout_work(struct work_struct *work) struct tlan_priv *priv = container_of(work, struct tlan_priv, tlan_tqueue); - tlan_tx_timeout(priv->dev); + tlan_tx_timeout(priv->dev, UINT_MAX); } diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index 9d9f8acb7ee3..070dd6fa9401 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -1405,7 +1405,7 @@ 
out: * * called, if tx hangs. Schedules a task that resets the interface */ -void gelic_net_tx_timeout(struct net_device *netdev) +void gelic_net_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct gelic_card *card; diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.h b/drivers/net/ethernet/toshiba/ps3_gelic_net.h index 051033580f0a..805903dbddcc 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.h +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.h @@ -359,7 +359,7 @@ int gelic_net_open(struct net_device *netdev); int gelic_net_stop(struct net_device *netdev); netdev_tx_t gelic_net_xmit(struct sk_buff *skb, struct net_device *netdev); void gelic_net_set_multi(struct net_device *netdev); -void gelic_net_tx_timeout(struct net_device *netdev); +void gelic_net_tx_timeout(struct net_device *netdev, unsigned int txqueue); int gelic_net_setup_netdev(struct net_device *netdev, struct gelic_card *card); /* shared ethtool ops */ diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c index 538e70810d3d..6576271642c1 100644 --- a/drivers/net/ethernet/toshiba/spider_net.c +++ b/drivers/net/ethernet/toshiba/spider_net.c @@ -2180,7 +2180,7 @@ out: * called, if tx hangs. Schedules a task that resets the interface */ static void -spider_net_tx_timeout(struct net_device *netdev) +spider_net_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct spider_net_card *card; diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index 12466a72cefc..3fd43d30b20d 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -483,8 +483,7 @@ static void tc35815_txdone(struct net_device *dev); static int tc35815_close(struct net_device *dev); static struct net_device_stats *tc35815_get_stats(struct net_device *dev); static void tc35815_set_multicast_list(struct net_device *dev); -static void tc35815_tx_timeout(struct net_device *dev); -static int tc35815_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); +static void tc35815_tx_timeout(struct net_device *dev, unsigned int txqueue); #ifdef CONFIG_NET_POLL_CONTROLLER static void tc35815_poll_controller(struct net_device *dev); #endif @@ -751,7 +750,7 @@ static const struct net_device_ops tc35815_netdev_ops = { .ndo_get_stats = tc35815_get_stats, .ndo_set_rx_mode = tc35815_set_multicast_list, .ndo_tx_timeout = tc35815_tx_timeout, - .ndo_do_ioctl = tc35815_ioctl, + .ndo_do_ioctl = phy_do_ioctl_running, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -1189,7 +1188,7 @@ static void tc35815_schedule_restart(struct net_device *dev) spin_unlock_irqrestore(&lp->lock, flags); } -static void tc35815_tx_timeout(struct net_device *dev) +static void tc35815_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct tc35815_regs __iomem *tr = (struct tc35815_regs __iomem *)dev->base_addr; @@ -2009,15 +2008,6 @@ static const struct ethtool_ops tc35815_ethtool_ops = { .set_link_ksettings = phy_ethtool_set_link_ksettings, }; -static int tc35815_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) -{ - if (!netif_running(dev)) - return -EINVAL; - if (!dev->phydev) - return -ENODEV; - return phy_mii_ioctl(dev->phydev, rq, cmd); -} - static void tc35815_chip_reset(struct net_device *dev) { struct tc35815_regs __iomem *tr = diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index ed12dbd156f0..803247d51fe9 100644 
--- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -506,7 +506,7 @@ static void mdio_write(struct net_device *dev, int phy_id, int location, int val static int rhine_open(struct net_device *dev); static void rhine_reset_task(struct work_struct *work); static void rhine_slow_event_task(struct work_struct *work); -static void rhine_tx_timeout(struct net_device *dev); +static void rhine_tx_timeout(struct net_device *dev, unsigned int txqueue); static netdev_tx_t rhine_start_tx(struct sk_buff *skb, struct net_device *dev); static irqreturn_t rhine_interrupt(int irq, void *dev_instance); @@ -1761,7 +1761,7 @@ out_unlock: mutex_unlock(&rp->task_lock); } -static void rhine_tx_timeout(struct net_device *dev) +static void rhine_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct rhine_private *rp = netdev_priv(dev); void __iomem *ioaddr = rp->base; diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index 346e44115c4e..4b556b74541a 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -3257,12 +3257,16 @@ static struct platform_driver velocity_platform_driver = { * @dev: network device * * Called before an ethtool operation. We need to make sure the - * chip is out of D3 state before we poke at it. + * chip is out of D3 state before we poke at it. In case of ethtool + * ops nesting, only wake the device up in the outermost block. */ static int velocity_ethtool_up(struct net_device *dev) { struct velocity_info *vptr = netdev_priv(dev); - if (!netif_running(dev)) + + if (vptr->ethtool_ops_nesting == U32_MAX) + return -EBUSY; + if (!vptr->ethtool_ops_nesting++ && !netif_running(dev)) velocity_set_power_state(vptr, PCI_D0); return 0; } @@ -3272,12 +3276,14 @@ static int velocity_ethtool_up(struct net_device *dev) * @dev: network device * * Called after an ethtool operation. Restore the chip back to D3 - * state if it isn't running. + * state if it isn't running. In case of ethtool ops nesting, only + * put the device to sleep in the outermost block. 
*/ static void velocity_ethtool_down(struct net_device *dev) { struct velocity_info *vptr = netdev_priv(dev); - if (!netif_running(dev)) + + if (!--vptr->ethtool_ops_nesting && !netif_running(dev)) velocity_set_power_state(vptr, PCI_D3hot); } diff --git a/drivers/net/ethernet/via/via-velocity.h b/drivers/net/ethernet/via/via-velocity.h index cdfe7809e3c1..f196e71d2c04 100644 --- a/drivers/net/ethernet/via/via-velocity.h +++ b/drivers/net/ethernet/via/via-velocity.h @@ -1483,6 +1483,7 @@ struct velocity_info { struct velocity_context context; u32 ticks; + u32 ethtool_ops_nesting; u8 rev_id; diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c index bede1ff289c5..c0d181a7f83a 100644 --- a/drivers/net/ethernet/wiznet/w5100.c +++ b/drivers/net/ethernet/wiznet/w5100.c @@ -790,7 +790,7 @@ static void w5100_restart_work(struct work_struct *work) w5100_restart(priv->ndev); } -static void w5100_tx_timeout(struct net_device *ndev) +static void w5100_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct w5100_priv *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/wiznet/w5300.c b/drivers/net/ethernet/wiznet/w5300.c index 6ba2747779ce..46aae30c4636 100644 --- a/drivers/net/ethernet/wiznet/w5300.c +++ b/drivers/net/ethernet/wiznet/w5300.c @@ -341,7 +341,7 @@ static void w5300_get_regs(struct net_device *ndev, } } -static void w5300_tx_timeout(struct net_device *ndev) +static void w5300_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct w5300_priv *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 21c1b4322ea7..6f11f52c9a9e 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1080,17 +1080,6 @@ temac_poll_controller(struct net_device *ndev) } #endif -static int temac_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) -{ - if (!netif_running(ndev)) - return -EINVAL; - - if (!ndev->phydev) - return -EINVAL; - - return phy_mii_ioctl(ndev->phydev, rq, cmd); -} - static const struct net_device_ops temac_netdev_ops = { .ndo_open = temac_open, .ndo_stop = temac_stop, @@ -1098,7 +1087,7 @@ static const struct net_device_ops temac_netdev_ops = { .ndo_set_rx_mode = temac_set_multicast_list, .ndo_set_mac_address = temac_set_mac_address, .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = temac_ioctl, + .ndo_do_ioctl = phy_do_ioctl_running, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = temac_poll_controller, #endif @@ -1202,7 +1191,7 @@ static int temac_probe(struct platform_device *pdev) /* map device registers */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - lp->regs = devm_ioremap_nocache(&pdev->dev, res->start, + lp->regs = devm_ioremap(&pdev->dev, res->start, resource_size(res)); if (IS_ERR(lp->regs)) { dev_err(&pdev->dev, "could not map TEMAC registers\n"); @@ -1296,7 +1285,7 @@ static int temac_probe(struct platform_device *pdev) } else if (pdata) { /* 2nd memory resource specifies DMA registers */ res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - lp->sdma_regs = devm_ioremap_nocache(&pdev->dev, res->start, + lp->sdma_regs = devm_ioremap(&pdev->dev, res->start, resource_size(res)); if (IS_ERR(lp->sdma_regs)) { dev_err(&pdev->dev, diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 0de52e70abcc..0c26f5bcc523 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ 
b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -521,7 +521,7 @@ static int xemaclite_set_mac_address(struct net_device *dev, void *address) * * This function is called when Tx time out occurs for Emaclite device. */ -static void xemaclite_tx_timeout(struct net_device *dev) +static void xemaclite_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct net_local *lp = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c index fd5288ff53b5..480ab7251515 100644 --- a/drivers/net/ethernet/xircom/xirc2ps_cs.c +++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c @@ -288,7 +288,7 @@ struct local_info { */ static netdev_tx_t do_start_xmit(struct sk_buff *skb, struct net_device *dev); -static void xirc_tx_timeout(struct net_device *dev); +static void xirc_tx_timeout(struct net_device *dev, unsigned int txqueue); static void xirc2ps_tx_timeout_task(struct work_struct *work); static void set_addresses(struct net_device *dev); static void set_multicast_list(struct net_device *dev); @@ -1203,7 +1203,7 @@ xirc2ps_tx_timeout_task(struct work_struct *work) } static void -xirc_tx_timeout(struct net_device *dev) +xirc_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct local_info *lp = netdev_priv(dev); dev->stats.tx_errors++; diff --git a/drivers/net/ethernet/xscale/Kconfig b/drivers/net/ethernet/xscale/Kconfig index cd0a8f46e7c6..98aa7b8ddb06 100644 --- a/drivers/net/ethernet/xscale/Kconfig +++ b/drivers/net/ethernet/xscale/Kconfig @@ -27,4 +27,18 @@ config IXP4XX_ETH Say Y here if you want to use built-in Ethernet ports on IXP4xx processor. +config PTP_1588_CLOCK_IXP46X + tristate "Intel IXP46x as PTP clock" + depends on IXP4XX_ETH + depends on PTP_1588_CLOCK + default y + help + This driver adds support for using the IXP46X as a PTP + clock. This clock is only useful if your PTP programs are + getting hardware time stamps on the PTP Ethernet packets + using the SO_TIMESTAMPING API. + + To compile this driver as a module, choose M here: the module + will be called ptp_ixp46x. + endif # NET_VENDOR_XSCALE diff --git a/drivers/net/ethernet/xscale/Makefile b/drivers/net/ethernet/xscale/Makefile index 794a519d07b3..607f91b1e878 100644 --- a/drivers/net/ethernet/xscale/Makefile +++ b/drivers/net/ethernet/xscale/Makefile @@ -3,4 +3,5 @@ # Makefile for the Intel XScale IXP device drivers. 
# -obj-$(CONFIG_IXP4XX_ETH) += ixp4xx_eth.o +obj-$(CONFIG_IXP4XX_ETH) += ixp4xx_eth.o +obj-$(CONFIG_PTP_1588_CLOCK_IXP46X) += ptp_ixp46x.o diff --git a/drivers/net/ethernet/xscale/ixp46x_ts.h b/drivers/net/ethernet/xscale/ixp46x_ts.h new file mode 100644 index 000000000000..d792130e27b0 --- /dev/null +++ b/drivers/net/ethernet/xscale/ixp46x_ts.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * PTP 1588 clock using the IXP46X + * + * Copyright (C) 2010 OMICRON electronics GmbH + */ + +#ifndef _IXP46X_TS_H_ +#define _IXP46X_TS_H_ + +#define DEFAULT_ADDEND 0xF0000029 +#define TICKS_NS_SHIFT 4 + +struct ixp46x_channel_ctl { + u32 ch_control; /* 0x40 Time Synchronization Channel Control */ + u32 ch_event; /* 0x44 Time Synchronization Channel Event */ + u32 tx_snap_lo; /* 0x48 Transmit Snapshot Low Register */ + u32 tx_snap_hi; /* 0x4C Transmit Snapshot High Register */ + u32 rx_snap_lo; /* 0x50 Receive Snapshot Low Register */ + u32 rx_snap_hi; /* 0x54 Receive Snapshot High Register */ + u32 src_uuid_lo; /* 0x58 Source UUID0 Low Register */ + u32 src_uuid_hi; /* 0x5C Sequence Identifier/Source UUID0 High */ +}; + +struct ixp46x_ts_regs { + u32 control; /* 0x00 Time Sync Control Register */ + u32 event; /* 0x04 Time Sync Event Register */ + u32 addend; /* 0x08 Time Sync Addend Register */ + u32 accum; /* 0x0C Time Sync Accumulator Register */ + u32 test; /* 0x10 Time Sync Test Register */ + u32 unused; /* 0x14 */ + u32 rsystime_lo; /* 0x18 RawSystemTime_Low Register */ + u32 rsystime_hi; /* 0x1C RawSystemTime_High Register */ + u32 systime_lo; /* 0x20 SystemTime_Low Register */ + u32 systime_hi; /* 0x24 SystemTime_High Register */ + u32 trgt_lo; /* 0x28 TargetTime_Low Register */ + u32 trgt_hi; /* 0x2C TargetTime_High Register */ + u32 asms_lo; /* 0x30 Auxiliary Slave Mode Snapshot Low */ + u32 asms_hi; /* 0x34 Auxiliary Slave Mode Snapshot High */ + u32 amms_lo; /* 0x38 Auxiliary Master Mode Snapshot Low */ + u32 amms_hi; /* 0x3C Auxiliary Master Mode Snapshot High */ + + struct ixp46x_channel_ctl channel[3]; +}; + +/* 0x00 Time Sync Control Register Bits */ +#define TSCR_AMM (1<<3) +#define TSCR_ASM (1<<2) +#define TSCR_TTM (1<<1) +#define TSCR_RST (1<<0) + +/* 0x04 Time Sync Event Register Bits */ +#define TSER_SNM (1<<3) +#define TSER_SNS (1<<2) +#define TTIPEND (1<<1) + +/* 0x40 Time Synchronization Channel Control Register Bits */ +#define MASTER_MODE (1<<0) +#define TIMESTAMP_ALL (1<<1) + +/* 0x44 Time Synchronization Channel Event Register Bits */ +#define TX_SNAPSHOT_LOCKED (1<<0) +#define RX_SNAPSHOT_LOCKED (1<<1) + +/* The ptp_ixp46x module will set this variable */ +extern int ixp46x_phc_index; + +#endif diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index 6fc04ffb22c2..269596c15133 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -29,14 +29,16 @@ #include <linux/net_tstamp.h> #include <linux/of.h> #include <linux/phy.h> +#include <linux/platform_data/eth_ixp4xx.h> #include <linux/platform_device.h> #include <linux/ptp_classify.h> #include <linux/slab.h> #include <linux/module.h> -#include <mach/ixp46x_ts.h> #include <linux/soc/ixp4xx/npe.h> #include <linux/soc/ixp4xx/qmgr.h> +#include "ixp46x_ts.h" + #define DEBUG_DESC 0 #define DEBUG_RX 0 #define DEBUG_TX 0 @@ -517,25 +519,14 @@ static int ixp4xx_mdio_write(struct mii_bus *bus, int phy_id, int location, return ret; } -static int ixp4xx_mdio_register(void) +static int ixp4xx_mdio_register(struct eth_regs 
__iomem *regs) { int err; if (!(mdio_bus = mdiobus_alloc())) return -ENOMEM; - if (cpu_is_ixp43x()) { - /* IXP43x lacks NPE-B and uses NPE-C for MII PHY access */ - if (!(ixp4xx_read_feature_bits() & IXP4XX_FEATURE_NPEC_ETH)) - return -ENODEV; - mdio_regs = (struct eth_regs __iomem *)IXP4XX_EthC_BASE_VIRT; - } else { - /* All MII PHY accesses use NPE-B Ethernet registers */ - if (!(ixp4xx_read_feature_bits() & IXP4XX_FEATURE_NPEB_ETH0)) - return -ENODEV; - mdio_regs = (struct eth_regs __iomem *)IXP4XX_EthB_BASE_VIRT; - } - + mdio_regs = regs; __raw_writel(DEFAULT_CORE_CNTRL, &mdio_regs->core_control); spin_lock_init(&mdio_lock); mdio_bus->name = "IXP4xx MII Bus"; @@ -581,8 +572,8 @@ static void ixp4xx_adjust_link(struct net_device *dev) __raw_writel(DEFAULT_TX_CNTRL0 | TX_CNTRL0_HALFDUPLEX, &port->regs->tx_control[0]); - printk(KERN_INFO "%s: link up, speed %u Mb/s, %s duplex\n", - dev->name, port->speed, port->duplex ? "full" : "half"); + netdev_info(dev, "%s: link up, speed %u Mb/s, %s duplex\n", + dev->name, port->speed, port->duplex ? "full" : "half"); } @@ -592,7 +583,7 @@ static inline void debug_pkt(struct net_device *dev, const char *func, #if DEBUG_PKT_BYTES int i; - printk(KERN_DEBUG "%s: %s(%i) ", dev->name, func, len); + netdev_debug(dev, "%s(%i) ", func, len); for (i = 0; i < len; i++) { if (i >= DEBUG_PKT_BYTES) break; @@ -683,7 +674,7 @@ static int eth_poll(struct napi_struct *napi, int budget) int received = 0; #if DEBUG_RX - printk(KERN_DEBUG "%s: eth_poll\n", dev->name); + netdev_debug(dev, "eth_poll\n"); #endif while (received < budget) { @@ -697,23 +688,20 @@ static int eth_poll(struct napi_struct *napi, int budget) if ((n = queue_get_desc(rxq, port, 0)) < 0) { #if DEBUG_RX - printk(KERN_DEBUG "%s: eth_poll napi_complete\n", - dev->name); + netdev_debug(dev, "eth_poll napi_complete\n"); #endif napi_complete(napi); qmgr_enable_irq(rxq); if (!qmgr_stat_below_low_watermark(rxq) && napi_reschedule(napi)) { /* not empty again */ #if DEBUG_RX - printk(KERN_DEBUG "%s: eth_poll napi_reschedule succeeded\n", - dev->name); + netdev_debug(dev, "eth_poll napi_reschedule succeeded\n"); #endif qmgr_disable_irq(rxq); continue; } #if DEBUG_RX - printk(KERN_DEBUG "%s: eth_poll all done\n", - dev->name); + netdev_debug(dev, "eth_poll all done\n"); #endif return received; /* all work done */ } @@ -778,7 +766,7 @@ static int eth_poll(struct napi_struct *napi, int budget) } #if DEBUG_RX - printk(KERN_DEBUG "eth_poll(): end, not all work done\n"); + netdev_debug(dev, "eth_poll(): end, not all work done\n"); #endif return received; /* not all work done */ } @@ -842,7 +830,7 @@ static int eth_xmit(struct sk_buff *skb, struct net_device *dev) struct desc *desc; #if DEBUG_TX - printk(KERN_DEBUG "%s: eth_xmit\n", dev->name); + netdev_debug(dev, "eth_xmit\n"); #endif if (unlikely(skb->len > MAX_MRU)) { @@ -897,22 +885,21 @@ static int eth_xmit(struct sk_buff *skb, struct net_device *dev) if (qmgr_stat_below_low_watermark(txreadyq)) { /* empty */ #if DEBUG_TX - printk(KERN_DEBUG "%s: eth_xmit queue full\n", dev->name); + netdev_debug(dev, "eth_xmit queue full\n"); #endif netif_stop_queue(dev); /* we could miss TX ready interrupt */ /* really empty in fact */ if (!qmgr_stat_below_low_watermark(txreadyq)) { #if DEBUG_TX - printk(KERN_DEBUG "%s: eth_xmit ready again\n", - dev->name); + netdev_debug(dev, "eth_xmit ready again\n"); #endif netif_wake_queue(dev); } } #if DEBUG_TX - printk(KERN_DEBUG "%s: eth_xmit end\n", dev->name); + netdev_debug(dev, "eth_xmit end\n"); #endif ixp_tx_timestamp(port, 
skb); @@ -1099,7 +1086,7 @@ static int init_queues(struct port *port) int i; if (!ports_open) { - dma_pool = dma_pool_create(DRV_NAME, &port->netdev->dev, + dma_pool = dma_pool_create(DRV_NAME, port->netdev->dev.parent, POOL_ALLOC_SIZE, 32, 0); if (!dma_pool) return -ENOMEM; @@ -1186,8 +1173,7 @@ static int eth_open(struct net_device *dev) return err; if (npe_recv_message(npe, &msg, "ETH_GET_STATUS")) { - printk(KERN_ERR "%s: %s not responding\n", dev->name, - npe_name(npe)); + netdev_err(dev, "%s not responding\n", npe_name(npe)); return -EIO; } port->firmware[0] = msg.byte4; @@ -1299,7 +1285,7 @@ static int eth_close(struct net_device *dev) msg.eth_id = port->id; msg.byte3 = 1; if (npe_send_recv_message(port->npe, &msg, "ETH_ENABLE_LOOPBACK")) - printk(KERN_CRIT "%s: unable to enable loopback\n", dev->name); + netdev_crit(dev, "unable to enable loopback\n"); i = 0; do { /* drain RX buffers */ @@ -1323,11 +1309,11 @@ static int eth_close(struct net_device *dev) } while (++i < MAX_CLOSE_WAIT); if (buffs) - printk(KERN_CRIT "%s: unable to drain RX queue, %i buffer(s)" - " left in NPE\n", dev->name, buffs); + netdev_crit(dev, "unable to drain RX queue, %i buffer(s)" + " left in NPE\n", buffs); #if DEBUG_CLOSE if (!buffs) - printk(KERN_DEBUG "Draining RX queue took %i cycles\n", i); + netdev_debug(dev, "draining RX queue took %i cycles\n", i); #endif buffs = TX_DESCS; @@ -1343,17 +1329,16 @@ static int eth_close(struct net_device *dev) } while (++i < MAX_CLOSE_WAIT); if (buffs) - printk(KERN_CRIT "%s: unable to drain TX queue, %i buffer(s) " - "left in NPE\n", dev->name, buffs); + netdev_crit(dev, "unable to drain TX queue, %i buffer(s) " + "left in NPE\n", buffs); #if DEBUG_CLOSE if (!buffs) - printk(KERN_DEBUG "Draining TX queues took %i cycles\n", i); + netdev_debug(dev, "draining TX queues took %i cycles\n", i); #endif msg.byte3 = 0; if (npe_send_recv_message(port->npe, &msg, "ETH_DISABLE_LOOPBACK")) - printk(KERN_CRIT "%s: unable to disable loopback\n", - dev->name); + netdev_crit(dev, "unable to disable loopback\n"); phy_stop(dev->phydev); @@ -1374,54 +1359,88 @@ static const struct net_device_ops ixp4xx_netdev_ops = { .ndo_validate_addr = eth_validate_addr, }; -static int eth_init_one(struct platform_device *pdev) +static int ixp4xx_eth_probe(struct platform_device *pdev) { - struct port *port; - struct net_device *dev; - struct eth_plat_info *plat = dev_get_platdata(&pdev->dev); - struct phy_device *phydev = NULL; - u32 regs_phys; char phy_id[MII_BUS_ID_SIZE + 3]; + struct phy_device *phydev = NULL; + struct device *dev = &pdev->dev; + struct eth_plat_info *plat; + resource_size_t regs_phys; + struct net_device *ndev; + struct resource *res; + struct port *port; int err; - if (!(dev = alloc_etherdev(sizeof(struct port)))) + plat = dev_get_platdata(dev); + + if (!(ndev = devm_alloc_etherdev(dev, sizeof(struct port)))) return -ENOMEM; - SET_NETDEV_DEV(dev, &pdev->dev); - port = netdev_priv(dev); - port->netdev = dev; + SET_NETDEV_DEV(ndev, dev); + port = netdev_priv(ndev); + port->netdev = ndev; port->id = pdev->id; + /* Get the port resource and remap */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + regs_phys = res->start; + port->regs = devm_ioremap_resource(dev, res); + switch (port->id) { case IXP4XX_ETH_NPEA: - port->regs = (struct eth_regs __iomem *)IXP4XX_EthA_BASE_VIRT; - regs_phys = IXP4XX_EthA_BASE_PHYS; + /* If the MDIO bus is not up yet, defer probe */ + if (!mdio_bus) + return -EPROBE_DEFER; break; case IXP4XX_ETH_NPEB: - port->regs 
= (struct eth_regs __iomem *)IXP4XX_EthB_BASE_VIRT; - regs_phys = IXP4XX_EthB_BASE_PHYS; + /* + * On all except IXP43x, NPE-B is used for the MDIO bus. + * If there is no NPE-B in the feature set, bail out, else + * register the MDIO bus. + */ + if (!cpu_is_ixp43x()) { + if (!(ixp4xx_read_feature_bits() & + IXP4XX_FEATURE_NPEB_ETH0)) + return -ENODEV; + /* Else register the MDIO bus on NPE-B */ + if ((err = ixp4xx_mdio_register(port->regs))) + return err; + } + if (!mdio_bus) + return -EPROBE_DEFER; break; case IXP4XX_ETH_NPEC: - port->regs = (struct eth_regs __iomem *)IXP4XX_EthC_BASE_VIRT; - regs_phys = IXP4XX_EthC_BASE_PHYS; + /* + * IXP43x lacks NPE-B and uses NPE-C for the MDIO bus access, + * of there is no NPE-C, no bus, nothing works, so bail out. + */ + if (cpu_is_ixp43x()) { + if (!(ixp4xx_read_feature_bits() & + IXP4XX_FEATURE_NPEC_ETH)) + return -ENODEV; + /* Else register the MDIO bus on NPE-C */ + if ((err = ixp4xx_mdio_register(port->regs))) + return err; + } + if (!mdio_bus) + return -EPROBE_DEFER; break; default: - err = -ENODEV; - goto err_free; + return -ENODEV; } - dev->netdev_ops = &ixp4xx_netdev_ops; - dev->ethtool_ops = &ixp4xx_ethtool_ops; - dev->tx_queue_len = 100; + ndev->netdev_ops = &ixp4xx_netdev_ops; + ndev->ethtool_ops = &ixp4xx_ethtool_ops; + ndev->tx_queue_len = 100; - netif_napi_add(dev, &port->napi, eth_poll, NAPI_WEIGHT); + netif_napi_add(ndev, &port->napi, eth_poll, NAPI_WEIGHT); - if (!(port->npe = npe_request(NPE_ID(port->id)))) { - err = -EIO; - goto err_free; - } + if (!(port->npe = npe_request(NPE_ID(port->id)))) + return -EIO; - port->mem_res = request_mem_region(regs_phys, REGS_SIZE, dev->name); + port->mem_res = request_mem_region(regs_phys, REGS_SIZE, ndev->name); if (!port->mem_res) { err = -EBUSY; goto err_npe_rel; @@ -1429,9 +1448,9 @@ static int eth_init_one(struct platform_device *pdev) port->plat = plat; npe_port_tab[NPE_ID(port->id)] = port; - memcpy(dev->dev_addr, plat->hwaddr, ETH_ALEN); + memcpy(ndev->dev_addr, plat->hwaddr, ETH_ALEN); - platform_set_drvdata(pdev, dev); + platform_set_drvdata(pdev, ndev); __raw_writel(DEFAULT_CORE_CNTRL | CORE_RESET, &port->regs->core_control); @@ -1441,7 +1460,7 @@ static int eth_init_one(struct platform_device *pdev) snprintf(phy_id, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, mdio_bus->id, plat->phy); - phydev = phy_connect(dev, phy_id, &ixp4xx_adjust_link, + phydev = phy_connect(ndev, phy_id, &ixp4xx_adjust_link, PHY_INTERFACE_MODE_MII); if (IS_ERR(phydev)) { err = PTR_ERR(phydev); @@ -1450,11 +1469,11 @@ static int eth_init_one(struct platform_device *pdev) phydev->irq = PHY_POLL; - if ((err = register_netdev(dev))) + if ((err = register_netdev(ndev))) goto err_phy_dis; - printk(KERN_INFO "%s: MII PHY %i on %s\n", dev->name, plat->phy, - npe_name(port->npe)); + netdev_info(ndev, "%s: MII PHY %i on %s\n", ndev->name, plat->phy, + npe_name(port->npe)); return 0; @@ -1465,58 +1484,32 @@ err_free_mem: release_resource(port->mem_res); err_npe_rel: npe_release(port->npe); -err_free: - free_netdev(dev); return err; } -static int eth_remove_one(struct platform_device *pdev) +static int ixp4xx_eth_remove(struct platform_device *pdev) { - struct net_device *dev = platform_get_drvdata(pdev); - struct phy_device *phydev = dev->phydev; - struct port *port = netdev_priv(dev); + struct net_device *ndev = platform_get_drvdata(pdev); + struct phy_device *phydev = ndev->phydev; + struct port *port = netdev_priv(ndev); - unregister_netdev(dev); + unregister_netdev(ndev); phy_disconnect(phydev); + ixp4xx_mdio_remove(); 
npe_port_tab[NPE_ID(port->id)] = NULL; npe_release(port->npe); release_resource(port->mem_res); - free_netdev(dev); return 0; } static struct platform_driver ixp4xx_eth_driver = { .driver.name = DRV_NAME, - .probe = eth_init_one, - .remove = eth_remove_one, + .probe = ixp4xx_eth_probe, + .remove = ixp4xx_eth_remove, }; - -static int __init eth_init_module(void) -{ - int err; - - /* - * FIXME: we bail out on device tree boot but this really needs - * to be fixed in a nicer way: this registers the MDIO bus before - * even matching the driver infrastructure, we should only probe - * detected hardware. - */ - if (of_have_populated_dt()) - return -ENODEV; - if ((err = ixp4xx_mdio_register())) - return err; - return platform_driver_register(&ixp4xx_eth_driver); -} - -static void __exit eth_cleanup_module(void) -{ - platform_driver_unregister(&ixp4xx_eth_driver); - ixp4xx_mdio_remove(); -} +module_platform_driver(ixp4xx_eth_driver); MODULE_AUTHOR("Krzysztof Halasa"); MODULE_DESCRIPTION("Intel IXP4xx Ethernet driver"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("platform:ixp4xx_eth"); -module_init(eth_init_module); -module_exit(eth_cleanup_module); diff --git a/drivers/net/ethernet/xscale/ptp_ixp46x.c b/drivers/net/ethernet/xscale/ptp_ixp46x.c new file mode 100644 index 000000000000..9ecc395239e9 --- /dev/null +++ b/drivers/net/ethernet/xscale/ptp_ixp46x.c @@ -0,0 +1,329 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PTP 1588 clock using the IXP46X + * + * Copyright (C) 2010 OMICRON electronics GmbH + */ +#include <linux/device.h> +#include <linux/err.h> +#include <linux/gpio.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include <linux/ptp_clock_kernel.h> + +#include "ixp46x_ts.h" + +#define DRIVER "ptp_ixp46x" +#define N_EXT_TS 2 +#define MASTER_GPIO 8 +#define MASTER_IRQ 25 +#define SLAVE_GPIO 7 +#define SLAVE_IRQ 24 + +struct ixp_clock { + struct ixp46x_ts_regs *regs; + struct ptp_clock *ptp_clock; + struct ptp_clock_info caps; + int exts0_enabled; + int exts1_enabled; +}; + +DEFINE_SPINLOCK(register_lock); + +/* + * Register access functions + */ + +static u64 ixp_systime_read(struct ixp46x_ts_regs *regs) +{ + u64 ns; + u32 lo, hi; + + lo = __raw_readl(&regs->systime_lo); + hi = __raw_readl(&regs->systime_hi); + + ns = ((u64) hi) << 32; + ns |= lo; + ns <<= TICKS_NS_SHIFT; + + return ns; +} + +static void ixp_systime_write(struct ixp46x_ts_regs *regs, u64 ns) +{ + u32 hi, lo; + + ns >>= TICKS_NS_SHIFT; + hi = ns >> 32; + lo = ns & 0xffffffff; + + __raw_writel(lo, &regs->systime_lo); + __raw_writel(hi, &regs->systime_hi); +} + +/* + * Interrupt service routine + */ + +static irqreturn_t isr(int irq, void *priv) +{ + struct ixp_clock *ixp_clock = priv; + struct ixp46x_ts_regs *regs = ixp_clock->regs; + struct ptp_clock_event event; + u32 ack = 0, lo, hi, val; + + val = __raw_readl(&regs->event); + + if (val & TSER_SNS) { + ack |= TSER_SNS; + if (ixp_clock->exts0_enabled) { + hi = __raw_readl(&regs->asms_hi); + lo = __raw_readl(&regs->asms_lo); + event.type = PTP_CLOCK_EXTTS; + event.index = 0; + event.timestamp = ((u64) hi) << 32; + event.timestamp |= lo; + event.timestamp <<= TICKS_NS_SHIFT; + ptp_clock_event(ixp_clock->ptp_clock, &event); + } + } + + if (val & TSER_SNM) { + ack |= TSER_SNM; + if (ixp_clock->exts1_enabled) { + hi = __raw_readl(&regs->amms_hi); + lo = __raw_readl(&regs->amms_lo); + event.type = PTP_CLOCK_EXTTS; + event.index = 1; + event.timestamp = ((u64) hi) << 32; + event.timestamp
|= lo; + event.timestamp <<= TICKS_NS_SHIFT; + ptp_clock_event(ixp_clock->ptp_clock, &event); + } + } + + if (val & TTIPEND) + ack |= TTIPEND; /* this bit seems to be always set */ + + if (ack) { + __raw_writel(ack, &regs->event); + return IRQ_HANDLED; + } else + return IRQ_NONE; +} + +/* + * PTP clock operations + */ + +static int ptp_ixp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) +{ + u64 adj; + u32 diff, addend; + int neg_adj = 0; + struct ixp_clock *ixp_clock = container_of(ptp, struct ixp_clock, caps); + struct ixp46x_ts_regs *regs = ixp_clock->regs; + + if (ppb < 0) { + neg_adj = 1; + ppb = -ppb; + } + addend = DEFAULT_ADDEND; + adj = addend; + adj *= ppb; + diff = div_u64(adj, 1000000000ULL); + + addend = neg_adj ? addend - diff : addend + diff; + + __raw_writel(addend, &regs->addend); + + return 0; +} + +static int ptp_ixp_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + s64 now; + unsigned long flags; + struct ixp_clock *ixp_clock = container_of(ptp, struct ixp_clock, caps); + struct ixp46x_ts_regs *regs = ixp_clock->regs; + + spin_lock_irqsave(&register_lock, flags); + + now = ixp_systime_read(regs); + now += delta; + ixp_systime_write(regs, now); + + spin_unlock_irqrestore(&register_lock, flags); + + return 0; +} + +static int ptp_ixp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) +{ + u64 ns; + unsigned long flags; + struct ixp_clock *ixp_clock = container_of(ptp, struct ixp_clock, caps); + struct ixp46x_ts_regs *regs = ixp_clock->regs; + + spin_lock_irqsave(&register_lock, flags); + + ns = ixp_systime_read(regs); + + spin_unlock_irqrestore(&register_lock, flags); + + *ts = ns_to_timespec64(ns); + return 0; +} + +static int ptp_ixp_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + u64 ns; + unsigned long flags; + struct ixp_clock *ixp_clock = container_of(ptp, struct ixp_clock, caps); + struct ixp46x_ts_regs *regs = ixp_clock->regs; + + ns = timespec64_to_ns(ts); + + spin_lock_irqsave(&register_lock, flags); + + ixp_systime_write(regs, ns); + + spin_unlock_irqrestore(&register_lock, flags); + + return 0; +} + +static int ptp_ixp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) +{ + struct ixp_clock *ixp_clock = container_of(ptp, struct ixp_clock, caps); + + switch (rq->type) { + case PTP_CLK_REQ_EXTTS: + switch (rq->extts.index) { + case 0: + ixp_clock->exts0_enabled = on ? 1 : 0; + break; + case 1: + ixp_clock->exts1_enabled = on ?
1 : 0; + break; + default: + return -EINVAL; + } + return 0; + default: + break; + } + + return -EOPNOTSUPP; +} + +static const struct ptp_clock_info ptp_ixp_caps = { + .owner = THIS_MODULE, + .name = "IXP46X timer", + .max_adj = 66666655, + .n_ext_ts = N_EXT_TS, + .n_pins = 0, + .pps = 0, + .adjfreq = ptp_ixp_adjfreq, + .adjtime = ptp_ixp_adjtime, + .gettime64 = ptp_ixp_gettime, + .settime64 = ptp_ixp_settime, + .enable = ptp_ixp_enable, +}; + +/* module operations */ + +static struct ixp_clock ixp_clock; + +static int setup_interrupt(int gpio) +{ + int irq; + int err; + + err = gpio_request(gpio, "ixp4-ptp"); + if (err) + return err; + + err = gpio_direction_input(gpio); + if (err) + return err; + + irq = gpio_to_irq(gpio); + if (irq < 0) + return irq; + + err = irq_set_irq_type(irq, IRQF_TRIGGER_FALLING); + if (err) { + pr_err("cannot set trigger type for irq %d\n", irq); + return err; + } + + err = request_irq(irq, isr, 0, DRIVER, &ixp_clock); + if (err) { + pr_err("request_irq failed for irq %d\n", irq); + return err; + } + + return irq; +} + +static void __exit ptp_ixp_exit(void) +{ + free_irq(MASTER_IRQ, &ixp_clock); + free_irq(SLAVE_IRQ, &ixp_clock); + ixp46x_phc_index = -1; + ptp_clock_unregister(ixp_clock.ptp_clock); +} + +static int __init ptp_ixp_init(void) +{ + if (!cpu_is_ixp46x()) + return -ENODEV; + + ixp_clock.regs = + (struct ixp46x_ts_regs __iomem *) IXP4XX_TIMESYNC_BASE_VIRT; + + ixp_clock.caps = ptp_ixp_caps; + + ixp_clock.ptp_clock = ptp_clock_register(&ixp_clock.caps, NULL); + + if (IS_ERR(ixp_clock.ptp_clock)) + return PTR_ERR(ixp_clock.ptp_clock); + + ixp46x_phc_index = ptp_clock_index(ixp_clock.ptp_clock); + + __raw_writel(DEFAULT_ADDEND, &ixp_clock.regs->addend); + __raw_writel(1, &ixp_clock.regs->trgt_lo); + __raw_writel(0, &ixp_clock.regs->trgt_hi); + __raw_writel(TTIPEND, &ixp_clock.regs->event); + + if (MASTER_IRQ != setup_interrupt(MASTER_GPIO)) { + pr_err("failed to setup gpio %d as irq\n", MASTER_GPIO); + goto no_master; + } + if (SLAVE_IRQ != setup_interrupt(SLAVE_GPIO)) { + pr_err("failed to setup gpio %d as irq\n", SLAVE_GPIO); + goto no_slave; + } + + return 0; +no_slave: + free_irq(MASTER_IRQ, &ixp_clock); +no_master: + ptp_clock_unregister(ixp_clock.ptp_clock); + return -ENODEV; +} + +module_init(ptp_ixp_init); +module_exit(ptp_ixp_exit); + +MODULE_AUTHOR("Richard Cochran <richardcochran@gmail.com>"); +MODULE_DESCRIPTION("PTP clock using the IXP46X timer"); +MODULE_LICENSE("GPL");
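A note on the recurring interface change in this series: nearly every driver above only gains the new second argument to ndo_tx_timeout without using it yet (tlan even passes UINT_MAX when the hung queue is unknown). For illustration only, here is a minimal sketch of how a multi-queue driver could use the index to confine reporting and recovery to the queue that actually stalled; the foo_* names are hypothetical and not taken from any driver in this diff.

#include <linux/netdevice.h>

/* Hypothetical handler for the new two-argument ndo_tx_timeout hook. */
static void foo_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
	struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue);

	/* Report which ring stalled and how long since it last transmitted. */
	netdev_err(dev, "TX queue %u timed out, %lu jiffies since last start\n",
		   txqueue, jiffies - txq->trans_start);

	/* A real driver would reset only this ring here. */

	netif_tx_wake_queue(txq);	/* restart just the hung queue */
}

static const struct net_device_ops foo_netdev_ops = {
	.ndo_tx_timeout	= foo_tx_timeout,
	/* other callbacks elided */
};

Single-queue drivers can keep ignoring the argument, which is why most of the conversions above are purely mechanical signature changes.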